Merge pull request #519 from jmorganca/mxyng/decode

Mxyng/decode
2023-09-13 12:43:57 -07:00 · 2023-09-13 12:43:57 -07:00 · 949553db23
commit 949553db23
parent f59c4d03f7 0c5a454361
5 changed files with 138 additions and 161 deletions
--- a/llm/ggml.go
+++ b/llm/ggml.go
@ -8,54 +8,77 @@ import (
 	"sync"
 )
 type ModelFamily string
 const ModelFamilyUnknown ModelFamily = "unknown"
 type ModelType uint32
 const (
 	ModelType3B  ModelType = 26
 	ModelType7B  ModelType = 32
 	ModelType13B ModelType = 40
 	ModelType34B ModelType = 48
 	ModelType30B ModelType = 60
 	ModelType65B ModelType = 80
 )
 func (mt ModelType) String() string {
 	switch mt {
 	case ModelType3B:
 		return "3B"
 	case ModelType7B:
 		return "7B"
 	case ModelType13B:
 		return "13B"
 	case ModelType34B:
 		return "34B"
 	case ModelType30B:
 		return "30B"
 	case ModelType65B:
 		return "65B"
 	default:
 		return "Unknown"
 	}
 }
 type FileType interface {
 	String() string
 }
 type GGML struct {
 	magic uint32
 	container
 	model
 }
 const (
 	fileTypeF32 uint32 = iota
 	fileTypeF16
 	fileTypeQ4_0
 	fileTypeQ4_1
 	fileTypeQ4_1_F16
 	fileTypeQ8_0 uint32 = iota + 2
 	fileTypeQ5_0
 	fileTypeQ5_1
 	fileTypeQ2_K
 	fileTypeQ3_K_S
 	fileTypeQ3_K_M
 	fileTypeQ3_K_L
 	fileTypeQ4_K_S
 	fileTypeQ4_K_M
 	fileTypeQ5_K_S
 	fileTypeQ5_K_M
 	fileTypeQ6_K
 )
 func fileType(fileType uint32) string {
 	switch fileType {
 	case fileTypeF32:
 		return "F32"
 	case fileTypeF16:
 		return "F16"
 	case fileTypeQ4_0:
 		return "Q4_0"
 	case fileTypeQ4_1:
 		return "Q4_1"
 	case fileTypeQ4_1_F16:
 		return "Q4_1_F16"
 	case fileTypeQ8_0:
 		return "Q8_0"
 	case fileTypeQ5_0:
 		return "Q5_0"
 	case fileTypeQ5_1:
 		return "Q5_1"
 	case fileTypeQ2_K:
 		return "Q2_K"
 	case fileTypeQ3_K_S:
 		return "Q3_K_S"
 	case fileTypeQ3_K_M:
 		return "Q3_K_M"
 	case fileTypeQ3_K_L:
 		return "Q3_K_L"
 	case fileTypeQ4_K_S:
 		return "Q4_K_S"
 	case fileTypeQ4_K_M:
 		return "Q4_K_M"
 	case fileTypeQ5_K_S:
 		return "Q5_K_S"
 	case fileTypeQ5_K_M:
 		return "Q5_K_M"
 	case fileTypeQ6_K:
 		return "Q6_K"
 	default:
 		return "Unknown"
 	}
 }
 type model interface {
-	ModelFamily() ModelFamily
+	ModelFamily() string
-	ModelType() ModelType
+	ModelType() string
-	FileType() FileType
+	FileType() string
 }
 type container interface {
--- a/llm/gguf.go
+++ b/llm/gguf.go
@ -6,7 +6,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
 	"log"
 	"path"
 	"sync"
 )
@ -87,38 +86,43 @@ func (llm *ggufModel) NumKV() uint64 {
 	return llm.V2.NumKV
 }
-func (llm *ggufModel) ModelFamily() ModelFamily {
+func (llm *ggufModel) ModelFamily() string {
 	t, ok := llm.kv["general.architecture"].(string)
 	if ok {
-		return ModelFamily(t)
+		return t
 	}
-	log.Printf("unknown model family: %T", t)
+	return "unknown"
 	return ModelFamilyUnknown
 }
-func (llm *ggufModel) ModelType() ModelType {
+func (llm *ggufModel) ModelType() string {
 	switch llm.ModelFamily() {
-	case ModelFamilyLlama:
+	case "llama":
-		blocks, ok := llm.kv["llama.block_count"].(uint32)
+		if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
-		if ok {
+			heads, headsOK := llm.kv["llama.head_count"].(uint32)
-			return ModelType(blocks)
+			headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
 			if headsOK && headsKVsOK && heads/headKVs == 8 {
 				return "70B"
 			}
 			return llamaModelType(blocks)
 		}
 	case "falcon":
 		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
 			return falconModelType(blocks)
 		}
 	}
-	return ModelType7B
+	return "Unknown"
 }
-func (llm *ggufModel) FileType() FileType {
+func (llm *ggufModel) FileType() string {
-	switch llm.ModelFamily() {
+	t, ok := llm.kv["general.file_type"].(uint32)
-	case ModelFamilyLlama:
+	if ok {
-		t, ok := llm.kv["general.file_type"].(uint32)
+		return fileType(t)
 		if ok {
 			return llamaFileType(t)
 		}
 	}
-	return llamaFileTypeF16
+	return "Unknown"
 }
 func (llm *ggufModel) Decode(r io.Reader) error {
--- a/llm/llama.go
+++ b/llm/llama.go
@ -95,38 +95,39 @@ func chooseRunner(gpuPath, cpuPath string) string {
 	return runPath
 }
 const ModelFamilyLlama ModelFamily = "llama"
 type llamaModel struct {
 	hyperparameters llamaHyperparameters
 }
-func (llm *llamaModel) ModelFamily() ModelFamily {
+func (llm *llamaModel) ModelFamily() string {
-	return ModelFamilyLlama
+	return "llama"
 }
-func (llm *llamaModel) ModelType() ModelType {
+func llamaModelType(numLayer uint32) string {
-	switch llm.hyperparameters.NumLayer {
+	switch numLayer {
 	case 26:
-		return ModelType3B
+		return "3B"
 	case 32:
-		return ModelType7B
+		return "7B"
 	case 40:
-		return ModelType13B
+		return "13B"
 	case 48:
-		return ModelType34B
+		return "34B"
 	case 60:
-		return ModelType30B
+		return "30B"
 	case 80:
-		return ModelType65B
+		return "65B"
 	default:
 		return "Unknown"
 	}
 	// TODO: find a better default
 	return ModelType7B
 }
-func (llm *llamaModel) FileType() FileType {
+func (llm *llamaModel) ModelType() string {
-	return llm.hyperparameters.FileType
+	return llamaModelType(llm.hyperparameters.NumLayer)
 }
 func (llm *llamaModel) FileType() string {
 	return fileType(llm.hyperparameters.FileType)
 }
 type llamaHyperparameters struct {
@ -143,70 +144,7 @@ type llamaHyperparameters struct {
 	NumRot   uint32
 	// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
-	FileType llamaFileType
+	FileType uint32
 }
 type llamaFileType uint32
 const (
 	llamaFileTypeF32 llamaFileType = iota
 	llamaFileTypeF16
 	llamaFileTypeQ4_0
 	llamaFileTypeQ4_1
 	llamaFileTypeQ4_1_F16
 	llamaFileTypeQ8_0 llamaFileType = iota + 2
 	llamaFileTypeQ5_0
 	llamaFileTypeQ5_1
 	llamaFileTypeQ2_K
 	llamaFileTypeQ3_K_S
 	llamaFileTypeQ3_K_M
 	llamaFileTypeQ3_K_L
 	llamaFileTypeQ4_K_S
 	llamaFileTypeQ4_K_M
 	llamaFileTypeQ5_K_S
 	llamaFileTypeQ5_K_M
 	llamaFileTypeQ6_K
 )
 func (ft llamaFileType) String() string {
 	switch ft {
 	case llamaFileTypeF32:
 		return "F32"
 	case llamaFileTypeF16:
 		return "F16"
 	case llamaFileTypeQ4_0:
 		return "Q4_0"
 	case llamaFileTypeQ4_1:
 		return "Q4_1"
 	case llamaFileTypeQ4_1_F16:
 		return "Q4_1_F16"
 	case llamaFileTypeQ8_0:
 		return "Q8_0"
 	case llamaFileTypeQ5_0:
 		return "Q5_0"
 	case llamaFileTypeQ5_1:
 		return "Q5_1"
 	case llamaFileTypeQ2_K:
 		return "Q2_K"
 	case llamaFileTypeQ3_K_S:
 		return "Q3_K_S"
 	case llamaFileTypeQ3_K_M:
 		return "Q3_K_M"
 	case llamaFileTypeQ3_K_L:
 		return "Q3_K_L"
 	case llamaFileTypeQ4_K_S:
 		return "Q4_K_S"
 	case llamaFileTypeQ4_K_M:
 		return "Q4_K_M"
 	case llamaFileTypeQ5_K_S:
 		return "Q5_K_S"
 	case llamaFileTypeQ5_K_M:
 		return "Q5_K_M"
 	case llamaFileTypeQ6_K:
 		return "Q6_K"
 	default:
 		return "Unknown"
 	}
 }
 type Running struct {
--- a/llm/llm.go
+++ b/llm/llm.go
@ -37,7 +37,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 		return nil, err
 	}
-	switch ggml.FileType().String() {
+	switch ggml.FileType() {
 	case "Q8_0":
 		if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 			// GGML Q8_0 do not support Metal API and will
@ -56,30 +56,36 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	totalResidentMemory := memory.TotalMemory()
 	switch ggml.ModelType() {
-	case ModelType3B, ModelType7B:
+	case "3B", "7B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 16*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 16*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
 		} else if totalResidentMemory < 8*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 8GB of memory")
 		}
-	case ModelType13B:
+	case "13B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 32*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 32*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
 		} else if totalResidentMemory < 16*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 16GB of memory")
 		}
-	case ModelType30B, ModelType34B:
+	case "30B", "34B", "40B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 64*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 64*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
 		} else if totalResidentMemory < 32*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 32GB of memory")
 		}
-	case ModelType65B:
+	case "65B", "70B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 128*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 128*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
 		} else if totalResidentMemory < 64*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 64GB of memory")
 		}
 	case "180B":
 		if ggml.FileType() == "F16" && totalResidentMemory < 512*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
 		} else if totalResidentMemory < 128*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 128GB of memory")
 		}
 	}
 	switch ggml.Name() {
--- a/server/images.go
+++ b/server/images.go
@ -114,11 +114,11 @@ type LayerReader struct {
 }
 type ConfigV2 struct {
-	ModelFamily llm.ModelFamily `json:"model_family"`
+	ModelFormat string `json:"model_format"`
-	ModelType   string          `json:"model_type"`
+	ModelFamily string `json:"model_family"`
-	ModelFormat string          `json:"model_format"`
+	ModelType   string `json:"model_type"`
-	FileType    string          `json:"file_type"`
+	FileType    string `json:"file_type"`
-	RootFS      RootFS          `json:"rootfs"`
+	RootFS      RootFS `json:"rootfs"`
 	// required by spec
 	Architecture string `json:"architecture"`
@ -357,10 +357,10 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 						return err
 					}
 					config.ModelFamily = ggml.ModelFamily()
 					config.ModelType = ggml.ModelType().String()
 					config.ModelFormat = ggml.Name()
-					config.FileType = ggml.FileType().String()
+					config.ModelFamily = ggml.ModelFamily()
 					config.ModelType = ggml.ModelType()
 					config.FileType = ggml.FileType()
 					// reset the file
 					file.Seek(0, io.SeekStart)
@ -498,6 +498,12 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			}
 		}
 		if config.ModelType == "65B" {
 			if numGQA, ok := formattedParams["num_gqa"].(int); ok && numGQA == 8 {
 				config.ModelType = "70B"
 			}
 		}
 		bts, err := json.Marshal(formattedParams)
 		if err != nil {
 			return err
@ -815,14 +821,14 @@ func formatParams(params map[string][]string) (map[string]interface{}, error) {
 						return nil, fmt.Errorf("invalid float value %s", vals)
 					}
-					out[key] = floatVal
+					out[key] = float32(floatVal)
 				case reflect.Int:
-					intVal, err := strconv.ParseInt(vals[0], 10, 0)
+					intVal, err := strconv.ParseInt(vals[0], 10, 64)
 					if err != nil {
 						return nil, fmt.Errorf("invalid int value %s", vals)
 					}
-					out[key] = intVal
+					out[key] = int(intVal)
 				case reflect.Bool:
 					boolVal, err := strconv.ParseBool(vals[0])
 					if err != nil {