From 7dee25a07f6057a4afd42097357ffbdae0fdaacc Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Tue, 12 Sep 2023 10:01:20 -0700
Subject: [PATCH] fix falcon decode

get model and file type from bin file
---
 llm/ggml.go      | 105 +++++++++++++++++++++++++++++------------------
 llm/gguf.go      |  36 ++++++++--------
 llm/llama.go     | 100 +++++++++----------------------------------
 llm/llm.go       |  24 +++++++----
 server/images.go |  16 ++++----
 5 files changed, 123 insertions(+), 158 deletions(-)

diff --git a/llm/ggml.go b/llm/ggml.go
index 14675da1..e95f5fc6 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -8,54 +8,77 @@ import (
 	"sync"
 )
 
-type ModelFamily string
-
-const ModelFamilyUnknown ModelFamily = "unknown"
-
-type ModelType uint32
-
-const (
-	ModelType3B  ModelType = 26
-	ModelType7B  ModelType = 32
-	ModelType13B ModelType = 40
-	ModelType34B ModelType = 48
-	ModelType30B ModelType = 60
-	ModelType65B ModelType = 80
-)
-
-func (mt ModelType) String() string {
-	switch mt {
-	case ModelType3B:
-		return "3B"
-	case ModelType7B:
-		return "7B"
-	case ModelType13B:
-		return "13B"
-	case ModelType34B:
-		return "34B"
-	case ModelType30B:
-		return "30B"
-	case ModelType65B:
-		return "65B"
-	default:
-		return "Unknown"
-	}
-}
-
-type FileType interface {
-	String() string
-}
-
 type GGML struct {
 	magic uint32
 	container
 	model
 }
 
+const (
+	fileTypeF32 uint32 = iota
+	fileTypeF16
+	fileTypeQ4_0
+	fileTypeQ4_1
+	fileTypeQ4_1_F16
+	fileTypeQ8_0 uint32 = iota + 2
+	fileTypeQ5_0
+	fileTypeQ5_1
+	fileTypeQ2_K
+	fileTypeQ3_K_S
+	fileTypeQ3_K_M
+	fileTypeQ3_K_L
+	fileTypeQ4_K_S
+	fileTypeQ4_K_M
+	fileTypeQ5_K_S
+	fileTypeQ5_K_M
+	fileTypeQ6_K
+)
+
+func fileType(fileType uint32) string {
+	switch fileType {
+	case fileTypeF32:
+		return "F32"
+	case fileTypeF16:
+		return "F16"
+	case fileTypeQ4_0:
+		return "Q4_0"
+	case fileTypeQ4_1:
+		return "Q4_1"
+	case fileTypeQ4_1_F16:
+		return "Q4_1_F16"
+	case fileTypeQ8_0:
+		return "Q8_0"
+	case fileTypeQ5_0:
+		return "Q5_0"
+	case fileTypeQ5_1:
+		return "Q5_1"
+	case fileTypeQ2_K:
+		return "Q2_K"
+	case fileTypeQ3_K_S:
+		return "Q3_K_S"
+	case fileTypeQ3_K_M:
+		return "Q3_K_M"
+	case fileTypeQ3_K_L:
+		return "Q3_K_L"
+	case fileTypeQ4_K_S:
+		return "Q4_K_S"
+	case fileTypeQ4_K_M:
+		return "Q4_K_M"
+	case fileTypeQ5_K_S:
+		return "Q5_K_S"
+	case fileTypeQ5_K_M:
+		return "Q5_K_M"
+	case fileTypeQ6_K:
+		return "Q6_K"
+	default:
+		return "Unknown"
+	}
+}
+
 type model interface {
-	ModelFamily() ModelFamily
-	ModelType() ModelType
-	FileType() FileType
+	ModelFamily() string
+	ModelType() string
+	FileType() string
 }
 
 type container interface {
diff --git a/llm/gguf.go b/llm/gguf.go
index 804c3650..047d17cf 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -6,7 +6,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"log"
 	"path"
 	"sync"
 )
@@ -87,38 +86,37 @@ func (llm *ggufModel) NumKV() uint64 {
 	return llm.V2.NumKV
 }
 
-func (llm *ggufModel) ModelFamily() ModelFamily {
+func (llm *ggufModel) ModelFamily() string {
 	t, ok := llm.kv["general.architecture"].(string)
 	if ok {
-		return ModelFamily(t)
+		return t
 	}
 
-	log.Printf("unknown model family: %T", t)
-	return ModelFamilyUnknown
+	return "unknown"
 }
 
-func (llm *ggufModel) ModelType() ModelType {
+func (llm *ggufModel) ModelType() string {
 	switch llm.ModelFamily() {
-	case ModelFamilyLlama:
-		blocks, ok := llm.kv["llama.block_count"].(uint32)
-		if ok {
-			return ModelType(blocks)
+	case "llama":
+		if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
+			return llamaModelType(blocks)
+		}
+	case "falcon":
+		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
+			return falconModelType(blocks)
 		}
 	}
 
-	return ModelType7B
+	return "Unknown"
 }
 
-func (llm *ggufModel) FileType() FileType {
-	switch llm.ModelFamily() {
-	case ModelFamilyLlama:
-		t, ok := llm.kv["general.file_type"].(uint32)
-		if ok {
-			return llamaFileType(t)
-		}
+func (llm *ggufModel) FileType() string {
+	t, ok := llm.kv["general.file_type"].(uint32)
+	if ok {
+		return fileType(t)
 	}
 
-	return llamaFileTypeF16
+	return "Unknown"
}
 
 func (llm *ggufModel) Decode(r io.Reader) error {
diff --git a/llm/llama.go b/llm/llama.go
index 3153d091..4b7e6e01 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -95,38 +95,39 @@ func chooseRunner(gpuPath, cpuPath string) string {
 	return runPath
 }
 
-const ModelFamilyLlama ModelFamily = "llama"
-
 type llamaModel struct {
 	hyperparameters llamaHyperparameters
 }
 
-func (llm *llamaModel) ModelFamily() ModelFamily {
-	return ModelFamilyLlama
+func (llm *llamaModel) ModelFamily() string {
+	return "llama"
 }
 
-func (llm *llamaModel) ModelType() ModelType {
-	switch llm.hyperparameters.NumLayer {
+func llamaModelType(numLayer uint32) string {
+	switch numLayer {
 	case 26:
-		return ModelType3B
+		return "3B"
 	case 32:
-		return ModelType7B
+		return "7B"
 	case 40:
-		return ModelType13B
+		return "13B"
 	case 48:
-		return ModelType34B
+		return "34B"
 	case 60:
-		return ModelType30B
+		return "30B"
 	case 80:
-		return ModelType65B
+		return "65B"
+	default:
+		return "Unknown"
 	}
-
-	// TODO: find a better default
-	return ModelType7B
 }
 
-func (llm *llamaModel) FileType() FileType {
-	return llm.hyperparameters.FileType
+func (llm *llamaModel) ModelType() string {
+	return llamaModelType(llm.hyperparameters.NumLayer)
+}
+
+func (llm *llamaModel) FileType() string {
+	return fileType(llm.hyperparameters.FileType)
 }
 
 type llamaHyperparameters struct {
@@ -143,70 +144,7 @@ type llamaHyperparameters struct {
 	NumRot uint32
 
 	// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
-	FileType llamaFileType
-}
-
-type llamaFileType uint32
-
-const (
-	llamaFileTypeF32 llamaFileType = iota
-	llamaFileTypeF16
-	llamaFileTypeQ4_0
-	llamaFileTypeQ4_1
-	llamaFileTypeQ4_1_F16
-	llamaFileTypeQ8_0 llamaFileType = iota + 2
-	llamaFileTypeQ5_0
-	llamaFileTypeQ5_1
-	llamaFileTypeQ2_K
-	llamaFileTypeQ3_K_S
-	llamaFileTypeQ3_K_M
-	llamaFileTypeQ3_K_L
-	llamaFileTypeQ4_K_S
-	llamaFileTypeQ4_K_M
-	llamaFileTypeQ5_K_S
-	llamaFileTypeQ5_K_M
-	llamaFileTypeQ6_K
-)
-
-func (ft llamaFileType) String() string {
-	switch ft {
-	case llamaFileTypeF32:
-		return "F32"
-	case llamaFileTypeF16:
-		return "F16"
-	case llamaFileTypeQ4_0:
-		return "Q4_0"
-	case llamaFileTypeQ4_1:
-		return "Q4_1"
-	case llamaFileTypeQ4_1_F16:
-		return "Q4_1_F16"
-	case llamaFileTypeQ8_0:
-		return "Q8_0"
-	case llamaFileTypeQ5_0:
-		return "Q5_0"
-	case llamaFileTypeQ5_1:
-		return "Q5_1"
-	case llamaFileTypeQ2_K:
-		return "Q2_K"
-	case llamaFileTypeQ3_K_S:
-		return "Q3_K_S"
-	case llamaFileTypeQ3_K_M:
-		return "Q3_K_M"
-	case llamaFileTypeQ3_K_L:
-		return "Q3_K_L"
-	case llamaFileTypeQ4_K_S:
-		return "Q4_K_S"
-	case llamaFileTypeQ4_K_M:
-		return "Q4_K_M"
-	case llamaFileTypeQ5_K_S:
-		return "Q5_K_S"
-	case llamaFileTypeQ5_K_M:
-		return "Q5_K_M"
-	case llamaFileTypeQ6_K:
-		return "Q6_K"
-	default:
-		return "Unknown"
-	}
+	FileType uint32
 }
 
 type Running struct {
diff --git a/llm/llm.go b/llm/llm.go
index de9d89c9..537898f5 100644
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -37,7 +37,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 		return nil, err
 	}
 
-	switch ggml.FileType().String() {
+	switch ggml.FileType() {
 	case "Q8_0":
 		if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 			// GGML Q8_0 do not support Metal API and will
@@ -56,30 +56,36 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 
 	totalResidentMemory := memory.TotalMemory()
 	switch ggml.ModelType() {
-	case ModelType3B, ModelType7B:
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 16*1024*1024 {
+	case "3B", "7B":
+		if ggml.FileType() == "F16" && totalResidentMemory < 16*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
 		} else if totalResidentMemory < 8*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 8GB of memory")
 		}
-	case ModelType13B:
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 32*1024*1024 {
+	case "13B":
+		if ggml.FileType() == "F16" && totalResidentMemory < 32*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
 		} else if totalResidentMemory < 16*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 16GB of memory")
 		}
-	case ModelType30B, ModelType34B:
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 64*1024*1024 {
+	case "30B", "34B", "40B":
+		if ggml.FileType() == "F16" && totalResidentMemory < 64*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
 		} else if totalResidentMemory < 32*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 32GB of memory")
 		}
-	case ModelType65B:
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 128*1024*1024 {
+	case "65B", "70B":
+		if ggml.FileType() == "F16" && totalResidentMemory < 128*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
 		} else if totalResidentMemory < 64*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 64GB of memory")
 		}
+	case "180B":
+		if ggml.FileType() == "F16" && totalResidentMemory < 512*1024*1024 {
+			return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
+		} else if totalResidentMemory < 128*1024*1024 {
+			return nil, fmt.Errorf("model requires at least 128GB of memory")
+		}
 	}
 
 	switch ggml.Name() {
diff --git a/server/images.go b/server/images.go
index f4181f43..01ec4306 100644
--- a/server/images.go
+++ b/server/images.go
@@ -114,11 +114,11 @@ type LayerReader struct {
 }
 
 type ConfigV2 struct {
-	ModelFamily llm.ModelFamily `json:"model_family"`
-	ModelType   string          `json:"model_type"`
-	ModelFormat string          `json:"model_format"`
-	FileType    string          `json:"file_type"`
-	RootFS      RootFS          `json:"rootfs"`
+	ModelFormat string `json:"model_format"`
+	ModelFamily string `json:"model_family"`
+	ModelType   string `json:"model_type"`
+	FileType    string `json:"file_type"`
+	RootFS      RootFS `json:"rootfs"`
 
 	// required by spec
 	Architecture string `json:"architecture"`
@@ -357,10 +357,10 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 
-	config.ModelFamily = ggml.ModelFamily()
-	config.ModelType = ggml.ModelType().String()
 	config.ModelFormat = ggml.Name()
-	config.FileType = ggml.FileType().String()
+	config.ModelFamily = ggml.ModelFamily()
+	config.ModelType = ggml.ModelType()
+	config.FileType = ggml.FileType()
 
 	// reset the file
 	file.Seek(0, io.SeekStart)
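
Note on falconModelType: the new falcon case in llm/gguf.go calls falconModelType, but no definition appears in this diff, so the patch does not build on its own. A minimal sketch of the missing helper, assuming it mirrors llamaModelType and matches the "40B" and "180B" cases added to llm/llm.go (published Falcon configurations use 32 layers for 7B, 60 for 40B, and 80 for 180B):

// falconModelType maps a falcon model's block (layer) count to a
// parameter-size label, mirroring llamaModelType. This sketch is an
// assumption, not part of the commit shown above.
func falconModelType(numLayer uint32) string {
	switch numLayer {
	case 32:
		return "7B"
	case 60:
		return "40B"
	case 80:
		return "180B"
	default:
		return "Unknown"
	}
}

Keeping the mapping in a plain uint32-to-string helper, as llamaModelType does, lets the GGUF key/value path and the legacy GGML hyperparameter path share one source of truth per architecture. Relatedly, the iota + 2 jump in the fileType constants is deliberate: upstream GGML retired file types 5 and 6 (the old Q4_2 and Q4_3 quantizations), so fileTypeQ8_0 resumes the sequence at 7.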