From 9685c34509db4c5b0bc20aff3bf9252921908055 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 12 Apr 2024 13:55:12 -0700 Subject: [PATCH 1/9] quantize any fp16/fp32 model - FROM /path/to/{safetensors,pytorch} - FROM /path/to/fp{16,32}.bin - FROM model:fp{16,32} --- convert/convert.go | 3 +- convert/gemma.go | 15 +- convert/llama.go | 13 +- convert/mistral.go | 15 +- llm/filetype.go | 138 +++++++++ llm/ggml.go | 95 ++---- llm/llm.go | 56 +--- server/images.go | 509 +++++++++++---------------------- server/{layers.go => layer.go} | 73 ++--- server/model.go | 254 ++++++++++++++++ server/routes.go | 7 +- types/ordered/map.go | 32 +++ 12 files changed, 654 insertions(+), 556 deletions(-) create mode 100644 llm/filetype.go rename server/{layers.go => layer.go} (53%) create mode 100644 server/model.go create mode 100644 types/ordered/map.go diff --git a/convert/convert.go b/convert/convert.go index 42de080c..f4210e50 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "encoding/json" "fmt" + "io" "log/slog" "os" "path/filepath" @@ -47,7 +48,7 @@ type ByteOrder interface { type ModelArch interface { GetTensors() error LoadVocab() error - WriteGGUF() (string, error) + WriteGGUF(io.WriteSeeker) error } type ModelFormat interface { diff --git a/convert/gemma.go b/convert/gemma.go index 648a4ad9..88abe646 100644 --- a/convert/gemma.go +++ b/convert/gemma.go @@ -94,7 +94,7 @@ func (m *GemmaModel) LoadVocab() error { return nil } -func (m *GemmaModel) WriteGGUF() (string, error) { +func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error { kv := llm.KV{ "general.architecture": "gemma", "general.name": m.Name, @@ -122,16 +122,5 @@ func (m *GemmaModel) WriteGGUF() (string, error) { "tokenizer.ggml.add_eos_token": false, } - f, err := os.CreateTemp("", "ollama-gguf") - if err != nil { - return "", err - } - defer f.Close() - - mod := llm.NewGGUFV3(m.Params.ByteOrder) - if err := mod.Encode(f, kv, m.Tensors); err != nil { - 
return "", err - } - - return f.Name(), nil + return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) } diff --git a/convert/llama.go b/convert/llama.go index c7f7b290..6da46ec9 100644 --- a/convert/llama.go +++ b/convert/llama.go @@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error { return nil } -func (m *LlamaModel) WriteGGUF() (string, error) { +func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error { kv := llm.KV{ "general.architecture": "llama", "general.name": m.Name, @@ -161,16 +161,9 @@ func (m *LlamaModel) WriteGGUF() (string, error) { f, err := os.CreateTemp("", "ollama-gguf") if err != nil { - return "", err + return err } defer f.Close() - mod := llm.NewGGUFV3(m.Params.ByteOrder) - if err := mod.Encode(f, kv, m.Tensors); err != nil { - return "", err - } - - slog.Debug(fmt.Sprintf("gguf file = %s", f.Name())) - - return f.Name(), nil + return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors) } diff --git a/convert/mistral.go b/convert/mistral.go index 70c92edd..f88de12b 100644 --- a/convert/mistral.go +++ b/convert/mistral.go @@ -132,7 +132,7 @@ func (m *MistralModel) LoadVocab() error { return nil } -func (m *MistralModel) WriteGGUF() (string, error) { +func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error { kv := llm.KV{ "general.architecture": "llama", "general.name": m.Name, @@ -158,16 +158,5 @@ func (m *MistralModel) WriteGGUF() (string, error) { "tokenizer.ggml.unknown_token_id": uint32(0), } - f, err := os.CreateTemp("", "ollama-gguf") - if err != nil { - return "", err - } - defer f.Close() - - mod := llm.NewGGUFV3(m.Params.ByteOrder) - if err := mod.Encode(f, kv, m.Tensors); err != nil { - return "", err - } - - return f.Name(), nil + return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) } diff --git a/llm/filetype.go b/llm/filetype.go new file mode 100644 index 00000000..bae2f5d1 --- /dev/null +++ b/llm/filetype.go @@ -0,0 +1,138 @@ +package llm + +import "fmt" + +type filetype uint32 + +const ( + 
filetypeF32 filetype = iota + filetypeF16 + filetypeQ4_0 + filetypeQ4_1 + filetypeQ4_1_F16 + filetypeQ8_0 filetype = iota + 2 + filetypeQ5_0 + filetypeQ5_1 + filetypeQ2_K + filetypeQ3_K_S + filetypeQ3_K_M + filetypeQ3_K_L + filetypeQ4_K_S + filetypeQ4_K_M + filetypeQ5_K_S + filetypeQ5_K_M + filetypeQ6_K + filetypeIQ2_XXS + filetypeIQ2_XS + filetypeQ2_K_S + filetypeQ3_K_XS + filetypeIQ3_XXS + + filetypeUnknown +) + +func ParseFileType(s string) (filetype, error) { + switch s { + case "F32": + return filetypeF32, nil + case "F16": + return filetypeF16, nil + case "Q4_0": + return filetypeQ4_0, nil + case "Q4_1": + return filetypeQ4_1, nil + case "Q4_1_F16": + return filetypeQ4_1_F16, nil + case "Q8_0": + return filetypeQ8_0, nil + case "Q5_0": + return filetypeQ5_0, nil + case "Q5_1": + return filetypeQ5_1, nil + case "Q2_K": + return filetypeQ2_K, nil + case "Q3_K_S": + return filetypeQ3_K_S, nil + case "Q3_K_M": + return filetypeQ3_K_M, nil + case "Q3_K_L": + return filetypeQ3_K_L, nil + case "Q4_K_S": + return filetypeQ4_K_S, nil + case "Q4_K_M": + return filetypeQ4_K_M, nil + case "Q5_K_S": + return filetypeQ5_K_S, nil + case "Q5_K_M": + return filetypeQ5_K_M, nil + case "Q6_K": + return filetypeQ6_K, nil + case "IQ2_XXS": + return filetypeIQ2_XXS, nil + case "IQ2_XS": + return filetypeIQ2_XS, nil + case "Q2_K_S": + return filetypeQ2_K_S, nil + case "Q3_K_XS": + return filetypeQ3_K_XS, nil + case "IQ3_XXS": + return filetypeIQ3_XXS, nil + default: + return filetypeUnknown, fmt.Errorf("unknown filetype: %s", s) + } +} + +func (t filetype) String() string { + switch t { + case filetypeF32: + return "F32" + case filetypeF16: + return "F16" + case filetypeQ4_0: + return "Q4_0" + case filetypeQ4_1: + return "Q4_1" + case filetypeQ4_1_F16: + return "Q4_1_F16" + case filetypeQ8_0: + return "Q8_0" + case filetypeQ5_0: + return "Q5_0" + case filetypeQ5_1: + return "Q5_1" + case filetypeQ2_K: + return "Q2_K" + case filetypeQ3_K_S: + return "Q3_K_S" + case filetypeQ3_K_M: + 
return "Q3_K_M" + case filetypeQ3_K_L: + return "Q3_K_L" + case filetypeQ4_K_S: + return "Q4_K_S" + case filetypeQ4_K_M: + return "Q4_K_M" + case filetypeQ5_K_S: + return "Q5_K_S" + case filetypeQ5_K_M: + return "Q5_K_M" + case filetypeQ6_K: + return "Q6_K" + case filetypeIQ2_XXS: + return "IQ2_XXS" + case filetypeIQ2_XS: + return "IQ2_XS" + case filetypeQ2_K_S: + return "Q2_K_S" + case filetypeQ3_K_XS: + return "Q3_K_XS" + case filetypeIQ3_XXS: + return "IQ3_XXS" + default: + return "unknown" + } +} + +func (t filetype) Value() uint32 { + return uint32(t) +} diff --git a/llm/ggml.go b/llm/ggml.go index 1b094027..a7dbfeb3 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -13,82 +13,6 @@ type GGML struct { model } -const ( - fileTypeF32 uint32 = iota - fileTypeF16 - fileTypeQ4_0 - fileTypeQ4_1 - fileTypeQ4_1_F16 - fileTypeQ8_0 uint32 = iota + 2 - fileTypeQ5_0 - fileTypeQ5_1 - fileTypeQ2_K - fileTypeQ3_K_S - fileTypeQ3_K_M - fileTypeQ3_K_L - fileTypeQ4_K_S - fileTypeQ4_K_M - fileTypeQ5_K_S - fileTypeQ5_K_M - fileTypeQ6_K - fileTypeIQ2_XXS - fileTypeIQ2_XS - fileTypeQ2_K_S - fileTypeQ3_K_XS - fileTypeIQ3_XXS -) - -func fileType(fileType uint32) string { - switch fileType { - case fileTypeF32: - return "F32" - case fileTypeF16: - return "F16" - case fileTypeQ4_0: - return "Q4_0" - case fileTypeQ4_1: - return "Q4_1" - case fileTypeQ4_1_F16: - return "Q4_1_F16" - case fileTypeQ8_0: - return "Q8_0" - case fileTypeQ5_0: - return "Q5_0" - case fileTypeQ5_1: - return "Q5_1" - case fileTypeQ2_K: - return "Q2_K" - case fileTypeQ3_K_S: - return "Q3_K_S" - case fileTypeQ3_K_M: - return "Q3_K_M" - case fileTypeQ3_K_L: - return "Q3_K_L" - case fileTypeQ4_K_S: - return "Q4_K_S" - case fileTypeQ4_K_M: - return "Q4_K_M" - case fileTypeQ5_K_S: - return "Q5_K_S" - case fileTypeQ5_K_M: - return "Q5_K_M" - case fileTypeQ6_K: - return "Q6_K" - case fileTypeIQ2_XXS: - return "IQ2_XXS" - case fileTypeIQ2_XS: - return "IQ2_XS" - case fileTypeQ2_K_S: - return "Q2_K_S" - case fileTypeQ3_K_XS: - 
return "Q3_K_XS" - case fileTypeIQ3_XXS: - return "IQ3_XXS" - default: - return "unknown" - } -} - type model interface { KV() KV Tensors() Tensors @@ -123,7 +47,7 @@ func (kv KV) ParameterCount() uint64 { func (kv KV) FileType() string { if u64 := kv.u64("general.file_type"); u64 > 0 { - return fileType(uint32(u64)) + return filetype(uint32(u64)).String() } return "unknown" @@ -286,6 +210,23 @@ const ( var ErrUnsupportedFormat = errors.New("unsupported model format") +func DetectGGMLType(b []byte) string { + switch binary.LittleEndian.Uint32(b[:4]) { + case FILE_MAGIC_GGML: + return "ggml" + case FILE_MAGIC_GGMF: + return "ggmf" + case FILE_MAGIC_GGJT: + return "ggjt" + case FILE_MAGIC_GGLA: + return "ggla" + case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE: + return "gguf" + default: + return "" + } +} + func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) { var magic uint32 if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil { diff --git a/llm/llm.go b/llm/llm.go index c81e2edf..0e96511f 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -20,7 +20,7 @@ func SystemInfo() string { return C.GoString(C.llama_print_system_info()) } -func Quantize(infile, outfile, filetype string) error { +func Quantize(infile, outfile string, ftype filetype) error { cinfile := C.CString(infile) defer C.free(unsafe.Pointer(cinfile)) @@ -29,58 +29,10 @@ func Quantize(infile, outfile, filetype string) error { params := C.llama_model_quantize_default_params() params.nthread = -1 + params.ftype = ftype.Value() - switch filetype { - case "F32": - params.ftype = fileTypeF32 - case "F16": - params.ftype = fileTypeF16 - case "Q4_0": - params.ftype = fileTypeQ4_0 - case "Q4_1": - params.ftype = fileTypeQ4_1 - case "Q4_1_F16": - params.ftype = fileTypeQ4_1_F16 - case "Q8_0": - params.ftype = fileTypeQ8_0 - case "Q5_0": - params.ftype = fileTypeQ5_0 - case "Q5_1": - params.ftype = fileTypeQ5_1 - case "Q2_K": - params.ftype = fileTypeQ2_K - case "Q3_K_S": - params.ftype = fileTypeQ3_K_S - 
case "Q3_K_M": - params.ftype = fileTypeQ3_K_M - case "Q3_K_L": - params.ftype = fileTypeQ3_K_L - case "Q4_K_S": - params.ftype = fileTypeQ4_K_S - case "Q4_K_M": - params.ftype = fileTypeQ4_K_M - case "Q5_K_S": - params.ftype = fileTypeQ5_K_S - case "Q5_K_M": - params.ftype = fileTypeQ5_K_M - case "Q6_K": - params.ftype = fileTypeQ6_K - case "IQ2_XXS": - params.ftype = fileTypeIQ2_XXS - case "IQ2_XS": - params.ftype = fileTypeIQ2_XS - case "Q2_K_S": - params.ftype = fileTypeQ2_K_S - case "Q3_K_XS": - params.ftype = fileTypeQ3_K_XS - case "IQ3_XXS": - params.ftype = fileTypeIQ3_XXS - default: - return fmt.Errorf("unknown filetype: %s", filetype) - } - - if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 { - return fmt.Errorf("llama_model_quantize: %d", retval) + if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 { + return fmt.Errorf("llama_model_quantize: %d", rc) } return nil diff --git a/server/images.go b/server/images.go index 76205392..1de10929 100644 --- a/server/images.go +++ b/server/images.go @@ -1,8 +1,8 @@ package server import ( - "archive/zip" "bytes" + "cmp" "context" "crypto/sha256" "encoding/base64" @@ -11,7 +11,6 @@ import ( "errors" "fmt" "io" - "io/fs" "log" "log/slog" "net/http" @@ -26,12 +25,12 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/auth" - "github.com/ollama/ollama/convert" "github.com/ollama/ollama/format" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/server/envconfig" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" + "github.com/ollama/ollama/types/ordered" "github.com/ollama/ollama/version" ) @@ -158,36 +157,6 @@ type ConfigV2 struct { RootFS RootFS `json:"rootfs"` } -func (c *ConfigV2) SetModelFormat(format string) { - if c.ModelFormat == "" { - c.ModelFormat = format - } -} - -func (c *ConfigV2) SetModelFamily(families ...string) { - for _, family := range families { - if c.ModelFamily == "" { - c.ModelFamily = family - } - - if 
!slices.Contains(c.ModelFamilies, family) { - c.ModelFamilies = append(c.ModelFamilies, family) - } - } -} - -func (c *ConfigV2) SetModelType(modelType string) { - if c.ModelType == "" { - c.ModelType = modelType - } -} - -func (c *ConfigV2) SetFileType(fileType string) { - if c.FileType == "" { - c.FileType = fileType - } -} - type RootFS struct { Type string `json:"type"` DiffIDs []string `json:"diff_ids"` @@ -332,7 +301,7 @@ func GetModel(name string) (*Model, error) { return model, nil } -func realpath(mfDir, from string) string { +func realpath(rel, from string) string { abspath, err := filepath.Abs(from) if err != nil { return from @@ -349,22 +318,15 @@ func realpath(mfDir, from string) string { return filepath.Join(home, from[2:]) } - if _, err := os.Stat(filepath.Join(mfDir, from)); err == nil { + if _, err := os.Stat(filepath.Join(rel, from)); err == nil { // this is a file relative to the Modelfile - return filepath.Join(mfDir, from) + return filepath.Join(rel, from) } return abspath } -func CreateModel(ctx context.Context, name, modelFileDir, quantization string, modelfile *model.File, fn func(resp api.ProgressResponse)) error { - deleteMap := make(map[string]struct{}) - if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil { - for _, layer := range append(manifest.Layers, manifest.Config) { - deleteMap[layer.Digest] = struct{}{} - } - } - +func CreateModel(ctx context.Context, name, modelFileDir, quantization string, modelfile *model.File, fn func(resp api.ProgressResponse)) (err error) { config := ConfigV2{ OS: "linux", Architecture: "amd64", @@ -373,250 +335,197 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m }, } - var layers Layers - messages := []string{} - - params := make(map[string][]string) - fromParams := make(map[string]any) + var messages []*api.Message + parameters := make(map[string]any) + var layers []*Layer for _, c := range modelfile.Commands { mediatype := 
fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) switch c.Name { - case "model": - if strings.HasPrefix(c.Args, "@") { - blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) + case "model", "adapter": + var layers2 *ordered.Map[*Layer, *llm.GGML] + if name := model.ParseName(c.Args, ""); name.IsValid() { + layers2, err = parseFromModel(ctx, name, fn) + if err != nil { + return err + } + } else if strings.HasPrefix(c.Args, "@") { + blobpath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) if err != nil { return err } - c.Args = blobPath - } - - pathName := realpath(modelFileDir, c.Args) - - ggufName, err := convertModel(name, pathName, fn) - if err != nil { - var pathErr *fs.PathError - switch { - case errors.Is(err, zip.ErrFormat): - // it's not a safetensor archive - case errors.As(err, &pathErr): - // it's not a file on disk, could be a model reference - default: + blob, err := os.Open(blobpath) + if err != nil { return err } + defer blob.Close() + + layers2, err = parseFromFile(ctx, blob, fn) + if err != nil { + return err + } + } else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil { + defer file.Close() + + layers2, err = parseFromFile(ctx, file, fn) + if err != nil { + return err + } + } else { + return fmt.Errorf("invalid model reference: %s", c.Args) } - if ggufName != "" { - pathName = ggufName - defer os.RemoveAll(ggufName) + var err2 error + var tempfiles []*os.File - if quantization != "" { - quantization = strings.ToUpper(quantization) - fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", "F16", quantization)}) - tempfile, err := os.CreateTemp(filepath.Dir(ggufName), quantization) + // TODO(mxyng): replace with rangefunc + layers2.Items()(func(layer *Layer, ggml *llm.GGML) bool { + if quantization != "" && ggml != nil && ggml.Name() == "gguf" { + ftype, err := llm.ParseFileType(quantization) if err != nil { - return err - } - defer os.RemoveAll(tempfile.Name()) - - if err := 
llm.Quantize(ggufName, tempfile.Name(), quantization); err != nil { - return err + err2 = err + return false } - if err := tempfile.Close(); err != nil { - return err + filetype := ggml.KV().FileType() + if !slices.Contains([]string{"F16", "F32"}, filetype) { + err2 = errors.New("quantization is only supported for F16 and F32 models") + return false } - pathName = tempfile.Name() - } - } + fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", filetype, quantization)}) - bin, err := os.Open(pathName) - if err != nil { - // not a file on disk so must be a model reference - modelpath := ParseModelPath(c.Args) - manifest, _, err := GetManifest(modelpath) - switch { - case errors.Is(err, os.ErrNotExist): - fn(api.ProgressResponse{Status: "pulling model"}) - if err := PullModel(ctx, c.Args, &registryOptions{}, fn); err != nil { - return err - } - - manifest, _, err = GetManifest(modelpath) + blob, err := GetBlobsPath(layer.Digest) if err != nil { - return err - } - case err != nil: - return err - } - - fn(api.ProgressResponse{Status: "reading model metadata"}) - fromConfigPath, err := GetBlobsPath(manifest.Config.Digest) - if err != nil { - return err - } - - fromConfigFile, err := os.Open(fromConfigPath) - if err != nil { - return err - } - defer fromConfigFile.Close() - - var fromConfig ConfigV2 - if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil { - return err - } - - // if the model is still not in gguf format, error out - if fromConfig.ModelFormat != "gguf" { - return fmt.Errorf("%s is not in gguf format, this base model is not compatible with this version of ollama", c.Args) - } - - config.SetModelFormat(fromConfig.ModelFormat) - config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...) 
- config.SetModelType(fromConfig.ModelType) - config.SetFileType(fromConfig.FileType) - - for _, layer := range manifest.Layers { - deleteMap[layer.Digest] = struct{}{} - if layer.MediaType == "application/vnd.ollama.image.params" { - fromParamsPath, err := GetBlobsPath(layer.Digest) - if err != nil { - return err - } - - fromParamsFile, err := os.Open(fromParamsPath) - if err != nil { - return err - } - defer fromParamsFile.Close() - - if err := json.NewDecoder(fromParamsFile).Decode(&fromParams); err != nil { - return err - } + err2 = err + return false } - layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname()) + temp, err := os.CreateTemp(filepath.Dir(blob), quantization) if err != nil { - return err + err2 = err + return false + } + tempfiles = append(tempfiles, temp) + + if err := llm.Quantize(blob, temp.Name(), ftype); err != nil { + err2 = err + return false } - layers.Add(layer) + layer, err = NewLayer(temp, layer.MediaType) + if err != nil { + err2 = err + return false + } } - deleteMap[manifest.Config.Digest] = struct{}{} - continue - } - defer bin.Close() - - var offset int64 - for { - fn(api.ProgressResponse{Status: "creating model layer"}) - if _, err := bin.Seek(offset, io.SeekStart); err != nil { - return err + if ggml != nil { + config.ModelFormat = cmp.Or(config.ModelFormat, ggml.Name()) + config.ModelFamily = cmp.Or(config.ModelFamily, ggml.KV().Architecture()) + config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(ggml.KV().ParameterCount())) + config.FileType = cmp.Or(config.FileType, ggml.KV().FileType()) + config.ModelFamilies = append(config.ModelFamilies, ggml.KV().Architecture()) } - ggml, size, err := llm.DecodeGGML(bin) - if errors.Is(err, io.EOF) { - break - } else if errors.Is(err, llm.ErrUnsupportedFormat) { - return fmt.Errorf("model binary specified in FROM field is not a valid gguf format model, %w", err) - } else if err != nil { - return err - } + layers = append(layers, layer) + 
return true + }) - config.SetModelFormat(ggml.Name()) - config.SetModelFamily(ggml.KV().Architecture()) - config.SetModelType(format.HumanNumber(ggml.KV().ParameterCount())) - config.SetFileType(ggml.KV().FileType()) - - mediatype := mediatype - if ggml.KV().Architecture() == "clip" { - mediatype = "application/vnd.ollama.image.projector" - } - - sr := io.NewSectionReader(bin, offset, size) - layer, err := NewLayer(sr, mediatype) - if err != nil { - return err - } - - layers.Add(layer) - - offset += size - } - case "adapter": - if strings.HasPrefix(c.Args, "@") { - blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) - if err != nil { - return err - } - - c.Args = blobPath + for _, tempfile := range tempfiles { + defer tempfile.Close() + defer os.Remove(tempfile.Name()) } - fn(api.ProgressResponse{Status: "creating adapter layer"}) - bin, err := os.Open(realpath(modelFileDir, c.Args)) - if err != nil { - return err + if err2 != nil { + return err2 } - defer bin.Close() - - _, size, err := llm.DecodeGGML(bin) + case "license", "template", "system": + blob := strings.NewReader(c.Args) + layer, err := NewLayer(blob, mediatype) if err != nil { return err } - sr := io.NewSectionReader(bin, 0, size) - layer, err := NewLayer(sr, mediatype) - if err != nil { - return err + if c.Name != "license" { + // replace + layers = slices.DeleteFunc(layers, func(layer *Layer) bool { + return layer.MediaType == mediatype + }) } - layers.Add(layer) - case "license": - fn(api.ProgressResponse{Status: "creating license layer"}) - - bin := strings.NewReader(c.Args) - layer, err := NewLayer(bin, mediatype) - if err != nil { - return err - } - - layers.Add(layer) - case "template", "system": - fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)}) - - bin := strings.NewReader(c.Args) - layer, err := NewLayer(bin, mediatype) - if err != nil { - return err - } - - layers.Replace(layer) + layers = append(layers, layer) case "message": - messages = append(messages, 
c.Args) + role, content, ok := strings.Cut(c.Args, ": ") + if !ok { + return fmt.Errorf("invalid message: %s", c.Args) + } + + messages = append(messages, &api.Message{Role: role, Content: content}) default: - params[c.Name] = append(params[c.Name], c.Args) + ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}}) + if err != nil { + return err + } + + for k, v := range ps { + if ks, ok := parameters[k].([]string); ok { + parameters[k] = append(ks, v.([]string)...) + } else if vs, ok := v.([]string); ok { + parameters[k] = vs + } else { + parameters[k] = v + } + } } } - if len(messages) > 0 { - fn(api.ProgressResponse{Status: "creating parameters layer"}) + var err2 error + layers = slices.DeleteFunc(layers, func(layer *Layer) bool { + switch layer.MediaType { + case "application/vnd.ollama.image.message": + // if there are new messages, remove the inherited ones + if len(messages) > 0 { + return true + } - msgs := make([]api.Message, 0) + return false + case "application/vnd.ollama.image.params": + // merge inherited parameters with new ones + r, err := layer.Open() + if err != nil { + err2 = err + return false + } + defer r.Close() - for _, m := range messages { - // todo: handle images - msg := strings.SplitN(m, ": ", 2) - msgs = append(msgs, api.Message{Role: msg[0], Content: msg[1]}) + var ps map[string]any + if err := json.NewDecoder(r).Decode(&ps); err != nil { + err2 = err + return false + } + + for k, v := range ps { + if _, ok := parameters[k]; !ok { + parameters[k] = v + } + } + + return true + default: + return false } + }) + if err2 != nil { + return err2 + } + + if len(messages) > 0 { var b bytes.Buffer - if err := json.NewEncoder(&b).Encode(msgs); err != nil { + if err := json.NewEncoder(&b).Encode(messages); err != nil { return err } @@ -625,39 +534,25 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m return err } - layers.Replace(layer) + layers = append(layers, layer) } - if len(params) > 0 { - 
fn(api.ProgressResponse{Status: "creating parameters layer"}) - - formattedParams, err := api.FormatParams(params) - if err != nil { - return err - } - - for k, v := range fromParams { - if _, ok := formattedParams[k]; !ok { - formattedParams[k] = v - } - } - + if len(parameters) > 0 { var b bytes.Buffer - if err := json.NewEncoder(&b).Encode(formattedParams); err != nil { + if err := json.NewEncoder(&b).Encode(parameters); err != nil { return err } - fn(api.ProgressResponse{Status: "creating config layer"}) layer, err := NewLayer(&b, "application/vnd.ollama.image.params") if err != nil { return err } - layers.Replace(layer) + layers = append(layers, layer) } - digests := make([]string, len(layers.items)) - for i, layer := range layers.items { + digests := make([]string, len(layers)) + for i, layer := range layers { digests[i] = layer.Digest } @@ -668,36 +563,37 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m return err } - configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json") + layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json") if err != nil { return err } - delete(deleteMap, configLayer.Digest) + for _, layer := range append(layers, layer) { + if layer.status != "" { + fn(api.ProgressResponse{Status: layer.status}) + } + } - for _, layer := range append(layers.items, configLayer) { - committed, err := layer.Commit() - if err != nil { - return err + unref := make(map[string]struct{}) + if manifest, _, err := GetManifest(ParseModelPath(name)); err == nil { + for _, layer := range manifest.Layers { + if !slices.Contains(digests, layer.Digest) { + unref[layer.Digest] = struct{}{} + } } - status := "writing layer" - if !committed { - status = "using already created layer" + if manifest.Config.Digest != layer.Digest { + unref[manifest.Config.Digest] = struct{}{} } - - fn(api.ProgressResponse{Status: fmt.Sprintf("%s %s", status, layer.Digest)}) - - delete(deleteMap, layer.Digest) 
} fn(api.ProgressResponse{Status: "writing manifest"}) - if err := WriteManifest(name, configLayer, layers.items); err != nil { + if err := WriteManifest(name, layer, layers); err != nil { return err } if !envconfig.NoPrune { - if err := deleteUnusedLayers(nil, deleteMap, false); err != nil { + if err := deleteUnusedLayers(nil, unref, false); err != nil { return err } } @@ -706,73 +602,6 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m return nil } -func convertModel(name, path string, fn func(resp api.ProgressResponse)) (string, error) { - r, err := zip.OpenReader(path) - if err != nil { - return "", err - } - defer r.Close() - - tempDir, err := os.MkdirTemp("", "ollama-convert") - if err != nil { - return "", err - } - defer os.RemoveAll(tempDir) - - fn(api.ProgressResponse{Status: "unpacking model metadata"}) - for _, f := range r.File { - fpath := filepath.Join(tempDir, f.Name) - outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) - if err != nil { - return "", err - } - - rc, err := f.Open() - if err != nil { - return "", err - } - - _, err = io.Copy(outFile, rc) - if err != nil { - return "", err - } - - outFile.Close() - rc.Close() - } - - mf, err := convert.GetModelFormat(tempDir) - if err != nil { - return "", err - } - - params, err := mf.GetParams(tempDir) - if err != nil { - return "", err - } - - mArch, err := mf.GetModelArch(name, tempDir, params) - if err != nil { - return "", err - } - - fn(api.ProgressResponse{Status: "processing tensors"}) - if err := mArch.GetTensors(); err != nil { - return "", err - } - - if err := mArch.LoadVocab(); err != nil { - return "", err - } - - fn(api.ProgressResponse{Status: "converting model"}) - path, err = mArch.WriteGGUF() - if err != nil { - return "", err - } - - return path, nil -} func CopyModel(src, dst model.Name) error { if !dst.IsFullyQualified() { diff --git a/server/layers.go b/server/layer.go similarity index 53% rename from 
server/layers.go rename to server/layer.go index 07787406..dcca3854 100644 --- a/server/layers.go +++ b/server/layer.go @@ -5,39 +5,14 @@ import ( "fmt" "io" "os" - "strings" - - "golang.org/x/exp/slices" ) -type Layers struct { - items []*Layer -} - -func (ls *Layers) Add(layer *Layer) { - if layer.Size > 0 { - ls.items = append(ls.items, layer) - } -} - -func (ls *Layers) Replace(layer *Layer) { - if layer.Size > 0 { - mediatype := layer.MediaType - layers := slices.DeleteFunc(ls.items, func(l *Layer) bool { - return l.MediaType == mediatype - }) - - ls.items = append(layers, layer) - } -} - type Layer struct { MediaType string `json:"mediaType"` Digest string `json:"digest"` Size int64 `json:"size"` From string `json:"from,omitempty"` - - tempFileName string + status string } func NewLayer(r io.Reader, mediatype string) (*Layer, error) { @@ -46,14 +21,12 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) { return nil, err } - const delimiter = "-" - - pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter) - temp, err := os.CreateTemp(blobs, pattern) + temp, err := os.CreateTemp(blobs, "sha256-") if err != nil { return nil, err } defer temp.Close() + defer os.Remove(temp.Name()) sha256sum := sha256.New() n, err := io.Copy(io.MultiWriter(temp, sha256sum), r) @@ -61,11 +34,29 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) { return nil, err } + if err := temp.Close(); err != nil { + return nil, err + } + + digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)) + blob, err := GetBlobsPath(digest) + if err != nil { + return nil, err + } + + status := "using existing layer" + if _, err := os.Stat(blob); err != nil { + status = "creating new layer" + if err := os.Rename(temp.Name(), blob); err != nil { + return nil, err + } + } + return &Layer{ - MediaType: mediatype, - Digest: fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)), - Size: n, - tempFileName: temp.Name(), + MediaType: mediatype, + Digest: digest, + Size: n, + status: 
fmt.Sprintf("%s %s", status, digest), }, nil } @@ -85,21 +76,15 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) { Digest: digest, Size: fi.Size(), From: from, + status: fmt.Sprintf("using existing layer %s", digest), }, nil } -func (l *Layer) Commit() (bool, error) { - // always remove temp - defer os.Remove(l.tempFileName) - +func (l *Layer) Open() (io.ReadCloser, error) { blob, err := GetBlobsPath(l.Digest) if err != nil { - return false, err + return nil, err } - if _, err := os.Stat(blob); err != nil { - return true, os.Rename(l.tempFileName, blob) - } - - return false, nil + return os.Open(blob) } diff --git a/server/model.go b/server/model.go new file mode 100644 index 00000000..2d7797f0 --- /dev/null +++ b/server/model.go @@ -0,0 +1,254 @@ +package server + +import ( + "archive/zip" + "bytes" + "context" + "errors" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + + "github.com/ollama/ollama/api" + "github.com/ollama/ollama/convert" + "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/types/model" + "github.com/ollama/ollama/types/ordered" +) + +func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (*ordered.Map[*Layer, *llm.GGML], error) { + modelpath := ParseModelPath(name.DisplayLongest()) + manifest, _, err := GetManifest(modelpath) + switch { + case errors.Is(err, os.ErrNotExist): + if err := PullModel(ctx, name.DisplayLongest(), &registryOptions{}, fn); err != nil { + return nil, err + } + + return parseFromModel(ctx, name, fn) + case err != nil: + return nil, err + } + + layers := ordered.NewMap[*Layer, *llm.GGML]() + for _, layer := range manifest.Layers { + layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname()) + if err != nil { + return nil, err + } + + switch layer.MediaType { + case "application/vnd.ollama.image.model", + "application/vnd.ollama.image.projector", + "application/vnd.ollama.image.adapter": + blobpath, err := 
GetBlobsPath(layer.Digest) + if err != nil { + return nil, err + } + + blob, err := os.Open(blobpath) + if err != nil { + return nil, err + } + defer blob.Close() + + ggml, _, err := llm.DecodeGGML(blob) + if err != nil { + return nil, err + } + layers.Add(layer, ggml) + default: + layers.Add(layer, nil) + } + + } + + return layers, nil +} + +func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (*ordered.Map[*Layer, *llm.GGML], error) { + stat, err := file.Stat() + if err != nil { + return nil, err + } + + r, err := zip.NewReader(file, stat.Size()) + if err != nil { + return nil, err + } + + tempdir, err := os.MkdirTemp(filepath.Dir(file.Name()), "") + if err != nil { + return nil, err + } + defer os.RemoveAll(tempdir) + + fn(api.ProgressResponse{Status: "unpacking model metadata"}) + for _, f := range r.File { + // TODO(mxyng): this should not write out all files to disk + outfile, err := os.Create(filepath.Join(tempdir, f.Name)) + if err != nil { + return nil, err + } + + infile, err := f.Open() + if err != nil { + return nil, err + } + + if _, err = io.Copy(outfile, infile); err != nil { + return nil, err + } + + if err := outfile.Close(); err != nil { + return nil, err + } + + if err := infile.Close(); err != nil { + return nil, err + } + } + + mf, err := convert.GetModelFormat(tempdir) + if err != nil { + return nil, err + } + + params, err := mf.GetParams(tempdir) + if err != nil { + return nil, err + } + + mArch, err := mf.GetModelArch("", tempdir, params) + if err != nil { + return nil, err + } + + fn(api.ProgressResponse{Status: "processing tensors"}) + if err := mArch.GetTensors(); err != nil { + return nil, err + } + + if err := mArch.LoadVocab(); err != nil { + return nil, err + } + + fn(api.ProgressResponse{Status: "converting model"}) + + // TODO(mxyng): this should write directly into a layer + // e.g. 
NewLayer(arch.Reader(), "application/vnd.ollama.image.model") + temp, err := os.CreateTemp(tempdir, "fp16") + if err != nil { + return nil, err + } + defer temp.Close() + defer os.Remove(temp.Name()) + + if err = mArch.WriteGGUF(temp); err != nil { + return nil, err + } + + if _, err := temp.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + layer, err := NewLayer(temp, "application/vnd.ollama.image.model") + if err != nil { + return nil, fmt.Errorf("aaa: %w", err) + } + + blobpath, err := GetBlobsPath(layer.Digest) + if err != nil { + return nil, err + } + + bin, err := os.Open(blobpath) + if err != nil { + return nil, err + } + defer bin.Close() + + ggml, _, err := llm.DecodeGGML(bin) + if err != nil { + return nil, err + } + + layer, err = NewLayerFromLayer(layer.Digest, layer.MediaType, "") + if err != nil { + return nil, err + } + + layers := ordered.NewMap[*Layer, *llm.GGML]() + layers.Add(layer, ggml) + return layers, nil +} + +func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (*ordered.Map[*Layer, *llm.GGML], error) { + sr := io.NewSectionReader(file, 0, 512) + contentType, err := detectContentType(sr) + if err != nil { + return nil, err + } + + switch contentType { + case "gguf", "ggla": + // noop + case "application/zip": + return parseFromZipFile(ctx, file, fn) + default: + return nil, fmt.Errorf("unsupported content type: %s", contentType) + } + + layers := ordered.NewMap[*Layer, *llm.GGML]() + + stat, err := file.Stat() + if err != nil { + return nil, err + } + + var offset int64 + for offset < stat.Size() { + ggml, n, err := llm.DecodeGGML(file) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + return nil, err + } + + mediatype := "application/vnd.ollama.image.model" + if ggml.Name() == "ggla" { + mediatype = "application/vnd.ollama.image.adapter" + } else if ggml.KV().Architecture() == "clip" { + mediatype = "application/vnd.ollama.image.projector" + } + + layer, err := 
NewLayer(io.NewSectionReader(file, offset, n), mediatype) + if err != nil { + return nil, err + } + + layers.Add(layer, ggml) + offset = n + } + + return layers, nil +} + +func detectContentType(r io.Reader) (string, error) { + var b bytes.Buffer + if _, err := io.Copy(&b, r); err != nil { + return "", err + } + + if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" { + return contentType, nil + } + + if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" { + return contentType, nil + } + + return "unknown", nil +} diff --git a/server/routes.go b/server/routes.go index e878598a..0a11909c 100644 --- a/server/routes.go +++ b/server/routes.go @@ -560,7 +560,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) { ctx, cancel := context.WithCancel(c.Request.Context()) defer cancel() - if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), req.Quantization, modelfile, fn); err != nil { + if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil { ch <- gin.H{"error": err.Error()} } }() @@ -852,11 +852,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) { return } - if _, err := layer.Commit(); err != nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - c.Status(http.StatusCreated) } diff --git a/types/ordered/map.go b/types/ordered/map.go new file mode 100644 index 00000000..076d657d --- /dev/null +++ b/types/ordered/map.go @@ -0,0 +1,32 @@ +package ordered + +type Map[K comparable, V any] struct { + s []K + m map[K]V +} + +func NewMap[K comparable, V any]() *Map[K, V] { + return &Map[K, V]{ + s: make([]K, 0), + m: make(map[K]V), + } +} + +type iter_Seq2[K, V any] func(func(K, V) bool) + +func (m *Map[K, V]) Items() iter_Seq2[K, V] { + return func(yield func(K, V) bool) { + for _, k := range m.s { + if !yield(k, m.m[k]) { + return + } + } + } +} + +func (m *Map[K, V]) Add(k K, v 
V) { + if _, ok := m.m[k]; !ok { + m.s = append(m.s, k) + m.m[k] = v + } +} From a7248f6ea8fc277b81916dffb238cdcb1f0d9c58 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 16 Apr 2024 15:37:28 -0700 Subject: [PATCH 2/9] update tests --- integration/utils_test.go | 2 +- server/routes_test.go | 38 +++++++++++++++++--------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/integration/utils_test.go b/integration/utils_test.go index 3e91187a..e133e76d 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -107,7 +107,7 @@ func startServer(ctx context.Context, ollamaHost string) error { if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost { slog.Info("setting env", "OLLAMA_HOST", ollamaHost) - os.Setenv("OLLAMA_HOST", ollamaHost) + t.Setenv("OLLAMA_HOST", ollamaHost) } slog.Info("starting server", "url", ollamaHost) diff --git a/server/routes_test.go b/server/routes_test.go index 27e53cbd..896dc27b 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -124,14 +124,12 @@ func Test_Routes(t *testing.T) { Method: http.MethodPost, Path: "/api/create", Setup: func(t *testing.T, req *http.Request) { - f, err := os.CreateTemp(t.TempDir(), "ollama-model") - assert.Nil(t, err) - defer f.Close() + fname := createTestFile(t, "ollama-model") stream := false createReq := api.CreateRequest{ Name: "t-bone", - Modelfile: fmt.Sprintf("FROM %s", f.Name()), + Modelfile: fmt.Sprintf("FROM %s", fname), Stream: &stream, } jsonData, err := json.Marshal(createReq) @@ -216,27 +214,25 @@ func Test_Routes(t *testing.T) { httpSrv := httptest.NewServer(router) t.Cleanup(httpSrv.Close) - workDir, err := os.MkdirTemp("", "ollama-test") - assert.Nil(t, err) - defer os.RemoveAll(workDir) - os.Setenv("OLLAMA_MODELS", workDir) + t.Setenv("OLLAMA_MODELS", t.TempDir()) for _, tc := range testCases { - t.Logf("Running Test: [%s]", tc.Name) - u := httpSrv.URL + tc.Path - req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil) - 
assert.Nil(t, err) + t.Run(tc.Name, func(t *testing.T) { + u := httpSrv.URL + tc.Path + req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil) + assert.Nil(t, err) - if tc.Setup != nil { - tc.Setup(t, req) - } + if tc.Setup != nil { + tc.Setup(t, req) + } - resp, err := httpSrv.Client().Do(req) - assert.Nil(t, err) - defer resp.Body.Close() + resp, err := httpSrv.Client().Do(req) + assert.Nil(t, err) + defer resp.Body.Close() - if tc.Expected != nil { - tc.Expected(t, resp) - } + if tc.Expected != nil { + tc.Expected(t, resp) + } + }) } } From 01811c176a43e2aa5bc288188f94949b8a0299b5 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 23 Apr 2024 15:18:45 -0700 Subject: [PATCH 3/9] comments --- llm/filetype.go | 146 ++++++++++++++++++++++++----------------------- llm/ggml.go | 2 +- llm/llm.go | 2 +- server/images.go | 10 ++-- server/model.go | 6 +- 5 files changed, 86 insertions(+), 80 deletions(-) diff --git a/llm/filetype.go b/llm/filetype.go index bae2f5d1..e5e9410d 100644 --- a/llm/filetype.go +++ b/llm/filetype.go @@ -2,137 +2,139 @@ package llm import "fmt" -type filetype uint32 +type fileType uint32 const ( - filetypeF32 filetype = iota - filetypeF16 - filetypeQ4_0 - filetypeQ4_1 - filetypeQ4_1_F16 - filetypeQ8_0 filetype = iota + 2 - filetypeQ5_0 - filetypeQ5_1 - filetypeQ2_K - filetypeQ3_K_S - filetypeQ3_K_M - filetypeQ3_K_L - filetypeQ4_K_S - filetypeQ4_K_M - filetypeQ5_K_S - filetypeQ5_K_M - filetypeQ6_K - filetypeIQ2_XXS - filetypeIQ2_XS - filetypeQ2_K_S - filetypeQ3_K_XS - filetypeIQ3_XXS + fileTypeF32 fileType = iota + fileTypeF16 + fileTypeQ4_0 + fileTypeQ4_1 + fileTypeQ4_1_F16 + fileTypeQ4_2 // unused + fileTypeQ4_3 // unused + fileTypeQ8_0 + fileTypeQ5_0 + fileTypeQ5_1 + fileTypeQ2_K + fileTypeQ3_K_S + fileTypeQ3_K_M + fileTypeQ3_K_L + fileTypeQ4_K_S + fileTypeQ4_K_M + fileTypeQ5_K_S + fileTypeQ5_K_M + fileTypeQ6_K + fileTypeIQ2_XXS + fileTypeIQ2_XS + fileTypeQ2_K_S + fileTypeQ3_K_XS + fileTypeIQ3_XXS - filetypeUnknown + 
fileTypeUnknown ) -func ParseFileType(s string) (filetype, error) { +func ParseFileType(s string) (fileType, error) { switch s { case "F32": - return filetypeF32, nil + return fileTypeF32, nil case "F16": - return filetypeF16, nil + return fileTypeF16, nil case "Q4_0": - return filetypeQ4_0, nil + return fileTypeQ4_0, nil case "Q4_1": - return filetypeQ4_1, nil + return fileTypeQ4_1, nil case "Q4_1_F16": - return filetypeQ4_1_F16, nil + return fileTypeQ4_1_F16, nil case "Q8_0": - return filetypeQ8_0, nil + return fileTypeQ8_0, nil case "Q5_0": - return filetypeQ5_0, nil + return fileTypeQ5_0, nil case "Q5_1": - return filetypeQ5_1, nil + return fileTypeQ5_1, nil case "Q2_K": - return filetypeQ2_K, nil + return fileTypeQ2_K, nil case "Q3_K_S": - return filetypeQ3_K_S, nil + return fileTypeQ3_K_S, nil case "Q3_K_M": - return filetypeQ3_K_M, nil + return fileTypeQ3_K_M, nil case "Q3_K_L": - return filetypeQ3_K_L, nil + return fileTypeQ3_K_L, nil case "Q4_K_S": - return filetypeQ4_K_S, nil + return fileTypeQ4_K_S, nil case "Q4_K_M": - return filetypeQ4_K_M, nil + return fileTypeQ4_K_M, nil case "Q5_K_S": - return filetypeQ5_K_S, nil + return fileTypeQ5_K_S, nil case "Q5_K_M": - return filetypeQ5_K_M, nil + return fileTypeQ5_K_M, nil case "Q6_K": - return filetypeQ6_K, nil + return fileTypeQ6_K, nil case "IQ2_XXS": - return filetypeIQ2_XXS, nil + return fileTypeIQ2_XXS, nil case "IQ2_XS": - return filetypeIQ2_XS, nil + return fileTypeIQ2_XS, nil case "Q2_K_S": - return filetypeQ2_K_S, nil + return fileTypeQ2_K_S, nil case "Q3_K_XS": - return filetypeQ3_K_XS, nil + return fileTypeQ3_K_XS, nil case "IQ3_XXS": - return filetypeIQ3_XXS, nil + return fileTypeIQ3_XXS, nil default: - return filetypeUnknown, fmt.Errorf("unknown filetype: %s", s) + return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s) } } -func (t filetype) String() string { +func (t fileType) String() string { switch t { - case filetypeF32: + case fileTypeF32: return "F32" - case filetypeF16: + case 
fileTypeF16: return "F16" - case filetypeQ4_0: + case fileTypeQ4_0: return "Q4_0" - case filetypeQ4_1: + case fileTypeQ4_1: return "Q4_1" - case filetypeQ4_1_F16: + case fileTypeQ4_1_F16: return "Q4_1_F16" - case filetypeQ8_0: + case fileTypeQ8_0: return "Q8_0" - case filetypeQ5_0: + case fileTypeQ5_0: return "Q5_0" - case filetypeQ5_1: + case fileTypeQ5_1: return "Q5_1" - case filetypeQ2_K: + case fileTypeQ2_K: return "Q2_K" - case filetypeQ3_K_S: + case fileTypeQ3_K_S: return "Q3_K_S" - case filetypeQ3_K_M: + case fileTypeQ3_K_M: return "Q3_K_M" - case filetypeQ3_K_L: + case fileTypeQ3_K_L: return "Q3_K_L" - case filetypeQ4_K_S: + case fileTypeQ4_K_S: return "Q4_K_S" - case filetypeQ4_K_M: + case fileTypeQ4_K_M: return "Q4_K_M" - case filetypeQ5_K_S: + case fileTypeQ5_K_S: return "Q5_K_S" - case filetypeQ5_K_M: + case fileTypeQ5_K_M: return "Q5_K_M" - case filetypeQ6_K: + case fileTypeQ6_K: return "Q6_K" - case filetypeIQ2_XXS: + case fileTypeIQ2_XXS: return "IQ2_XXS" - case filetypeIQ2_XS: + case fileTypeIQ2_XS: return "IQ2_XS" - case filetypeQ2_K_S: + case fileTypeQ2_K_S: return "Q2_K_S" - case filetypeQ3_K_XS: + case fileTypeQ3_K_XS: return "Q3_K_XS" - case filetypeIQ3_XXS: + case fileTypeIQ3_XXS: return "IQ3_XXS" default: return "unknown" } } -func (t filetype) Value() uint32 { +func (t fileType) Value() uint32 { return uint32(t) } diff --git a/llm/ggml.go b/llm/ggml.go index a7dbfeb3..a83bba8f 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -47,7 +47,7 @@ func (kv KV) ParameterCount() uint64 { func (kv KV) FileType() string { if u64 := kv.u64("general.file_type"); u64 > 0 { - return filetype(uint32(u64)).String() + return fileType(uint32(u64)).String() } return "unknown" diff --git a/llm/llm.go b/llm/llm.go index 0e96511f..2a0c4b91 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -20,7 +20,7 @@ func SystemInfo() string { return C.GoString(C.llama_print_system_info()) } -func Quantize(infile, outfile string, ftype filetype) error { +func Quantize(infile, outfile 
string, ftype fileType) error { cinfile := C.CString(infile) defer C.free(unsafe.Pointer(cinfile)) diff --git a/server/images.go b/server/images.go index 1de10929..998d1335 100644 --- a/server/images.go +++ b/server/images.go @@ -344,9 +344,9 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m switch c.Name { case "model", "adapter": - var layers2 *ordered.Map[*Layer, *llm.GGML] + var baseLayers *ordered.Map[*Layer, *llm.GGML] if name := model.ParseName(c.Args, ""); name.IsValid() { - layers2, err = parseFromModel(ctx, name, fn) + baseLayers, err = parseFromModel(ctx, name, fn) if err != nil { return err } @@ -362,14 +362,14 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m } defer blob.Close() - layers2, err = parseFromFile(ctx, blob, fn) + baseLayers, err = parseFromFile(ctx, blob, fn) if err != nil { return err } } else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil { defer file.Close() - layers2, err = parseFromFile(ctx, file, fn) + baseLayers, err = parseFromFile(ctx, file, fn) if err != nil { return err } @@ -381,7 +381,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m var tempfiles []*os.File // TODO(mxyng): replace with rangefunc - layers2.Items()(func(layer *Layer, ggml *llm.GGML) bool { + baseLayers.Items()(func(layer *Layer, ggml *llm.GGML) bool { if quantization != "" && ggml != nil && ggml.Name() == "gguf" { ftype, err := llm.ParseFileType(quantization) if err != nil { diff --git a/server/model.go b/server/model.go index 2d7797f0..cf036052 100644 --- a/server/model.go +++ b/server/model.go @@ -27,7 +27,11 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe return nil, err } - return parseFromModel(ctx, name, fn) + modelpath = ParseModelPath(name.DisplayLongest()) + manifest, _, err = GetManifest(modelpath) + if err != nil { + return nil, err + } case err != nil: return nil, err } From 
7ffe45734d1e2fada01837383afc20053a5b4c0f Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 24 Apr 2024 15:06:47 -0700 Subject: [PATCH 4/9] rebase --- convert/mixtral.go | 17 +++-------------- server/images.go | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/convert/mixtral.go b/convert/mixtral.go index e31e84af..940df55d 100644 --- a/convert/mixtral.go +++ b/convert/mixtral.go @@ -1,7 +1,7 @@ package convert import ( - "os" + "io" "regexp" "github.com/ollama/ollama/llm" @@ -47,7 +47,7 @@ func (m *MixtralModel) LoadVocab() error { return nil } -func (m *MixtralModel) WriteGGUF() (string, error) { +func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error { kv := llm.KV{ "general.architecture": "llama", "general.name": m.Name, @@ -81,16 +81,5 @@ func (m *MixtralModel) WriteGGUF() (string, error) { "tokenizer.ggml.add_eos_token": false, } - f, err := os.CreateTemp("", "ollama-gguf") - if err != nil { - return "", err - } - defer f.Close() - - mod := llm.NewGGUFV3(m.Params.ByteOrder) - if err := mod.Encode(f, kv, m.Tensors); err != nil { - return "", err - } - - return f.Name(), nil + return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) } diff --git a/server/images.go b/server/images.go index 998d1335..4d4b47c4 100644 --- a/server/images.go +++ b/server/images.go @@ -345,7 +345,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m switch c.Name { case "model", "adapter": var baseLayers *ordered.Map[*Layer, *llm.GGML] - if name := model.ParseName(c.Args, ""); name.IsValid() { + if name := model.ParseName(c.Args); name.IsValid() { baseLayers, err = parseFromModel(ctx, name, fn) if err != nil { return err From 4d0d0fa3839e8e0fed8b210a88290b5f15e04baa Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 25 Apr 2024 08:53:08 -0700 Subject: [PATCH 5/9] no iterator --- server/images.go | 62 +++++++++++++++----------------------------- server/model.go | 25 +++++++++--------- types/ordered/map.go | 32 
----------------------- 3 files changed, 34 insertions(+), 85 deletions(-) delete mode 100644 types/ordered/map.go diff --git a/server/images.go b/server/images.go index 4d4b47c4..5da47b79 100644 --- a/server/images.go +++ b/server/images.go @@ -30,7 +30,6 @@ import ( "github.com/ollama/ollama/server/envconfig" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" - "github.com/ollama/ollama/types/ordered" "github.com/ollama/ollama/version" ) @@ -344,7 +343,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m switch c.Name { case "model", "adapter": - var baseLayers *ordered.Map[*Layer, *llm.GGML] + var baseLayers []*layerWithGGML if name := model.ParseName(c.Args); name.IsValid() { baseLayers, err = parseFromModel(ctx, name, fn) if err != nil { @@ -377,70 +376,51 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m return fmt.Errorf("invalid model reference: %s", c.Args) } - var err2 error - var tempfiles []*os.File - - // TODO(mxyng): replace with rangefunc - baseLayers.Items()(func(layer *Layer, ggml *llm.GGML) bool { - if quantization != "" && ggml != nil && ggml.Name() == "gguf" { + for _, baseLayer := range baseLayers { + if quantization != "" && baseLayer.GGML != nil && baseLayer.GGML.Name() == "gguf" { ftype, err := llm.ParseFileType(quantization) if err != nil { - err2 = err - return false + return err } - filetype := ggml.KV().FileType() + filetype := baseLayer.GGML.KV().FileType() if !slices.Contains([]string{"F16", "F32"}, filetype) { - err2 = errors.New("quantization is only supported for F16 and F32 models") - return false + return errors.New("quantization is only supported for F16 and F32 models") } fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", filetype, quantization)}) - blob, err := GetBlobsPath(layer.Digest) + blob, err := GetBlobsPath(baseLayer.Digest) if err != nil { - err2 = err - return false + return err } temp, err := 
os.CreateTemp(filepath.Dir(blob), quantization) if err != nil { - err2 = err - return false + return err } - tempfiles = append(tempfiles, temp) + defer temp.Close() + defer os.Remove(temp.Name()) if err := llm.Quantize(blob, temp.Name(), ftype); err != nil { - err2 = err - return false + return err } - layer, err = NewLayer(temp, layer.MediaType) + baseLayer.Layer, err = NewLayer(temp, baseLayer.Layer.MediaType) if err != nil { - err2 = err - return false + return err } } - if ggml != nil { - config.ModelFormat = cmp.Or(config.ModelFormat, ggml.Name()) - config.ModelFamily = cmp.Or(config.ModelFamily, ggml.KV().Architecture()) - config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(ggml.KV().ParameterCount())) - config.FileType = cmp.Or(config.FileType, ggml.KV().FileType()) - config.ModelFamilies = append(config.ModelFamilies, ggml.KV().Architecture()) + if baseLayer.GGML != nil { + config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name()) + config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture()) + config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount())) + config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType()) + config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture()) } - layers = append(layers, layer) - return true - }) - - for _, tempfile := range tempfiles { - defer tempfile.Close() - defer os.Remove(tempfile.Name()) - } - - if err2 != nil { - return err2 + layers = append(layers, baseLayer.Layer) } case "license", "template", "system": blob := strings.NewReader(c.Args) diff --git a/server/model.go b/server/model.go index cf036052..b27c7083 100644 --- a/server/model.go +++ b/server/model.go @@ -15,10 +15,14 @@ import ( "github.com/ollama/ollama/convert" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/types/model" - "github.com/ollama/ollama/types/ordered" ) -func parseFromModel(ctx context.Context, name 
model.Name, fn func(api.ProgressResponse)) (*ordered.Map[*Layer, *llm.GGML], error) { +type layerWithGGML struct { + *Layer + *llm.GGML +} + +func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { modelpath := ParseModelPath(name.DisplayLongest()) manifest, _, err := GetManifest(modelpath) switch { @@ -36,7 +40,6 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe return nil, err } - layers := ordered.NewMap[*Layer, *llm.GGML]() for _, layer := range manifest.Layers { layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, modelpath.GetShortTagname()) if err != nil { @@ -62,9 +65,10 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe if err != nil { return nil, err } - layers.Add(layer, ggml) + + layers = append(layers, &layerWithGGML{layer, ggml}) default: - layers.Add(layer, nil) + layers = append(layers, &layerWithGGML{layer, nil}) } } @@ -72,7 +76,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe return layers, nil } -func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (*ordered.Map[*Layer, *llm.GGML], error) { +func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { stat, err := file.Stat() if err != nil { return nil, err @@ -184,12 +188,11 @@ func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResp return nil, err } - layers := ordered.NewMap[*Layer, *llm.GGML]() - layers.Add(layer, ggml) + layers = append(layers, &layerWithGGML{layer, ggml}) return layers, nil } -func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (*ordered.Map[*Layer, *llm.GGML], error) { +func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { sr := io.NewSectionReader(file, 0, 512) 
contentType, err := detectContentType(sr) if err != nil { @@ -205,8 +208,6 @@ func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressRespo return nil, fmt.Errorf("unsupported content type: %s", contentType) } - layers := ordered.NewMap[*Layer, *llm.GGML]() - stat, err := file.Stat() if err != nil { return nil, err @@ -233,7 +234,7 @@ func parseFromFile(ctx context.Context, file *os.File, fn func(api.ProgressRespo return nil, err } - layers.Add(layer, ggml) + layers = append(layers, &layerWithGGML{layer, ggml}) offset = n } diff --git a/types/ordered/map.go b/types/ordered/map.go deleted file mode 100644 index 076d657d..00000000 --- a/types/ordered/map.go +++ /dev/null @@ -1,32 +0,0 @@ -package ordered - -type Map[K comparable, V any] struct { - s []K - m map[K]V -} - -func NewMap[K comparable, V any]() *Map[K, V] { - return &Map[K, V]{ - s: make([]K, 0), - m: make(map[K]V), - } -} - -type iter_Seq2[K, V any] func(func(K, V) bool) - -func (m *Map[K, V]) Items() iter_Seq2[K, V] { - return func(yield func(K, V) bool) { - for _, k := range m.s { - if !yield(k, m.m[k]) { - return - } - } - } -} - -func (m *Map[K, V]) Add(k K, v V) { - if _, ok := m.m[k]; !ok { - m.s = append(m.s, k) - m.m[k] = v - } -} From d2454603626510ab1366a7f077452fb49c4ca47c Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 25 Apr 2024 09:01:20 -0700 Subject: [PATCH 6/9] only quantize language models --- server/images.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/images.go b/server/images.go index 5da47b79..2817b1d3 100644 --- a/server/images.go +++ b/server/images.go @@ -377,7 +377,10 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m } for _, baseLayer := range baseLayers { - if quantization != "" && baseLayer.GGML != nil && baseLayer.GGML.Name() == "gguf" { + if quantization != "" && + baseLayer.MediaType == "application/vnd.ollama.image.model" && + baseLayer.GGML != nil && + baseLayer.GGML.Name() == 
"gguf" { ftype, err := llm.ParseFileType(quantization) if err != nil { return err @@ -582,7 +585,6 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, m return nil } - func CopyModel(src, dst model.Name) error { if !dst.IsFullyQualified() { return model.Unqualified(dst) From f5e8b207fb87582ecb16edba6ac681c148ad0e15 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 1 May 2024 10:34:39 -0700 Subject: [PATCH 7/9] s/DisplayLongest/String/ --- server/model.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/model.go b/server/model.go index b27c7083..09fa2651 100644 --- a/server/model.go +++ b/server/model.go @@ -23,15 +23,15 @@ type layerWithGGML struct { } func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerWithGGML, err error) { - modelpath := ParseModelPath(name.DisplayLongest()) + modelpath := ParseModelPath(name.String()) manifest, _, err := GetManifest(modelpath) switch { case errors.Is(err, os.ErrNotExist): - if err := PullModel(ctx, name.DisplayLongest(), ®istryOptions{}, fn); err != nil { + if err := PullModel(ctx, name.String(), ®istryOptions{}, fn); err != nil { return nil, err } - modelpath = ParseModelPath(name.DisplayLongest()) + modelpath = ParseModelPath(name.String()) manifest, _, err = GetManifest(modelpath) if err != nil { return nil, err From 6694be5e5027b2f27f6eeeb51a5284a2b18129ad Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 6 May 2024 14:00:50 -0700 Subject: [PATCH 8/9] convert/llama: use WriteSeeker --- convert/llama.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/convert/llama.go b/convert/llama.go index 6da46ec9..fb576e2e 100644 --- a/convert/llama.go +++ b/convert/llama.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "log/slog" - "os" "regexp" "strings" @@ -159,11 +158,5 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error { "tokenizer.ggml.add_eos_token": false, } - f, err := 
os.CreateTemp("", "ollama-gguf") - if err != nil { - return err - } - defer f.Close() - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors) + return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) } From b2f00aa9771d44a1423a2e2f23c5218f1bbc834d Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 6 May 2024 15:27:19 -0700 Subject: [PATCH 9/9] close zip files --- server/model.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/model.go b/server/model.go index 09fa2651..eea5d13a 100644 --- a/server/model.go +++ b/server/model.go @@ -100,11 +100,13 @@ func parseFromZipFile(_ context.Context, file *os.File, fn func(api.ProgressResp if err != nil { return nil, err } + defer outfile.Close() infile, err := f.Open() if err != nil { return nil, err } + defer infile.Close() if _, err = io.Copy(outfile, infile); err != nil { return nil, err