From d88582dffd4a4ff0dcf7f347091f023945f9a26f Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Thu, 18 Apr 2024 16:00:20 -0700
Subject: [PATCH 1/9] some changes for llama3

---
 convert/convert.go | 3 ++-
 convert/torch.go   | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/convert/convert.go b/convert/convert.go
index f4210e50..dbc26da1 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -77,7 +77,8 @@ func GetModelFormat(dirname string) (ModelFormat, error) {
 		slog.Debug(fmt.Sprintf("file = %s", fn))
 		if strings.HasSuffix(fn, ".safetensors") {
 			return &SafetensorFormat{}, nil
-		} else if strings.HasSuffix(fn, ".bin") {
+			//} else if strings.HasSuffix(fn, ".bin") {
+		} else if strings.HasSuffix(fn, ".pth") {
 			slog.Debug("model is torch")
 			return &TorchFormat{}, nil
 		}
diff --git a/convert/torch.go b/convert/torch.go
index 92c58872..0ad10c0e 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -33,7 +33,8 @@ type TorchFormat struct{}
 func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	slog.Debug("getting torch tensors")

-	files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
+	//files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
+	files, err := filepath.Glob(filepath.Join(dirpath, "consolidatedr.*.pth"))
 	if err != nil {
 		slog.Error("didn't find any torch files")
 		return nil, err
@@ -120,7 +121,7 @@ func getAltParams(dirpath string) (*Params, error) {
 		AttentionHeads int     `json:"n_heads"`
 		KeyValHeads    int     `json:"n_kv_heads"`
 		HiddenLayers   int     `json:"n_layers"`
-		RopeTheta      int     `json:"rope_theta"`
+		RopeTheta      float64 `json:"rope_theta"`
 		NormEPS        float64 `json:"norm_eps"`
 	}
@@ -133,6 +134,7 @@ func getAltParams(dirpath string) (*Params, error) {
 	}

 	params := &Params{
+		Architectures:  []string{"LlamaForCausalLM"},
 		HiddenSize:     tparams.HiddenSize,
 		AttentionHeads: tparams.AttentionHeads,
 		KeyValHeads:    tparams.KeyValHeads,

From 4730762e5c9453f304aa456b549530e165ff1936 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Wed, 24 Apr 2024 18:32:01 -0700
Subject: [PATCH 2/9] add safetensors version

---
 convert/llama.go       | 15 +++++++++++----
 convert/safetensors.go |  9 +++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/convert/llama.go b/convert/llama.go
index fb576e2e..5dfb8d7d 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -20,7 +20,7 @@ type LlamaModel struct {
 	ModelData
 }

-func llamaLayerHandler(w io.Writer, r torchWriterTo) error {
+func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
 	slog.Debug(fmt.Sprintf("repacking layer '%s'", r.t.Name))

 	data := r.storage.(*pytorch.HalfStorage).Data
@@ -105,9 +105,16 @@ func (m *LlamaModel) GetTensors() error {
 		matches := re.FindAllStringSubmatch(l.Name, -1)
 		if len(matches) > 0 {
 			slog.Debug(fmt.Sprintf("setting handler for: %s", l.Name))
-			wt := l.WriterTo.(torchWriterTo)
-			wt.handler = llamaLayerHandler
-			l.WriterTo = wt
+			switch l.WriterTo.(type) {
+			case torchWriterTo:
+				wt := l.WriterTo.(torchWriterTo)
+				wt.handler = llamaTorchLayerHandler
+				l.WriterTo = wt
+			case safetensorWriterTo:
+				wt := l.WriterTo.(safetensorWriterTo)
+				wt.handler = mistralLayerHandler
+				l.WriterTo = wt
+			}
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
diff --git a/convert/safetensors.go b/convert/safetensors.go
index 69424c4d..64aaf866 100644
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -281,6 +281,15 @@ func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (M
 		return nil, fmt.Errorf("No architecture specified to convert")
 	case 1:
 		switch params.Architectures[0] {
+		case "LlamaForCausalLM":
+			return &LlamaModel{
+				ModelData{
+					Name:   name,
+					Path:   dirPath,
+					Params: params,
+					Format: m,
+				},
+			}, nil
 		case "MistralForCausalLM":
 			return &MistralModel{
 				ModelData{
From c8cf0d94edeae0c71e3a0877895d9519b5d4d5e3 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Sun, 28 Apr 2024 10:36:38 -0700
Subject: [PATCH 3/9] llama3 conversion

---
 convert/convert.go |  1 +
 convert/llama.go   | 70 +++++++++++++++++++++++++++++++++++-----
 llm/gguf.go        |  1 +
 3 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/convert/convert.go b/convert/convert.go
index dbc26da1..899c8c44 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -93,6 +93,7 @@ type Vocab struct {
 	Tokens []string
 	Scores []float32
 	Types  []int32
+	Merges []string
 }

 func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
diff --git a/convert/llama.go b/convert/llama.go
index 5dfb8d7d..8cb162e7 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strings"

@@ -105,12 +107,12 @@ func (m *LlamaModel) GetTensors() error {
 		matches := re.FindAllStringSubmatch(l.Name, -1)
 		if len(matches) > 0 {
 			slog.Debug(fmt.Sprintf("setting handler for: %s", l.Name))
-			switch l.WriterTo.(type) {
-			case torchWriterTo:
+			switch m.Format.(type) {
+			case *TorchFormat:
 				wt := l.WriterTo.(torchWriterTo)
 				wt.handler = llamaTorchLayerHandler
 				l.WriterTo = wt
-			case safetensorWriterTo:
+			case *SafetensorFormat:
 				wt := l.WriterTo.(safetensorWriterTo)
 				wt.handler = mistralLayerHandler
 				l.WriterTo = wt
@@ -123,18 +125,46 @@ func (m *LlamaModel) GetTensors() error {
 }

 func (m *LlamaModel) LoadVocab() error {
-	var v *Vocab
-	var err error
-
-	slog.Debug("loading vocab")
-	v, err = LoadSentencePieceTokens(m.Path, m.Params)
-	if err != nil {
-		return err
+	v := &Vocab{
+		Tokens: []string{},
+		Types:  []int32{},
+		Merges: []string{},
 	}

-	slog.Debug("vocab loaded")
+	tokpath := filepath.Join(m.Path, "tokenizer.json")
+	slog.Debug(fmt.Sprintf("looking for %s", tokpath))
+	if _, err := os.Stat(tokpath); !os.IsNotExist(err) {
+		t, err := newTokenizer(tokpath)
+		if err != nil {
+			return err
+		}
+		for _, tok := range t.Model.Tokens {
+			v.Tokens = append(v.Tokens, tok.Content)
+			var tokType int32
+			switch {
+			case tok.Special:
+				tokType = 3
+			case tok.UserDefined:
+				tokType = 4
+			default:
+				tokType = 1
+			}
+			v.Types = append(v.Types, tokType)
+		}
+		v.Merges = t.Model.Merges
+	} else {
+		slog.Debug("loading sentence piece vocab")
+		v, err = LoadSentencePieceTokens(m.Path, m.Params)
+		if err != nil {
+			return err
+		}
+
+		slog.Debug("vocab loaded")
+
+	}
 	m.Vocab = v
+
 	return nil
 }

@@ -147,22 +177,30 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
 		"llama.block_count":                      uint32(m.Params.HiddenLayers),
 		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
+		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
 		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"general.file_type":                      uint32(1),
-		"tokenizer.ggml.model":                   "llama",
+		//"general.file_type":                      uint32(1),
+		"general.file_type":                      uint32(2),
+		//"tokenizer.ggml.model":                   "llama",
+		"tokenizer.ggml.model":                   "gpt2",

 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
-		"tokenizer.ggml.scores":     m.Vocab.Scores,
 		"tokenizer.ggml.token_type": m.Vocab.Types,

 		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
-		"tokenizer.ggml.add_bos_token":    true,
-		"tokenizer.ggml.add_eos_token":    false,
+		//"tokenizer.ggml.add_bos_token":    true,
+		//"tokenizer.ggml.add_eos_token":    false,
+	}
+
+	if len(m.Vocab.Merges) > 0 {
+		kv["tokenizer.ggml.merges"] = m.Vocab.Merges
+	} else {
+		kv["tokenizer.ggml.scores"] = m.Vocab.Scores
 	}

 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
diff --git a/llm/gguf.go b/llm/gguf.go
index 5f6e8004..c3cc3d41 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -483,6 +483,7 @@ var ggufKVOrder = map[string][]string{
 		"tokenizer.ggml.model",
 		"tokenizer.ggml.tokens",
 		"tokenizer.ggml.scores",
+		"tokenizer.ggml.merges",
 		"tokenizer.ggml.token_type",
 		"tokenizer.ggml.bos_token_id",
 		"tokenizer.ggml.eos_token_id",
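The literal 1/3/4 values written into v.Types above are GGUF token-type IDs (1 = normal, 3 = control, 4 = user-defined); patch 7 below replaces them with named constants. A standalone sketch of the same classification, with the token flags passed in directly rather than read from a Token struct:

package main

import "fmt"

// tokenType mirrors the switch in LoadVocab: special (control) tokens
// map to 3, user-defined (added) tokens to 4, everything else to 1.
func tokenType(special, userDefined bool) int32 {
	switch {
	case special:
		return 3
	case userDefined:
		return 4
	default:
		return 1
	}
}

func main() {
	fmt.Println(tokenType(true, false))  // 3: a special token such as the BOS marker
	fmt.Println(tokenType(false, true))  // 4: a token from added_tokens
	fmt.Println(tokenType(false, false)) // 1: an ordinary vocabulary entry
}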
From d355d2020fcfc54c375eb697b7873742c3851881 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Wed, 8 May 2024 16:07:46 -0700
Subject: [PATCH 4/9] add fixes for llama

---
 cmd/cmd.go             |  2 +-
 convert/convert.go     |  4 +---
 convert/llama.go       | 33 +++++++++++++++++++--------------
 convert/safetensors.go |  2 ++
 convert/torch.go       | 38 ++++++++++++++++++++++++++++++------
 5 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/cmd/cmd.go b/cmd/cmd.go
index f79f8b97..5d919d9a 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -208,7 +208,7 @@ func tempZipFiles(path string) (string, error) {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
 		files = append(files, pt...)
-	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
+	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers consolidated.x.pth, consolidated.pth
 		files = append(files, pt...)
diff --git a/convert/convert.go b/convert/convert.go
index 899c8c44..9a05fb52 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -74,11 +74,9 @@ func GetModelFormat(dirname string) (ModelFormat, error) {
 	}

 	for _, fn := range files {
-		slog.Debug(fmt.Sprintf("file = %s", fn))
 		if strings.HasSuffix(fn, ".safetensors") {
 			return &SafetensorFormat{}, nil
-			//} else if strings.HasSuffix(fn, ".bin") {
-		} else if strings.HasSuffix(fn, ".pth") {
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
 			slog.Debug("model is torch")
 			return &TorchFormat{}, nil
 		}
diff --git a/convert/llama.go b/convert/llama.go
index 8cb162e7..9fdcd02b 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -23,12 +23,24 @@ type LlamaModel struct {
 }

 func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
-	slog.Debug(fmt.Sprintf("repacking layer '%s'", r.t.Name))
-
-	data := r.storage.(*pytorch.HalfStorage).Data
-	tData := make([]uint16, len(data))
-	for cnt, v := range data {
-		tData[cnt] = uint16(float16.Fromfloat32(v))
+	var tData []uint16
+	switch r.storage.(type) {
+	case *pytorch.HalfStorage:
+		data := r.storage.(*pytorch.HalfStorage).Data
+		tData = make([]uint16, len(data))
+		for cnt, v := range data {
+			tData[cnt] = uint16(float16.Fromfloat32(v))
+		}
+	case *pytorch.BFloat16Storage:
+		data := r.storage.(*pytorch.BFloat16Storage).Data
+		tData = make([]uint16, len(data))
+
+		for cnt, v := range data {
+			tData[cnt] = uint16(float16.Fromfloat32(v))
+		}
+	default:
+		return fmt.Errorf("unknown storage type for torch")
 	}

 	var err error
@@ -44,8 +56,6 @@ func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
 		return fmt.Errorf("unknown layer type")
 	}

-	slog.Debug(fmt.Sprintf("heads = %d", heads))
-
 	tData, err = llamaRepack(tData, int(heads), r.t.Shape)
 	if err != nil {
 		return err
@@ -106,7 +116,6 @@ func (m *LlamaModel) GetTensors() error {
 	for _, l := range t {
 		matches := re.FindAllStringSubmatch(l.Name, -1)
 		if len(matches) > 0 {
-			slog.Debug(fmt.Sprintf("setting handler for: %s", l.Name))
 			switch m.Format.(type) {
 			case *TorchFormat:
 				wt := l.WriterTo.(torchWriterTo)
@@ -182,10 +191,8 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		//"general.file_type":                      uint32(1),
-		"general.file_type":                      uint32(2),
-		//"tokenizer.ggml.model":                   "llama",
-		"tokenizer.ggml.model":                   "gpt2",
+		"general.file_type":                      uint32(2),
+		"tokenizer.ggml.model":                   "gpt2",

 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
 		"tokenizer.ggml.token_type": m.Vocab.Types,
@@ -193,8 +200,6 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
-		//"tokenizer.ggml.add_bos_token":    true,
-		//"tokenizer.ggml.add_eos_token":    false,
 	}

 	if len(m.Vocab.Merges) > 0 {
diff --git a/convert/safetensors.go b/convert/safetensors.go
index 64aaf866..b52a048d 100644
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -131,6 +131,8 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			shape[i] = uint64(data.Shape[i])
 		}

+		slog.Debug(fmt.Sprintf("'%45s': '%30s' %10d [%#v]", k, ggufName, size, data.Shape))
+
 		t := llm.Tensor{
 			Name:  ggufName,
 			Kind:  kind,
diff --git a/convert/torch.go b/convert/torch.go
index 0ad10c0e..803827ba 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -33,11 +33,15 @@ type TorchFormat struct{}
 func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	slog.Debug("getting torch tensors")

-	//files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
-	files, err := filepath.Glob(filepath.Join(dirpath, "consolidatedr.*.pth"))
+	var files []string
+	var err error
+	files, err = filepath.Glob(filepath.Join(dirpath, "consolidated.*.pth"))
 	if err != nil {
-		slog.Error("didn't find any torch files")
-		return nil, err
+		files, err = filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
+		if err != nil {
+			slog.Error("didn't find any torch files")
+			return nil, err
+		}
 	}

 	var offset uint64
@@ -78,7 +82,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 			slog.Error(err.Error())
 			return nil, err
 		}
-		slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName))
+		slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))

 		shape := []uint64{0, 0, 0, 0}
 		for i := range tshape {
@@ -236,7 +240,7 @@ func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 		return 0, r.handler(w, r)
 	}

-	switch r.storage.(type) {
+	switch storage := r.storage.(type) {
 	case *pytorch.FloatStorage:
 		slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name))
 		return 0, nil
@@ -259,6 +263,28 @@ func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 				return 0, err
 			}
 		}
+	case *pytorch.BFloat16Storage:
+		data := r.storage.(*pytorch.BFloat16Storage).Data
+		switch r.t.Kind {
+		case 0:
+			if err = binary.Write(w, r.bo, data); err != nil {
+				return 0, err
+			}
+		case 1:
+			tData := make([]uint16, len(data))
+
+			for cnt, v := range data {
+				tData[cnt] = uint16(float16.Fromfloat32(v))
+			}
+
+			if err = binary.Write(w, r.bo, tData); err != nil {
+				return 0, err
+			}
+		default:
+			return 0, fmt.Errorf("unknown storage kind: %d", r.t.Kind)
+		}
+	default:
+		return 0, fmt.Errorf("unknown storage type: %T", storage)
 	}

 	return 0, nil
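The new BFloat16Storage branches rest on a simple property: a bfloat16 value is just the top 16 bits of an IEEE float32, so widening to float32 and then narrowing to float16 is a pair of cheap bit operations. A self-contained sketch of that round trip using plain bit tricks (the patch itself goes through the go-bfloat16 and x448/float16 packages, and those round rather than truncate on the way down):

package main

import (
	"fmt"
	"math"
)

// bf16ToF32 widens a bfloat16 bit pattern: its 16 bits become the high
// half of a float32; the low 16 mantissa bits are zero.
func bf16ToF32(b uint16) float32 {
	return math.Float32frombits(uint32(b) << 16)
}

// f32ToBF16 narrows by truncating the low 16 bits (lossy in general,
// exact for values that fit bfloat16's 7-bit mantissa).
func f32ToBF16(f float32) uint16 {
	return uint16(math.Float32bits(f) >> 16)
}

func main() {
	f := float32(3.140625) // exactly representable in bfloat16
	b := f32ToBF16(f)
	fmt.Printf("%v -> %#04x -> %v\n", f, b, bf16ToF32(b)) // 3.140625 -> 0x4049 -> 3.140625
}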
From 2d315ba9a984f8db8f108b967b3af6fa4aa67669 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Wed, 8 May 2024 16:56:18 -0700
Subject: [PATCH 5/9] add missing file

---
 convert/tokenizer.go | 72 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 convert/tokenizer.go

diff --git a/convert/tokenizer.go b/convert/tokenizer.go
new file mode 100644
index 00000000..a7da81e6
--- /dev/null
+++ b/convert/tokenizer.go
@@ -0,0 +1,72 @@
+package convert
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"os"
+)
+
+type Tokenizer struct {
+	Version     string         `json:"version"`
+	AddedTokens []Token        `json:"added_tokens"`
+	Model       TokenizerModel `json:"model"`
+}
+
+type TokenizerModel struct {
+	Type   string         `json:"type"`
+	Vocab  map[string]int `json:"vocab"`
+	Merges []string       `json:"merges"`
+	Tokens []Token
+}
+
+type Token struct {
+	ID          int    `json:"id"`
+	Content     string `json:"content"`
+	Special     bool   `json:"special"`
+	UserDefined bool
+}
+
+func (t *Tokenizer) getMaxID() int {
+	var maxID int
+	for _, v := range t.Model.Vocab {
+		maxID = max(maxID, v)
+	}
+
+	for _, v := range t.AddedTokens {
+		maxID = max(maxID, v.ID)
+	}
+	return maxID
+}
+
+func newTokenizer(dirpath string) (*Tokenizer, error) {
+	f, err := os.Open(dirpath)
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+
+	data, err := ioutil.ReadAll(f)
+	if err != nil {
+		return nil, err
+	}
+
+	var tdata Tokenizer
+
+	if err := json.Unmarshal(data, &tdata); err != nil {
+		return nil, err
+	}
+
+	maxID := tdata.getMaxID()
+	tdata.Model.Tokens = make([]Token, maxID+1)
+
+	for k, v := range tdata.Model.Vocab {
+		tdata.Model.Tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
+	}
+
+	for _, v := range tdata.AddedTokens {
+		v.UserDefined = true
+		tdata.Model.Tokens[v.ID] = v
+	}
+
+	return &tdata, nil
+}
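The key move in newTokenizer is flattening the vocab map into a dense slice indexed by token ID, sized from the maximum ID seen in either the vocab or added_tokens, so later GGUF emission is a simple in-order walk with added tokens overwriting their slots. A reduced sketch of that flattening with a toy vocabulary (the example data is made up):

package main

import "fmt"

func main() {
	vocab := map[string]int{"hello": 0, "world": 2}
	added := map[string]int{"[pad]": 1, "[eos]": 3}

	// find the highest ID across both sources (max is a Go 1.21+ built-in)
	maxID := 0
	for _, id := range vocab {
		maxID = max(maxID, id)
	}
	for _, id := range added {
		maxID = max(maxID, id)
	}

	// dense, ID-indexed table; added tokens land in (or overwrite) their slots
	tokens := make([]string, maxID+1)
	for content, id := range vocab {
		tokens[id] = content
	}
	for content, id := range added {
		tokens[id] = content
	}

	fmt.Println(tokens) // [hello [pad] world [eos]]
}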
From 547132e820dcdc20c325d1de876a86a708b5744e Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Wed, 15 May 2024 11:53:14 -0700
Subject: [PATCH 6/9] bpe pretokenizer

---
 convert/convert.go   |  2 +
 convert/llama.go     | 46 +++++++---------------
 convert/tokenizer.go | 93 +++++++++++++++++++++++++++++++-------------
 llm/gguf.go          |  1 +
 4 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/convert/convert.go b/convert/convert.go
index 9a05fb52..e9c2ef2d 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -37,6 +37,8 @@ type Params struct {
 	Experts     int `json:"num_local_experts"`
 	ExpertsUsed int `json:"num_experts_per_tok"`

+	PreTokenizer string
+
 	ByteOrder
 }

diff --git a/convert/llama.go b/convert/llama.go
index 9fdcd02b..83d942cb 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -2,9 +2,9 @@ package convert

 import (
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"io"
-	"log/slog"
 	"os"
 	"path/filepath"
 	"regexp"
@@ -134,44 +134,27 @@ func (m *LlamaModel) GetTensors() error {
 }

 func (m *LlamaModel) LoadVocab() error {
-	v := &Vocab{
-		Tokens: []string{},
-		Types:  []int32{},
-		Merges: []string{},
-	}
+	v := &Vocab{}

 	tokpath := filepath.Join(m.Path, "tokenizer.json")
-	slog.Debug(fmt.Sprintf("looking for %s", tokpath))
-	if _, err := os.Stat(tokpath); !os.IsNotExist(err) {
-		t, err := newTokenizer(tokpath)
-		if err != nil {
-			return err
-		}
-
-		for _, tok := range t.Model.Tokens {
-			v.Tokens = append(v.Tokens, tok.Content)
-			var tokType int32
-			switch {
-			case tok.Special:
-				tokType = 3
-			case tok.UserDefined:
-				tokType = 4
-			default:
-				tokType = 1
-			}
-			v.Types = append(v.Types, tokType)
-		}
-		v.Merges = t.Model.Merges
-	} else {
-		slog.Debug("loading sentence piece vocab")
+	pre, ts, merges, err := parseTokens(tokpath)
+	if errors.Is(err, os.ErrNotExist) {
 		v, err = LoadSentencePieceTokens(m.Path, m.Params)
 		if err != nil {
 			return err
 		}
+	} else if err != nil {
+		return err
+	} else {
+		for _, t := range ts {
+			v.Tokens = append(v.Tokens, t.Content)
+			v.Types = append(v.Types, t.Type())
+		}

-		slog.Debug("vocab loaded")
-
+		m.Params.PreTokenizer = pre
+		v.Merges = merges
 	}
+
 	m.Vocab = v

 	return nil
@@ -174,6 +177,7 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"general.file_type":    uint32(2),
 		"tokenizer.ggml.model": "gpt2",
+		"tokenizer.ggml.pre":   m.Params.PreTokenizer,

 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
 		"tokenizer.ggml.token_type": m.Vocab.Types,
diff --git a/convert/tokenizer.go b/convert/tokenizer.go
index a7da81e6..a847a84c 100644
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -1,15 +1,30 @@
 package convert

 import (
+	"cmp"
+	"crypto/sha256"
 	"encoding/json"
-	"io/ioutil"
+	"fmt"
+	"log/slog"
 	"os"
+	"slices"
+
+	"golang.org/x/exp/maps"
 )

 type Tokenizer struct {
 	Version     string         `json:"version"`
 	AddedTokens []Token        `json:"added_tokens"`
 	Model       TokenizerModel `json:"model"`
+
+	PreTokenizer struct {
+		PreTokenziers []struct {
+			Type    string `json:"type"`
+			Pattern struct {
+				Regex string `json:"Regex"`
+			} `json:"pattern"`
+		} `json:"pretokenizers"`
+	} `json:"pre_tokenizer"`
 }

 type TokenizerModel struct {
@@ -26,47 +41,69 @@ type Token struct {
 	UserDefined bool
 }

-func (t *Tokenizer) getMaxID() int {
-	var maxID int
-	for _, v := range t.Model.Vocab {
-		maxID = max(maxID, v)
+func (t *Token) Type() int32 {
+	switch {
+	case t.Special:
+		return 3
+	case t.UserDefined:
+		return 4
+	default:
+		return 1
 	}
-
-	for _, v := range t.AddedTokens {
-		maxID = max(maxID, v.ID)
-	}
-	return maxID
 }

-func newTokenizer(dirpath string) (*Tokenizer, error) {
+func (t *Tokenizer) maxID() int {
+	return max(
+		slices.Max(maps.Values(t.Model.Vocab)),
+		slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
+			return cmp.Compare(a.ID, b.ID)
+		}).ID,
+	)
+}
+
+func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
 	f, err := os.Open(dirpath)
 	if err != nil {
 		panic(err)
 	}
	 defer f.Close()

-	data, err := ioutil.ReadAll(f)
-	if err != nil {
-		return nil, err
+	var t Tokenizer
+	if err := json.NewDecoder(f).Decode(&t); err != nil {
+		return "", nil, nil, err
 	}

-	var tdata Tokenizer
-
-	if err := json.Unmarshal(data, &tdata); err != nil {
-		return nil, err
+	tokens = make([]Token, t.maxID()+1)
+	for k, v := range t.Model.Vocab {
+		tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
 	}

-	maxID := tdata.getMaxID()
-	tdata.Model.Tokens = make([]Token, maxID+1)
-
-	for k, v := range tdata.Model.Vocab {
-		tdata.Model.Tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
-	}
-
 	for _, v := range t.AddedTokens {
 		v.UserDefined = true
-		tdata.Model.Tokens[v.ID] = v
+		tokens[v.ID] = v
 	}

-	return &tdata, nil
+	sha256sum := sha256.New()
+	for _, pt := range t.PreTokenizer.PreTokenziers {
+		switch pt.Type {
+		case "Split":
+			if pt.Pattern.Regex != "" {
+				sha256sum.Write([]byte(pt.Pattern.Regex))
+			}
+		}
+	}
+
+	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
+	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
+		pre = "llama-bpe"
+	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
+		pre = "deepseek-llm"
+	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
+		pre = "deepseek-coder"
+	default:
+		slog.Warn("unknown pretokenizer, using default", "digest", digest)
+		pre = "default"
+	}
+
+	return pre, tokens, t.Model.Merges, nil
 }
diff --git a/llm/gguf.go b/llm/gguf.go
index c3cc3d41..179b3255 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -480,6 +480,7 @@ var ggufKVOrder = map[string][]string{
 		"gemma.attention.key_length",
 		"gemma.attention.value_length",
 		"general.file_type",
+		"tokenizer.ggml.pre",
 		"tokenizer.ggml.model",
 		"tokenizer.ggml.tokens",
 		"tokenizer.ggml.scores",
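parseTokens above identifies the pretokenizer by hashing the Split regexes in order and comparing the digest against a known table, rather than string-matching the regexes directly. A standalone sketch of that fingerprinting scheme; the llama-bpe digest is copied from the patch, while the regex in main is purely illustrative, so the lookup here falls through to "default":

package main

import (
	"crypto/sha256"
	"fmt"
)

// digest-to-name table; the llama-bpe entry comes from the patch above.
var pretokenizers = map[string]string{
	"d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": "llama-bpe",
}

// detect hashes each non-empty Split regex in order, exactly as
// parseTokens does, and falls back to "default" on an unknown digest.
func detect(regexes []string) string {
	h := sha256.New()
	for _, re := range regexes {
		if re != "" {
			h.Write([]byte(re))
		}
	}
	if name, ok := pretokenizers[fmt.Sprintf("%x", h.Sum(nil))]; ok {
		return name
	}
	return "default"
}

func main() {
	fmt.Println(detect([]string{`\s+`})) // not the real llama 3 regex, so: default
}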
From bbbd9f20f313af308bf4d573994e01fd5d5f7170 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Wed, 15 May 2024 14:55:57 -0700
Subject: [PATCH 7/9] cleanup

---
 convert/convert.go     | 14 ++++++++++++--
 convert/gemma.go       |  2 --
 convert/llama.go       | 32 +++++++++++---------------------
 convert/mistral.go     |  2 --
 convert/mixtral.go     |  2 --
 convert/safetensors.go |  5 +++++
 convert/tokenizer.go   |  6 +++---
 convert/torch.go       | 13 ++++---------
 llm/gguf.go            | 10 ----------
 9 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/convert/convert.go b/convert/convert.go
index e9c2ef2d..e71a0ff3 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -18,6 +18,16 @@ import (
 	"github.com/ollama/ollama/llm"
 )

+const (
+	_ int32 = iota
+	tokenTypeNormal
+	tokenTypeUnknown
+	tokenTypeControl
+	tokenTypeUserDefined
+	tokenTypeUnused
+	tokenTypeByte
+)
+
 type Params struct {
 	Architectures []string `json:"architectures"`
 	VocabSize     int      `json:"vocab_size"`
@@ -172,7 +182,7 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
 		}
 		v.Tokens = append(v.Tokens, t.key)
 		v.Scores = append(v.Scores, -1000.0)
-		v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+		v.Types = append(v.Types, tokenTypeUserDefined)
 	}

 	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
@@ -182,7 +192,7 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
 		for cnt := 0; cnt < missingTokens; cnt++ {
 			v.Tokens = append(v.Tokens, fmt.Sprintf("", cnt+1))
 			v.Scores = append(v.Scores, -1)
-			v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+			v.Types = append(v.Types, tokenTypeUserDefined)
 		}
 	}

diff --git a/convert/gemma.go b/convert/gemma.go
index 88abe646..e24b8ec5 100644
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -71,8 +71,6 @@ func (m *GemmaModel) GetTensors() error {
 	}

 	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
-
-	m.Tensors = []llm.Tensor{}
 	for _, l := range t {
 		if strings.HasSuffix(l.Name, "norm.weight") {
 			wt := l.WriterTo.(safetensorWriterTo)
diff --git a/convert/llama.go b/convert/llama.go
index 83d942cb..a10670e6 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -105,8 +105,6 @@ func (m *LlamaModel) GetTensors() error {
 		return err
 	}

-	m.Tensors = []llm.Tensor{}
-
 	pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$`
 	re, err := regexp.Compile(pattern)
 	if err != nil {
@@ -133,30 +131,22 @@ func (m *LlamaModel) GetTensors() error {
 	return nil
 }

-func (m *LlamaModel) LoadVocab() error {
-	v := &Vocab{}
-
-	tokpath := filepath.Join(m.Path, "tokenizer.json")
-	pre, ts, merges, err := parseTokens(tokpath)
+func (m *LlamaModel) LoadVocab() (err error) {
+	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
 	if errors.Is(err, os.ErrNotExist) {
-		v, err = LoadSentencePieceTokens(m.Path, m.Params)
-		if err != nil {
-			return err
-		}
+		return nil
 	} else if err != nil {
 		return err
-	} else {
-		for _, t := range ts {
-			v.Tokens = append(v.Tokens, t.Content)
-			v.Types = append(v.Types, t.Type())
-		}
-
-		m.Params.PreTokenizer = pre
-		v.Merges = merges
 	}
-	m.Vocab = v

+	m.Vocab = &Vocab{}
+	for _, t := range ts {
+		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
+		m.Vocab.Types = append(m.Vocab.Types, t.Type())
+	}
+	m.Vocab.Merges = merges
+	m.Params.PreTokenizer = pre
 	return nil
 }

@@ -174,7 +164,7 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"general.file_type":                      uint32(2),
+		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "gpt2",
 		"tokenizer.ggml.pre":                     m.Params.PreTokenizer,
diff --git a/convert/mistral.go b/convert/mistral.go
index f88de12b..89d2e084 100644
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -102,8 +102,6 @@ func (m *MistralModel) GetTensors() error {
 		return err
 	}

-	m.Tensors = []llm.Tensor{}
-
 	pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$`
 	re, err := regexp.Compile(pattern)
 	if err != nil {
diff --git a/convert/mixtral.go b/convert/mixtral.go
index 940df55d..66546fd7 100644
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -17,8 +17,6 @@ func (m *MixtralModel) GetTensors() error {
 		return err
 	}

-	m.Tensors = []llm.Tensor{}
-
 	pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$`
 	re, err := regexp.Compile(pattern)
 	if err != nil {
diff --git a/convert/safetensors.go b/convert/safetensors.go
index b52a048d..2107ae81 100644
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"slices"
+	"strings"

 	"github.com/d4l3k/go-bfloat16"
 	"github.com/mitchellh/mapstructure"
@@ -97,6 +98,10 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 	var tensors []llm.Tensor
 	for _, k := range keys {
+		if strings.HasSuffix(k, "self_attn.rotary_emb.inv_freq") {
+			continue
+		}
+
 		vals := parsed[k].(map[string]interface{})
 		var data tensorMetaData
 		if err = mapstructure.Decode(vals, &data); err != nil {
diff --git a/convert/tokenizer.go b/convert/tokenizer.go
index a847a84c..e0fe0bb7 100644
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -44,11 +44,11 @@ type Token struct {
 func (t *Token) Type() int32 {
 	switch {
 	case t.Special:
-		return 3
+		return tokenTypeControl
 	case t.UserDefined:
-		return 4
+		return tokenTypeUserDefined
 	default:
-		return 1
+		return tokenTypeNormal
 	}
 }
diff --git a/convert/torch.go b/convert/torch.go
index 803827ba..cb8d74b0 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -34,18 +34,13 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 	slog.Debug("getting torch tensors")

 	var files []string
-	var err error
-	files, err = filepath.Glob(filepath.Join(dirpath, "consolidated.*.pth"))
-	if err != nil {
-		files, err = filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
-		if err != nil {
-			slog.Error("didn't find any torch files")
-			return nil, err
-		}
+	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
+	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
 	}

 	var offset uint64
-	var tensors []llm.Tensor
 	for _, fn := range files {
 		m, err := pytorch.Load(fn)
diff --git a/llm/gguf.go b/llm/gguf.go
index 179b3255..eb7d7b75 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -62,16 +62,6 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
 	return model, nil
 }

-const (
-	_ uint32 = iota
-	GGUFTokenNormal
-	GGUFTokenUnknown
-	GGUFTokenControl
-	GGUFTokenUserDefined
-	GGUFTokenUnused
-	GGUFTokenByte
-)
-
 const (
 	ggufTypeUint8 uint32 = iota
 	ggufTypeInt8
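The `_ int32 = iota` idiom in the new constant block deliberately burns value 0, so the named constants start at 1 and line up with the GGUF/llama.cpp token-type enum, where 0 is reserved for "undefined". A quick check of the values the block produces:

package main

import "fmt"

const (
	_ int32 = iota // 0 is reserved/undefined in the token-type enum
	tokenTypeNormal
	tokenTypeUnknown
	tokenTypeControl
	tokenTypeUserDefined
	tokenTypeUnused
	tokenTypeByte
)

func main() {
	fmt.Println(tokenTypeNormal, tokenTypeControl, tokenTypeUserDefined) // 1 3 4
}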
From 34d5ef29b3d01e2a0785af96df1135dfec567a3e Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Fri, 17 May 2024 12:11:49 -0700
Subject: [PATCH 8/9] fix conversion for f16 or f32 inputs

---
 convert/gemma.go       |  49 +++++----------
 convert/llama.go       | 136 ++++++++++++++++------------------------
 convert/mistral.go     |  91 ++-------------------------
 convert/mixtral.go     |   6 +-
 convert/safetensors.go |  85 ++++++++++++++------------
 convert/torch.go       |  77 +++++++++--------------
 go.mod                 |   2 +-
 7 files changed, 152 insertions(+), 294 deletions(-)

diff --git a/convert/gemma.go b/convert/gemma.go
index e24b8ec5..9dc406e0 100644
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -1,14 +1,11 @@
 package convert

 import (
-	"encoding/binary"
 	"fmt"
 	"io"
 	"log/slog"
-	"os"
 	"strings"

-	"github.com/d4l3k/go-bfloat16"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"

@@ -19,49 +16,27 @@ type GemmaModel struct {
 	ModelData
 }

-func gemmaLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
-	slog.Debug(fmt.Sprintf("converting '%s'", r.t.Name))
-
-	data := make([]byte, r.end-r.start)
-	if err := binary.Read(f, r.bo, data); err != nil {
-		return err
-	}
-
-	tDataF32 := bfloat16.DecodeFloat32(data)
-
-	var err error
-	tDataF32, err = addOnes(tDataF32, int(r.t.Shape[0]))
-	if err != nil {
-		return err
-	}
-
-	if err := binary.Write(w, r.bo, tDataF32); err != nil {
-		return err
-	}
-	return nil
-}
-
 func addOnes(data []float32, vectorSize int) ([]float32, error) {
 	n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
 	ones := tensor.Ones(tensor.Float32, vectorSize)

-	var err error
-	n, err = n.Add(ones)
+	n, err := n.Add(ones)
 	if err != nil {
-		return []float32{}, err
+		return nil, err
 	}

-	newN, err := native.SelectF32(n, 0)
+	ts, err := native.SelectF32(n, 0)
 	if err != nil {
-		return []float32{}, err
+		return nil, err
 	}

-	var fullTensor []float32
-	for _, v := range newN {
-		fullTensor = append(fullTensor, v...)
+	var f32s []float32
+	for _, t := range ts {
+		f32s = append(f32s, t...)
 	}
-	return fullTensor, nil
+
+	return f32s, nil
 }

 func (m *GemmaModel) GetTensors() error {
@@ -74,7 +49,7 @@ func (m *GemmaModel) GetTensors() error {
 	for _, l := range t {
 		if strings.HasSuffix(l.Name, "norm.weight") {
 			wt := l.WriterTo.(safetensorWriterTo)
-			wt.handler = gemmaLayerHandler
+			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
@@ -92,6 +67,10 @@ func (m *GemmaModel) LoadVocab() error {
 	return nil
 }

+func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
+	return addOnes(data, int(shape[0]))
+}
+
 func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture": "gemma",
diff --git a/convert/llama.go b/convert/llama.go
index a10670e6..7853c4cf 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -1,7 +1,7 @@
 package convert

 import (
-	"encoding/binary"
+	"cmp"
 	"errors"
 	"fmt"
 	"io"
@@ -10,10 +10,8 @@ import (

-	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
-	"github.com/x448/float16"

 	"github.com/ollama/ollama/llm"
 )
@@ -22,83 +20,6 @@ type LlamaModel struct {
 	ModelData
 }

-func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
-
-	var tData []uint16
-	switch r.storage.(type) {
-	case *pytorch.HalfStorage:
-		data := r.storage.(*pytorch.HalfStorage).Data
-		tData = make([]uint16, len(data))
-		for cnt, v := range data {
-			tData[cnt] = uint16(float16.Fromfloat32(v))
-		}
-	case *pytorch.BFloat16Storage:
-		data := r.storage.(*pytorch.BFloat16Storage).Data
-		tData = make([]uint16, len(data))
-
-		for cnt, v := range data {
-			tData[cnt] = uint16(float16.Fromfloat32(v))
-		}
-	default:
-		return fmt.Errorf("unknown storage type for torch")
-	}
-
-	var err error
-	var heads uint32
-	if strings.Contains(r.t.Name, "attn_q") {
-		heads = uint32(r.params.AttentionHeads)
-	} else if strings.Contains(r.t.Name, "attn_k") {
-		heads = uint32(r.params.KeyValHeads)
-		if heads == 0 {
-			heads = uint32(r.params.AttentionHeads)
-		}
-	} else {
-		return fmt.Errorf("unknown layer type")
-	}
-
-	tData, err = llamaRepack(tData, int(heads), r.t.Shape)
-	if err != nil {
-		return err
-	}
-
-	if err = binary.Write(w, r.bo, tData); err != nil {
-		return err
-	}
-	return nil
-}
-
-func llamaRepack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
-	n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
-	origShape := n.Shape().Clone()
-
-	// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
-	if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
-		return nil, err
-	}
-
-	if err := n.T(0, 2, 1, 3); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(origShape...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	newN, err := native.SelectU16(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var fullTensor []uint16
-	for _, v := range newN {
-		fullTensor = append(fullTensor, v...)
-	}
-	return fullTensor, nil
-}
-
 func (m *LlamaModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
@@ -117,11 +38,11 @@ func (m *LlamaModel) GetTensors() error {
 			switch m.Format.(type) {
 			case *TorchFormat:
 				wt := l.WriterTo.(torchWriterTo)
-				wt.handler = llamaTorchLayerHandler
+				wt.repacker = m.Repack
 				l.WriterTo = wt
 			case *SafetensorFormat:
 				wt := l.WriterTo.(safetensorWriterTo)
-				wt.handler = mistralLayerHandler
+				wt.repacker = m.Repack
 				l.WriterTo = wt
 			}
 		}
@@ -184,3 +105,54 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {

 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
+
+func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
+	return llamaRepack(name, m.Params, data, shape)
+}
+
+func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
+	var dims []int
+	for _, dim := range shape {
+		if dim != 0 {
+			dims = append(dims, int(dim))
+		}
+	}
+
+	var heads int
+	if strings.HasSuffix(name, "attn_q.weight") {
+		heads = params.AttentionHeads
+	} else if strings.HasSuffix(name, "attn_k.weight") {
+		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
+	} else {
+		return nil, fmt.Errorf("unknown tensor name: %s", name)
+	}
+
+	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
+	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
+		return nil, err
+	}
+
+	if err := n.T(0, 2, 1, 3); err != nil {
+		return nil, err
+	}
+
+	if err := n.Reshape(dims...); err != nil {
+		return nil, err
+	}
+
+	if err := n.Transpose(); err != nil {
+		return nil, err
+	}
+
+	ts, err := native.SelectF32(n, 1)
+	if err != nil {
+		return nil, err
+	}
+
+	var f32s []float32
+	for _, t := range ts {
+		f32s = append(f32s, t...)
+	}
+
+	return f32s, nil
+}
diff --git a/convert/mistral.go b/convert/mistral.go
index 89d2e084..da6874cf 100644
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -1,17 +1,8 @@
 package convert

 import (
-	"encoding/binary"
-	"fmt"
 	"io"
-	"os"
 	"regexp"
-	"strings"
-
-	"github.com/d4l3k/go-bfloat16"
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-	"github.com/x448/float16"

 	"github.com/ollama/ollama/llm"
 )
@@ -20,82 +11,6 @@ type MistralModel struct {
 	ModelData
 }

-func mistralLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
-	layerSize := r.end - r.start
-
-	var err error
-	tData := make([]uint16, layerSize/2)
-	if err = binary.Read(f, r.bo, tData); err != nil {
-		return err
-	}
-
-	var heads uint32
-	if strings.Contains(r.t.Name, "attn_q") {
-		heads = uint32(r.params.AttentionHeads)
-	} else if strings.Contains(r.t.Name, "attn_k") {
-		heads = uint32(r.params.KeyValHeads)
-		if heads == 0 {
-			heads = uint32(r.params.AttentionHeads)
-		}
-	} else {
-		return fmt.Errorf("unknown layer type")
-	}
-
-	tData, err = repack(tData, int(heads), r.t.Shape)
-	if err != nil {
-		return err
-	}
-
-	var buf []byte
-	for _, n := range tData {
-		buf = r.bo.AppendUint16(buf, n)
-	}
-
-	tempBuf := make([]uint16, len(tData))
-	tDataF32 := bfloat16.DecodeFloat32(buf)
-	for cnt, v := range tDataF32 {
-		tDataF16 := float16.Fromfloat32(v)
-		tempBuf[cnt] = uint16(tDataF16)
-	}
-
-	if err = binary.Write(w, r.bo, tempBuf); err != nil {
-		return err
-	}
-	return nil
-}
-
-func repack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
-	n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
-	origShape := n.Shape().Clone()
-
-	// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
-	if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
-		return nil, err
-	}
-
-	if err := n.T(0, 2, 1, 3); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(origShape...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-	newN, err := native.SelectU16(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var fullTensor []uint16
-	for _, v := range newN {
-		fullTensor = append(fullTensor, v...)
-	}
-	return fullTensor, nil
-}
-
 func (m *MistralModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
@@ -112,7 +27,7 @@ func (m *MistralModel) GetTensors() error {
 		matches := re.FindAllStringSubmatch(l.Name, -1)
 		if len(matches) > 0 {
 			wt := l.WriterTo.(safetensorWriterTo)
-			wt.handler = mistralLayerHandler
+			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
@@ -158,3 +73,7 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {

 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
+
+func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
+	return llamaRepack(name, m.Params, data, shape)
+}
diff --git a/convert/mixtral.go b/convert/mixtral.go
index 66546fd7..baea68cd 100644
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -27,7 +27,7 @@ func (m *MixtralModel) GetTensors() error {
 		matches := re.FindAllStringSubmatch(l.Name, -1)
 		if len(matches) > 0 {
 			wt := l.WriterTo.(safetensorWriterTo)
-			wt.handler = mistralLayerHandler
+			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
@@ -81,3 +81,7 @@ func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {

 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
+
+func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
+	return llamaRepack(name, m.Params, data, shape)
+}
diff --git a/convert/safetensors.go b/convert/safetensors.go
index 2107ae81..9de9a002 100644
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -27,9 +27,10 @@ type safetensorWriterTo struct {
 	bo ByteOrder

 	filename string
+	dtype    string

 	start, end, padding uint64
-	handler             func(w io.Writer, r safetensorWriterTo, f *os.File) error
+	repacker            func(string, []float32, []uint64) ([]float32, error)
 }

 type tensorMetaData struct {
@@ -150,6 +151,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			params:   params,
 			bo:       params.ByteOrder,
 			filename: fn,
+			dtype:    data.Type,
 			start:    uint64(data.Offsets[0]),
 			end:      uint64(data.Offsets[1]),
 			padding:  8 + jsonSize,
@@ -235,51 +237,54 @@ func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 		return 0, err
 	}

-	// use the handler if one is present
-	if r.handler != nil {
-		return 0, r.handler(w, r, f)
-	}
-
-	remaining := r.end - r.start
-
-	bufSize := uint64(10240)
-	var finished bool
-	for {
-		data := make([]byte, min(bufSize, remaining))
-
-		b, err := io.ReadFull(f, data)
-		remaining -= uint64(b)
-
-		if err == io.EOF || remaining <= 0 {
-			finished = true
-		} else if err != nil {
+	var f32s []float32
+	switch r.dtype {
+	case "F32":
+		f32s = make([]float32, (r.end-r.start)/4)
+		if err = binary.Read(f, r.bo, f32s); err != nil {
+			return 0, err
+		}
+	case "F16":
+		bts := make([]uint16, (r.end-r.start)/2)
+		if err = binary.Read(f, r.bo, bts); err != nil {
 			return 0, err
 		}

-		// convert bfloat16 -> ieee float32
-		tDataF32 := bfloat16.DecodeFloat32(data)
-
-		switch r.t.Kind {
-		case 0:
-			if err := binary.Write(w, r.bo, tDataF32); err != nil {
-				return 0, err
-			}
-		case 1:
-			// convert float32 -> float16
-			tempBuf := make([]uint16, len(data)/2)
-			for cnt, v := range tDataF32 {
-				tDataF16 := float16.Fromfloat32(v)
-				tempBuf[cnt] = uint16(tDataF16)
-			}
-			if err := binary.Write(w, r.bo, tempBuf); err != nil {
-				return 0, err
-			}
+		for _, b := range bts {
+			f32s = append(f32s, float16.Frombits(b).Float32())
 		}
-		if finished {
-			break
+
+	case "BF16":
+		bts := make([]byte, r.end-r.start)
+		if err = binary.Read(f, r.bo, bts); err != nil {
+			return 0, err
+		}
+
+		f32s = bfloat16.DecodeFloat32(bts)
+	default:
+		return 0, fmt.Errorf("unknown data type: %s", r.dtype)
+	}
+
+	if r.repacker != nil {
+		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
+		if err != nil {
+			return 0, err
 		}
 	}
-	return 0, nil
+
+	switch r.t.Kind {
+	case 0:
+		return 0, binary.Write(w, r.bo, f32s)
+	case 1:
+		f16s := make([]uint16, len(f32s))
+		for i := range f32s {
+			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
+		}
+
+		return 0, binary.Write(w, r.bo, f16s)
+	default:
+		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
+	}
 }

 func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
diff --git a/convert/torch.go b/convert/torch.go
index cb8d74b0..b7ae0f76 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -24,8 +24,8 @@ type torchWriterTo struct {
 	params *Params
 	bo     ByteOrder

-	storage pytorch.StorageInterface
-	handler func(w io.Writer, r torchWriterTo) error
+	storage  pytorch.StorageInterface
+	repacker func(string, []float32, []uint64) ([]float32, error)
 }

 type TorchFormat struct{}
@@ -230,59 +230,38 @@ func (m *TorchFormat) GetLayerName(n string) (string, error) {
 }

 func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
-	// use the handler if one is present
-	if r.handler != nil {
-		return 0, r.handler(w, r)
-	}
-
-	switch storage := r.storage.(type) {
+	var f32s []float32
+	switch s := r.storage.(type) {
 	case *pytorch.FloatStorage:
-		slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name))
-		return 0, nil
+		f32s = s.Data
 	case *pytorch.HalfStorage:
-		switch r.t.Kind {
-		case 0:
-			data := r.storage.(*pytorch.HalfStorage).Data
-			slog.Debug(fmt.Sprintf("%35s F32 (%d)", r.t.Name, len(data)))
-			if err := binary.Write(w, r.bo, data); err != nil {
-				return 0, err
-			}
-		case 1:
-			data := r.storage.(*pytorch.HalfStorage).Data
-			tData := make([]uint16, len(data))
-			for cnt, v := range data {
-				tData[cnt] = uint16(float16.Fromfloat32(v))
-			}
-			slog.Debug(fmt.Sprintf("%35s F16 (%d)", r.t.Name, len(tData)))
-			if err := binary.Write(w, r.bo, tData); err != nil {
-				return 0, err
-			}
-		}
+		f32s = s.Data
 	case *pytorch.BFloat16Storage:
-		data := r.storage.(*pytorch.BFloat16Storage).Data
-		switch r.t.Kind {
-		case 0:
-			if err = binary.Write(w, r.bo, data); err != nil {
-				return 0, err
-			}
-		case 1:
-			tData := make([]uint16, len(data))
-
-			for cnt, v := range data {
-				tData[cnt] = uint16(float16.Fromfloat32(v))
-			}
-
-			if err = binary.Write(w, r.bo, tData); err != nil {
-				return 0, err
-			}
-		default:
-			return 0, fmt.Errorf("unknown storage kind: %d", r.t.Kind)
-		}
+		f32s = s.Data
 	default:
-		return 0, fmt.Errorf("unknown storage type: %T", storage)
+		return 0, fmt.Errorf("unknown data type: %T", s)
 	}

-	return 0, nil
+	if r.repacker != nil {
+		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	switch r.t.Kind {
+	case 0:
+		return 0, binary.Write(w, r.bo, f32s)
+	case 1:
+		f16s := make([]uint16, len(f32s))
+		for i := range f32s {
+			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
+		}
+
+		return 0, binary.Write(w, r.bo, f16s)
+	default:
+		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
+	}
 }

 func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
diff --git a/go.mod b/go.mod
index 5d0d3c33..255c8a04 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,6 @@ go 1.22.0

 require (
 	github.com/containerd/console v1.0.3
-	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
 	github.com/emirpasic/gods v1.18.1
 	github.com/gin-gonic/gin v1.10.0
 	github.com/golang/protobuf v1.5.4 // indirect
@@ -18,6 +17,7 @@ require (
 )

 require (
+	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
 	github.com/mattn/go-runewidth v0.0.14
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
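The consolidated llamaRepack views a Q/K weight of shape [rows, cols] as [heads, 2, rows/heads/2, cols], swaps the middle two axes, and flattens back, undoing the rope-style row interleaving in the source checkpoints. A dependency-free sketch of what that Reshape/T sequence amounts to on plain slices (my reading of the permutation, not code from the patch):

package main

import "fmt"

// repackQK reorders the rows of a row-major [rows, cols] weight by
// viewing it as [heads, 2, rows/heads/2, cols] and permuting the axes
// to [heads, rows/heads/2, 2, cols] before flattening again.
func repackQK(data []float32, rows, cols, heads int) []float32 {
	half := rows / heads / 2
	out := make([]float32, 0, len(data))
	for h := 0; h < heads; h++ {
		for r := 0; r < half; r++ {
			for pair := 0; pair < 2; pair++ {
				// source row in the [heads, 2, half] grouping
				src := ((h*2+pair)*half + r) * cols
				out = append(out, data[src:src+cols]...)
			}
		}
	}
	return out
}

func main() {
	// 1 head, 4 rows, 1 column: rows 0,1,2,3 become 0,2,1,3
	w := []float32{0, 1, 2, 3}
	fmt.Println(repackQK(w, 4, 1, 1)) // [0 2 1 3]
}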
From 3591bbe56fc3dba4d7cf9b77929143a58ffaaa59 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Tue, 21 May 2024 11:28:16 -0700
Subject: [PATCH 9/9] add test

---
 convert/convert_test.go | 103 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 convert/convert_test.go

diff --git a/convert/convert_test.go b/convert/convert_test.go
new file mode 100644
index 00000000..6aa33a49
--- /dev/null
+++ b/convert/convert_test.go
@@ -0,0 +1,103 @@
+//go:build slow
+
+package convert
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/ollama/ollama/llm"
+)
+
+func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
+	t.Helper()
+
+	mf, err := GetModelFormat(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	params, err := mf.GetParams(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	arch, err := mf.GetModelArch("", p, params)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := arch.LoadVocab(); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := arch.GetTensors(); err != nil {
+		t.Fatal(err)
+	}
+
+	f, err := os.CreateTemp(t.TempDir(), "f16")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	if err := arch.WriteGGUF(f); err != nil {
+		t.Fatal(err)
+	}
+
+	r, err := os.Open(f.Name())
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer r.Close()
+
+	m, _, err := llm.DecodeGGML(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return m.KV(), m.Tensors()
+}
+
+func TestConvertFull(t *testing.T) {
+	cases := []struct {
+		path    string
+		arch    string
+		tensors int
+		layers  int
+	}{
+		{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
+		{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
+		{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
+		{"gemma-2b-it", "gemma", 164, 20},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.path, func(t *testing.T) {
+			p := filepath.Join("testdata", tt.path)
+			if _, err := os.Stat(p); err != nil {
+				t.Skipf("%s not found", p)
+			}
+
+			kv, tensors := convertFull(t, p)
+
+			if kv.Architecture() != tt.arch {
+				t.Fatalf("expected llama, got %s", kv.Architecture())
+			}
+
+			if kv.FileType().String() != "F16" {
+				t.Fatalf("expected F16, got %s", kv.FileType())
+			}
+
+			if len(tensors) != tt.tensors {
+				t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
+			}
+
+			layers := tensors.Layers()
+			if len(layers) != tt.layers {
+				t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
+			}
+		})
+	}
+}
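Because of the //go:build slow constraint at the top of the new file, an ordinary go test ./convert run compiles the package without this test. It is presumably exercised with something like go test -tags slow ./convert -run TestConvertFull, after placing the four model directories under convert/testdata; the -tags invocation is standard Go tooling rather than something the patch itself spells out, and the test skips any model directory it cannot find.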