diff --git a/README.md b/README.md index 941a4f99..0cc15266 100644 --- a/README.md +++ b/README.md @@ -299,6 +299,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [AI Studio](https://github.com/MindWorkAI/AI-Studio) - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client) - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows) +- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac) ### Terminal diff --git a/api/client.go b/api/client.go index c59fbc42..e02b21bf 100644 --- a/api/client.go +++ b/api/client.go @@ -20,7 +20,6 @@ import ( "encoding/json" "fmt" "io" - "net" "net/http" "net/url" "runtime" @@ -63,13 +62,8 @@ func checkError(resp *http.Response, body []byte) error { // If the variable is not specified, a default ollama host and port will be // used. func ClientFromEnvironment() (*Client, error) { - ollamaHost := envconfig.Host - return &Client{ - base: &url.URL{ - Scheme: ollamaHost.Scheme, - Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port), - }, + base: envconfig.Host(), http: http.DefaultClient, }, nil } diff --git a/api/client_test.go b/api/client_test.go index fe9fd74f..23fe9334 100644 --- a/api/client_test.go +++ b/api/client_test.go @@ -2,8 +2,6 @@ package api import ( "testing" - - "github.com/ollama/ollama/envconfig" ) func TestClientFromEnvironment(t *testing.T) { @@ -33,7 +31,6 @@ func TestClientFromEnvironment(t *testing.T) { for k, v := range testCases { t.Run(k, func(t *testing.T) { t.Setenv("OLLAMA_HOST", v.value) - envconfig.LoadConfig() client, err := ClientFromEnvironment() if err != v.err { diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go index a8f1f7cd..3672aad5 100644 --- a/app/lifecycle/logging.go +++ b/app/lifecycle/logging.go @@ -14,7 +14,7 @@ import ( func InitLogging() { level := slog.LevelInfo - if envconfig.Debug { + if envconfig.Debug() { level = slog.LevelDebug } diff --git a/cmd/cmd.go b/cmd/cmd.go index 610fddcb..c1a3c3f6 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -362,9 +362,24 @@ func RunHandler(cmd *cobra.Command, args []string) error { opts.MultiModal = slices.Contains(info.Details.Families, "clip") opts.ParentModel = info.Details.ParentModel - opts.Messages = append(opts.Messages, info.Messages...) if interactive { + if err := loadModel(cmd, &opts); err != nil { + return err + } + + for _, msg := range info.Messages { + switch msg.Role { + case "user": + fmt.Printf(">>> %s\n", msg.Content) + case "assistant": + state := &displayResponseState{} + displayResponse(msg.Content, opts.WordWrap, state) + fmt.Println() + fmt.Println() + } + } + return generateInteractive(cmd, opts) } return generate(cmd, opts) @@ -1076,7 +1091,7 @@ func RunServer(cmd *cobra.Command, _ []string) error { return err } - ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port)) + ln, err := net.Listen("tcp", envconfig.Host().Host) if err != nil { return err } diff --git a/cmd/interactive.go b/cmd/interactive.go index 70afc6ea..b566eb2f 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -48,29 +48,10 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error { KeepAlive: opts.KeepAlive, } - return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error { - p.StopAndClear() - for _, msg := range opts.Messages { - switch msg.Role { - case "user": - fmt.Printf(">>> %s\n", msg.Content) - case "assistant": - state := &displayResponseState{} - displayResponse(msg.Content, opts.WordWrap, state) - fmt.Println() - fmt.Println() - } - } - return nil - }) + return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil }) } func generateInteractive(cmd *cobra.Command, opts runOptions) error { - err := loadModel(cmd, &opts) - if err != nil { - return err - } - usage := func() { fmt.Fprintln(os.Stderr, "Available Commands:") fmt.Fprintln(os.Stderr, " /set Set session variables") @@ -160,7 +141,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { return err } - if envconfig.NoHistory { + if envconfig.NoHistory() { scanner.HistoryDisable() } diff --git a/convert/convert.go b/convert/convert.go index 103de457..b9461e4f 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -1,200 +1,122 @@ package convert import ( - "cmp" - "encoding/binary" "encoding/json" + "errors" "fmt" "io" + "io/fs" "log/slog" - "os" - "path/filepath" - "slices" - "strings" - "google.golang.org/protobuf/proto" - - "github.com/ollama/ollama/convert/sentencepiece" "github.com/ollama/ollama/llm" ) -const ( - _ int32 = iota - tokenTypeNormal - tokenTypeUnknown - tokenTypeControl - tokenTypeUserDefined - tokenTypeUnused - tokenTypeByte -) - -type Params struct { - Architectures []string `json:"architectures"` - VocabSize int `json:"vocab_size"` - HiddenSize int `json:"hidden_size"` // n_embd - HiddenLayers int `json:"num_hidden_layers"` // n_layer - ContextSize int `json:"max_position_embeddings"` - IntermediateSize int `json:"intermediate_size"` - AttentionHeads int `json:"num_attention_heads"` // n_head - KeyValHeads int `json:"num_key_value_heads"` - NormEPS float64 `json:"rms_norm_eps"` - BoSTokenID int `json:"bos_token_id"` - EoSTokenID int `json:"eos_token_id"` - HeadDimension int `json:"head_dim"` - PaddingTokenID int `json:"pad_token_id"` - RopeFrequencyBase float64 `json:"rope_theta"` - - Experts int `json:"num_local_experts"` - ExpertsUsed int `json:"num_experts_per_tok"` - - PreTokenizer string - - ByteOrder +type Parameters struct { + Architectures []string `json:"architectures"` + VocabSize uint32 `json:"vocab_size"` } -type ByteOrder interface { - binary.ByteOrder - binary.AppendByteOrder +func (Parameters) KV(t *Tokenizer) llm.KV { + kv := llm.KV{ + "general.file_type": uint32(1), + "general.quantization_version": uint32(2), + "tokenizer.ggml.pre": t.Pre, + "tokenizer.ggml.model": t.Vocabulary.Model, + "tokenizer.ggml.tokens": t.Vocabulary.Tokens, + "tokenizer.ggml.scores": t.Vocabulary.Scores, + "tokenizer.ggml.token_type": t.Vocabulary.Types, + } + + if t.Template != "" { + kv["tokenizer.chat_template"] = t.Template + } + + for _, sv := range t.SpecialVocabulary { + kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID) + kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken + } + + return kv } -type ModelArch interface { - GetTensors() error - LoadVocab() error - WriteGGUF(io.WriteSeeker) error +func (Parameters) specialTokenTypes() []string { + return []string{ + "bos", "eos", "unk", "sep", "pad", "cls", "mask", + } } -type ModelFormat interface { - GetLayerName(string) (string, error) - GetTensors(string, *Params) ([]llm.Tensor, error) - GetParams(string) (*Params, error) - GetModelArch(string, string, *Params) (ModelArch, error) +func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { + return llm.WriteGGUF(ws, kv, ts) } -type ModelData struct { - Path string - Name string - Params *Params - Vocab *Vocab - Tensors []llm.Tensor - Format ModelFormat +type Converter interface { + // KV maps parameters to LLM key-values + KV(*Tokenizer) llm.KV + // Tensors maps input tensors to LLM tensors. Model specific modifications can be done here. + Tensors([]Tensor) []llm.Tensor + + // tensorName returns the LLM tensor name for a specific input name + tensorName(string) string + // specialTokenTypes returns any special token types the model uses + specialTokenTypes() []string + writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error } -func GetModelFormat(dirname string) (ModelFormat, error) { - files, err := filepath.Glob(filepath.Join(dirname, "*")) +// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations +// and files it finds in the input path. +// Supported input model formats include safetensors. +// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. +func Convert(fsys fs.FS, ws io.WriteSeeker) error { + bts, err := fs.ReadFile(fsys, "config.json") if err != nil { - return nil, err + return err } - for _, fn := range files { - if strings.HasSuffix(fn, ".safetensors") { - return &SafetensorFormat{}, nil - } else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") { - slog.Debug("model is torch") - return &TorchFormat{}, nil - } + var p Parameters + if err := json.Unmarshal(bts, &p); err != nil { + return err } - return nil, fmt.Errorf("couldn't determine model format") -} + if len(p.Architectures) < 1 { + return errors.New("unknown architecture") + } -// Details on gguf's tokenizer can be found at: -// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer -type Vocab struct { - Tokens []string - Scores []float32 - Types []int32 - Merges []string -} + var conv Converter + switch p.Architectures[0] { + case "LlamaForCausalLM", "MistralForCausalLM": + conv = &llama{} + case "MixtralForCausalLM": + conv = &mixtral{} + case "GemmaForCausalLM": + conv = &gemma{} + default: + return errors.New("unsupported architecture") + } -func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) { - slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model"))) - in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model")) + if err := json.Unmarshal(bts, conv); err != nil { + return err + } + + t, err := parseTokenizer(fsys, conv.specialTokenTypes()) if err != nil { - return nil, err + return err } - // To regenerate sentencepiece from the protobufs use: - // protoc -I=./ --go_out=./ sentencepiece_model.proto - modelProto := &sentencepiece.ModelProto{} - if err := proto.Unmarshal(in, modelProto); err != nil { - return nil, err - } - - v := &Vocab{ - Tokens: make([]string, 0), - Scores: make([]float32, 0), - Types: make([]int32, 0), - } - - pieces := modelProto.GetPieces() - for _, p := range pieces { - v.Tokens = append(v.Tokens, p.GetPiece()) - v.Scores = append(v.Scores, p.GetScore()) - t := p.GetType() - switch t { - case sentencepiece.ModelProto_SentencePiece_UNKNOWN: - case sentencepiece.ModelProto_SentencePiece_CONTROL: - case sentencepiece.ModelProto_SentencePiece_UNUSED: - case sentencepiece.ModelProto_SentencePiece_BYTE: - default: - t = sentencepiece.ModelProto_SentencePiece_NORMAL + if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) { + slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens)) + for i := range vocabSize - len(t.Vocabulary.Tokens) { + t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i)) + t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1) + t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined) } - v.Types = append(v.Types, int32(t)) + } else { + slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens)) } - slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens))) - - // add any additional tokens - addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json")) - if os.IsNotExist(err) { - return v, nil - } else if err != nil { - return nil, err + ts, err := parseTensors(fsys) + if err != nil { + return err } - slog.Info("reading user defined tokens") - - var extraTokenData map[string]int - if err := json.Unmarshal(addIn, &extraTokenData); err != nil { - return nil, err - } - - type token struct { - key string - pos int - } - - extraTokens := make([]token, 0) - for k, id := range extraTokenData { - extraTokens = append(extraTokens, token{k, id}) - } - - slices.SortFunc(extraTokens, func(a, b token) int { - return cmp.Compare(a.pos, b.pos) - }) - - numToks := len(v.Tokens) - - for cnt, t := range extraTokens { - // the token id should match the specific index for the total number of tokens - if t.pos != cnt+numToks { - return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key) - } - v.Tokens = append(v.Tokens, t.key) - v.Scores = append(v.Scores, -1000.0) - v.Types = append(v.Types, tokenTypeUserDefined) - } - slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens))) - - if params.VocabSize > len(v.Tokens) { - missingTokens := params.VocabSize - len(v.Tokens) - slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens)) - for cnt := range missingTokens { - v.Tokens = append(v.Tokens, fmt.Sprintf("", cnt+1)) - v.Scores = append(v.Scores, -1) - v.Types = append(v.Types, tokenTypeUserDefined) - } - } - - return v, nil + return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) } diff --git a/convert/convert_gemma.go b/convert/convert_gemma.go new file mode 100644 index 00000000..9213e157 --- /dev/null +++ b/convert/convert_gemma.go @@ -0,0 +1,103 @@ +package convert + +import ( + "strings" + + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" + + "github.com/ollama/ollama/llm" +) + +type gemma struct { + Parameters + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + HiddenSize uint32 `json:"hidden_size"` + HiddenLayers uint32 `json:"num_hidden_layers"` + IntermediateSize uint32 `json:"intermediate_size"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RMSNormEPS float32 `json:"rms_norm_eps"` + HeadDim uint32 `json:"head_dim"` +} + +var _ Converter = (*gemma)(nil) + +func (p *gemma) KV(t *Tokenizer) llm.KV { + kv := p.Parameters.KV(t) + kv["general.architecture"] = "gemma" + kv["general.name"] = "gemma" + kv["gemma.context_length"] = p.MaxPositionEmbeddings + kv["gemma.embedding_length"] = p.HiddenSize + kv["gemma.block_count"] = p.HiddenLayers + kv["gemma.feed_forward_length"] = p.IntermediateSize + kv["gemma.attention.head_count"] = p.NumAttentionHeads + kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads + kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + kv["gemma.attention.key_length"] = p.HeadDim + kv["gemma.attention.value_length"] = p.HeadDim + kv["tokenizer.ggml.eot_token_id"] = uint32(107) + kv["tokenizer.ggml.middle_token_id"] = uint32(68) + kv["tokenizer.ggml.prefix_token_id"] = uint32(67) + kv["tokenizer.ggml.suffix_token_id"] = uint32(69) + return kv +} + +func (p *gemma) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + for _, t := range ts { + name := p.tensorName(t.Name()) + if strings.HasSuffix(name, "_norm.weight") { + t.SetRepacker(p.addOne) + } + + out = append(out, llm.Tensor{ + Name: name, + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *gemma) tensorName(n string) string { + return strings.NewReplacer( + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + "block_sparse_moe.gate", "ffn_inp", + ).Replace(n) +} + +func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) { + n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data)) + ones := tensor.Ones(tensor.Float32, int(shape[0])) + + n, err := n.Add(ones) + if err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 0) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) + } + + return f32s, nil +} diff --git a/convert/convert_llama.go b/convert/convert_llama.go new file mode 100644 index 00000000..ed6469c5 --- /dev/null +++ b/convert/convert_llama.go @@ -0,0 +1,182 @@ +package convert + +import ( + "cmp" + "fmt" + "strings" + + "github.com/ollama/ollama/llm" + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" +) + +type llama struct { + Parameters + NLayers uint32 `json:"n_layers"` + NumHiddenLayers uint32 `json:"num_hidden_layers"` + NLayer uint32 `json:"n_layer"` + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + NCtx uint32 `json:"n_ctx"` + HiddenSize uint32 `json:"hidden_size"` + NEmbd uint32 `json:"n_embd"` + IntermediateSize uint32 `json:"intermediate_size"` + NInner uint32 `json:"n_inner"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NHead uint32 `json:"n_head"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RopeTheta float32 `json:"rope_theta"` + RopeScaling struct { + Type string `json:"type"` + Factor float32 `json:"factor"` + } `json:"rope_scaling"` + RMSNormEPS float32 `json:"rms_norm_eps"` + LayerNormEPS float32 `json:"layer_norm_eps"` + LayerNormEpsilon float32 `json:"layer_norm_epsilon"` + NormEpsilon float32 `json:"norm_epsilon"` + HeadDim uint32 `json:"head_dim"` +} + +var _ Converter = (*llama)(nil) + +func (p *llama) KV(t *Tokenizer) llm.KV { + kv := p.Parameters.KV(t) + kv["general.architecture"] = "llama" + kv["general.name"] = "llama" + kv["llama.vocab_size"] = p.VocabSize + + kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) + + if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 { + kv["llama.context_length"] = contextLength + } + + if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 { + kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd) + } + + if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 { + kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner) + } + + if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 { + kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead) + kv["llama.rope.dimension_count"] = p.HiddenSize / headCount + } + + if p.RopeTheta > 0 { + kv["llama.rope.freq_base"] = p.RopeTheta + } + + if p.RopeScaling.Type == "linear" { + kv["llama.rope.scaling.type"] = p.RopeScaling.Type + kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor + } + + if p.NumKeyValueHeads > 0 { + kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads + } + + if p.RMSNormEPS > 0 { + kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + } + + if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 { + kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon + } + + if p.HeadDim > 0 { + kv["llama.attention.key_length"] = p.HeadDim + kv["llama.attention.value_length"] = p.HeadDim + } + + if len(t.Merges) > 0 { + kv["tokenizer.ggml.merges"] = t.Merges + } + + return kv +} + +func (p *llama) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor + for _, t := range ts { + name := p.tensorName(t.Name()) + if strings.HasSuffix(name, "attn_q.weight") || + strings.HasSuffix(name, "attn_k.weight") { + t.SetRepacker(p.repack) + } + + out = append(out, llm.Tensor{ + Name: name, + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *llama) tensorName(n string) string { + return strings.NewReplacer( + "lm_head", "output", + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + // mixtral + "block_sparse_moe.gate", "ffn_gate_inp", + ).Replace(n) +} + +func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) { + var dims []int + for _, dim := range shape { + dims = append(dims, int(dim)) + } + + var heads uint32 + if strings.HasSuffix(name, "q_proj.weight") { + heads = p.NumAttentionHeads + } else if strings.HasSuffix(name, "k_proj.weight") { + heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) + } else { + return nil, fmt.Errorf("unknown tensor for repack: %s", name) + } + + n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) + if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil { + return nil, err + } + + if err := n.T(0, 2, 1, 3); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 1) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) + } + + return f32s, nil +} diff --git a/convert/convert_mixtral.go b/convert/convert_mixtral.go new file mode 100644 index 00000000..3263a27b --- /dev/null +++ b/convert/convert_mixtral.go @@ -0,0 +1,89 @@ +package convert + +import ( + "fmt" + "io" + "slices" + "strings" + + "github.com/ollama/ollama/llm" +) + +type mixtral struct { + llama + NumLocalExperts uint32 `json:"num_local_experts"` + NumExpertsPerToken uint32 `json:"num_experts_per_tok"` +} + +var _ Converter = (*mixtral)(nil) + +func (p *mixtral) KV(t *Tokenizer) llm.KV { + kv := p.llama.KV(t) + + if p.NumLocalExperts > 0 { + kv["llama.expert_count"] = p.NumLocalExperts + } + + if p.NumExpertsPerToken > 0 { + kv["llama.expert_used_count"] = p.NumExpertsPerToken + } + + return kv +} + +func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor { + oldnew := []string{ + "model.layers", "blk", + "w1", "ffn_gate_exps", + "w2", "ffn_down_exps", + "w3", "ffn_up_exps", + } + + for i := range p.NumLocalExperts { + oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".") + } + + // group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor + namer := strings.NewReplacer(oldnew...) + experts := make(map[string]experts) + + // merge experts into a single tensor while removing them from ts + ts = slices.DeleteFunc(ts, func(t Tensor) bool { + if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") { + return false + } + + name := namer.Replace(t.Name()) + experts[name] = append(experts[name], t) + return true + }) + + var out []llm.Tensor + for n, e := range experts { + // TODO(mxyng): sanity check experts + out = append(out, llm.Tensor{ + Name: n, + Kind: e[0].Kind(), + Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...), + WriterTo: e, + }) + } + + return append(out, p.llama.Tensors(ts)...) +} + +type experts []Tensor + +func (e experts) WriteTo(w io.Writer) (int64, error) { + // TODO(mxyng): experts _should_ be numerically sorted by expert but this should check + for _, t := range e { + // the canonical merged experts tensor stacks all experts along a new, 0 axis, + // e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers + // this accomplishes the same thing by writing each expert tensor in sequence + if _, err := t.WriteTo(w); err != nil { + return 0, err + } + } + + return 0, nil +} diff --git a/convert/convert_test.go b/convert/convert_test.go index 6aa33a49..67a2fcfe 100644 --- a/convert/convert_test.go +++ b/convert/convert_test.go @@ -1,48 +1,33 @@ -//go:build slow - package convert import ( + "crypto/sha256" + "encoding/json" + "flag" + "fmt" + "io" + "io/fs" + "log/slog" + "math" "os" "path/filepath" + "slices" "testing" "github.com/ollama/ollama/llm" + "golang.org/x/exp/maps" ) -func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { +func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) { t.Helper() - mf, err := GetModelFormat(p) - if err != nil { - t.Fatal(err) - } - - params, err := mf.GetParams(p) - if err != nil { - t.Fatal(err) - } - - arch, err := mf.GetModelArch("", p, params) - if err != nil { - t.Fatal(err) - } - - if err := arch.LoadVocab(); err != nil { - t.Fatal(err) - } - - if err := arch.GetTensors(); err != nil { - t.Fatal(err) - } - f, err := os.CreateTemp(t.TempDir(), "f16") if err != nil { t.Fatal(err) } defer f.Close() - if err := arch.WriteGGUF(f); err != nil { + if err := Convert(fsys, f); err != nil { t.Fatal(err) } @@ -50,53 +35,91 @@ func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { if err != nil { t.Fatal(err) } - defer r.Close() + t.Cleanup(func() { r.Close() }) - m, _, err := llm.DecodeGGML(r) + m, _, err := llm.DecodeGGML(r, math.MaxInt) if err != nil { t.Fatal(err) } - return m.KV(), m.Tensors() + if _, err := r.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) + } + + return r, m.KV(), m.Tensors() +} + +func TestMain(m *testing.M) { + var level slog.Level + flag.TextVar(&level, "level", slog.LevelInfo, "log level") + flag.Parse() + slog.SetLogLoggerLevel(level) + os.Exit(m.Run()) } func TestConvertFull(t *testing.T) { - cases := []struct { - path string - arch string - tensors int - layers int - }{ - {"Meta-Llama-3-8B-Instruct", "llama", 291, 35}, - {"Mistral-7B-Instruct-v0.2", "llama", 291, 35}, - {"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35}, - {"gemma-2b-it", "gemma", 164, 20}, + cases := []string{ + "Meta-Llama-3-8B-Instruct", + "Mistral-7B-Instruct-v0.2", + "Mixtral-8x7B-Instruct-v0.1", + "gemma-2b-it", } - for _, tt := range cases { - t.Run(tt.path, func(t *testing.T) { - p := filepath.Join("testdata", tt.path) - if _, err := os.Stat(p); err != nil { + for i := range cases { + tt := cases[i] + t.Run(tt, func(t *testing.T) { + t.Parallel() + + p := filepath.Join("testdata", tt) + if testing.Short() { + t.Skip("skipping in short mode") + } else if _, err := os.Stat(p); err != nil { t.Skipf("%s not found", p) } - kv, tensors := convertFull(t, p) + f, kv, tensors := convertFull(t, os.DirFS(p)) + actual := make(map[string]string) + for k, v := range kv { + if s, ok := v.(json.Marshaler); !ok { + actual[k] = fmt.Sprintf("%v", v) + } else { + bts, err := json.Marshal(s) + if err != nil { + t.Fatal(err) + } - if kv.Architecture() != tt.arch { - t.Fatalf("expected llama, got %s", kv.Architecture()) + actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts)) + } } - if kv.FileType().String() != "F16" { - t.Fatalf("expected F16, got %s", kv.FileType()) + for _, tensor := range tensors.Items { + sha256sum := sha256.New() + sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size())) + if _, err := io.Copy(sha256sum, sr); err != nil { + t.Fatal(err) + } + + actual[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil)) } - if len(tensors) != tt.tensors { - t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors)) + expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt))) + if err != nil { + t.Fatal(err) } - layers := tensors.Layers() - if len(layers) != tt.layers { - t.Fatalf("expected %d layers, got %d", tt.layers, len(layers)) + var expect map[string]string + if err := json.NewDecoder(expectFile).Decode(&expect); err != nil { + t.Fatal(err) + } + + keys := maps.Keys(expect) + slices.Sort(keys) + for _, k := range keys { + if v, ok := actual[k]; !ok { + t.Errorf("missing %s", k) + } else if v != expect[k] { + t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v) + } } }) } diff --git a/convert/fs.go b/convert/fs.go new file mode 100644 index 00000000..bf6da6c2 --- /dev/null +++ b/convert/fs.go @@ -0,0 +1,58 @@ +package convert + +import ( + "archive/zip" + "errors" + "io" + "io/fs" + "os" + "path/filepath" +) + +type ZipReader struct { + r *zip.Reader + p string + + // limit is the maximum size of a file that can be read directly + // from the zip archive. Files larger than this size will be extracted + limit int64 +} + +func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS { + return &ZipReader{r, p, limit} +} + +func (z *ZipReader) Open(name string) (fs.File, error) { + r, err := z.r.Open(name) + if err != nil { + return nil, err + } + defer r.Close() + + if fi, err := r.Stat(); err != nil { + return nil, err + } else if fi.Size() < z.limit { + return r, nil + } + + if !filepath.IsLocal(name) { + return nil, zip.ErrInsecurePath + } + + n := filepath.Join(z.p, name) + if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) { + w, err := os.Create(n) + if err != nil { + return nil, err + } + defer w.Close() + + if _, err := io.Copy(w, r); err != nil { + return nil, err + } + } else if err != nil { + return nil, err + } + + return os.Open(n) +} diff --git a/convert/gemma.go b/convert/gemma.go deleted file mode 100644 index d01ffedf..00000000 --- a/convert/gemma.go +++ /dev/null @@ -1,102 +0,0 @@ -package convert - -import ( - "fmt" - "io" - "log/slog" - "strings" - - "github.com/pdevine/tensor" - "github.com/pdevine/tensor/native" - - "github.com/ollama/ollama/llm" -) - -type GemmaModel struct { - ModelData -} - -func addOnes(data []float32, vectorSize int) ([]float32, error) { - n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data)) - ones := tensor.Ones(tensor.Float32, vectorSize) - - n, err := n.Add(ones) - if err != nil { - return nil, err - } - - ts, err := native.SelectF32(n, 0) - if err != nil { - return nil, err - } - - var f32s []float32 - for _, t := range ts { - f32s = append(f32s, t...) - } - - return f32s, nil -} - -func (m *GemmaModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - slog.Debug(fmt.Sprintf("Total tensors: %d", len(t))) - for _, l := range t { - if strings.HasSuffix(l.Name, "norm.weight") { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *GemmaModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) { - return addOnes(data, int(shape[0])) -} - -func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "gemma", - "general.name": m.Name, - "gemma.context_length": uint32(m.Params.ContextSize), - "gemma.embedding_length": uint32(m.Params.HiddenSize), - "gemma.block_count": uint32(m.Params.HiddenLayers), - "gemma.feed_forward_length": uint32(m.Params.IntermediateSize), - "gemma.attention.head_count": uint32(m.Params.AttentionHeads), - "gemma.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "gemma.attention.key_length": uint32(m.Params.HeadDimension), - "gemma.attention.value_length": uint32(m.Params.HeadDimension), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID), - "tokenizer.ggml.unknown_token_id": uint32(3), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} diff --git a/convert/llama.go b/convert/llama.go deleted file mode 100644 index b4211b02..00000000 --- a/convert/llama.go +++ /dev/null @@ -1,159 +0,0 @@ -package convert - -import ( - "cmp" - "errors" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/pdevine/tensor" - "github.com/pdevine/tensor/native" - - "github.com/ollama/ollama/llm" -) - -type LlamaModel struct { - ModelData -} - -func (m *LlamaModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - switch m.Format.(type) { - case *TorchFormat: - wt := l.WriterTo.(torchWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - case *SafetensorFormat: - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *LlamaModel) LoadVocab() (err error) { - pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json")) - if errors.Is(err, os.ErrNotExist) { - return nil - } else if err != nil { - return err - } - - m.Vocab = &Vocab{} - for _, t := range ts { - m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content) - m.Vocab.Types = append(m.Vocab.Types, t.Type()) - } - - m.Vocab.Merges = merges - m.Params.PreTokenizer = pre - return nil -} - -func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.vocab_size": uint32(len(m.Vocab.Tokens)), - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "gpt2", - - "tokenizer.ggml.pre": m.Params.PreTokenizer, - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.unknown_token_id": uint32(0), - } - - if len(m.Vocab.Merges) > 0 { - kv["tokenizer.ggml.merges"] = m.Vocab.Merges - } else { - kv["tokenizer.ggml.scores"] = m.Vocab.Scores - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} - -func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) { - var dims []int - for _, dim := range shape { - if dim != 0 { - dims = append(dims, int(dim)) - } - } - - var heads int - switch { - case strings.HasSuffix(name, "attn_q.weight"): - heads = params.AttentionHeads - case strings.HasSuffix(name, "attn_k.weight"): - heads = cmp.Or(params.KeyValHeads, params.AttentionHeads) - default: - return nil, fmt.Errorf("unknown tensor name: %s", name) - } - - n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) - if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil { - return nil, err - } - - if err := n.T(0, 2, 1, 3); err != nil { - return nil, err - } - - if err := n.Reshape(dims...); err != nil { - return nil, err - } - - if err := n.Transpose(); err != nil { - return nil, err - } - - ts, err := native.SelectF32(n, 1) - if err != nil { - return nil, err - } - - var f32s []float32 - for _, t := range ts { - f32s = append(f32s, t...) - } - - return f32s, nil -} diff --git a/convert/mistral.go b/convert/mistral.go deleted file mode 100644 index 8fe066d6..00000000 --- a/convert/mistral.go +++ /dev/null @@ -1,84 +0,0 @@ -package convert - -import ( - "io" - "regexp" - - "github.com/ollama/ollama/llm" -) - -type MistralModel struct { - ModelData -} - -func (m *MistralModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *MistralModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - "tokenizer.ggml.unknown_token_id": uint32(0), - } - - if m.Params.HeadDimension > 0 { - kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension) - kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension) - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} diff --git a/convert/mixtral.go b/convert/mixtral.go deleted file mode 100644 index baea68cd..00000000 --- a/convert/mixtral.go +++ /dev/null @@ -1,87 +0,0 @@ -package convert - -import ( - "io" - "regexp" - - "github.com/ollama/ollama/llm" -) - -type MixtralModel struct { - ModelData -} - -func (m *MixtralModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *MixtralModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - - "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - - "llama.expert_count": uint32(m.Params.Experts), - "llama.expert_used_count": uint32(m.Params.ExpertsUsed), - - "llama.vocab_size": uint32(len(m.Vocab.Tokens)), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.unknown_token_id": uint32(0), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} diff --git a/convert/reader.go b/convert/reader.go new file mode 100644 index 00000000..ce95208e --- /dev/null +++ b/convert/reader.go @@ -0,0 +1,82 @@ +package convert + +import ( + "errors" + "io" + "io/fs" + "strings" +) + +type Tensor interface { + Name() string + Shape() []uint64 + Kind() uint32 + SetRepacker(repacker) + WriteTo(io.Writer) (int64, error) +} + +type tensorBase struct { + name string + shape []uint64 + repacker +} + +func (t tensorBase) Name() string { + return t.name +} + +func (t tensorBase) Shape() []uint64 { + return t.shape +} + +const ( + tensorKindF32 uint32 = iota + tensorKindF16 +) + +func (t tensorBase) Kind() uint32 { + if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") { + return 0 + } + + switch len(t.shape) { + case 0: + panic("invalid tensor shape") + case 1: + return tensorKindF32 + default: + return tensorKindF16 + } +} + +func (t *tensorBase) SetRepacker(fn repacker) { + t.repacker = fn +} + +type repacker func(string, []float32, []uint64) ([]float32, error) + +func parseTensors(fsys fs.FS) ([]Tensor, error) { + patterns := []struct { + Pattern string + Func func(fs.FS, ...string) ([]Tensor, error) + }{ + {"model-*-of-*.safetensors", parseSafetensors}, + {"model.safetensors", parseSafetensors}, + {"pytorch_model-*-of-*.bin", parseTorch}, + {"pytorch_model.bin", parseTorch}, + {"consolidated.*.pth", parseTorch}, + } + + for _, pattern := range patterns { + matches, err := fs.Glob(fsys, pattern.Pattern) + if err != nil { + return nil, err + } + + if len(matches) > 0 { + return pattern.Func(fsys, matches...) + } + } + + return nil, errors.New("unknown tensor format") +} diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go new file mode 100644 index 00000000..1c169504 --- /dev/null +++ b/convert/reader_safetensors.go @@ -0,0 +1,149 @@ +package convert + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "io/fs" + "slices" + + "github.com/d4l3k/go-bfloat16" + "github.com/x448/float16" + "golang.org/x/exp/maps" +) + +type safetensorMetadata struct { + Type string `json:"dtype"` + Shape []uint64 `json:"shape"` + Offsets []int64 `json:"data_offsets"` +} + +func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) { + var ts []Tensor + for _, p := range ps { + f, err := fsys.Open(p) + if err != nil { + return nil, err + } + defer f.Close() + + var n int64 + if err := binary.Read(f, binary.LittleEndian, &n); err != nil { + return nil, err + } + + b := bytes.NewBuffer(make([]byte, 0, n)) + if _, err = io.CopyN(b, f, n); err != nil { + return nil, err + } + + var headers map[string]safetensorMetadata + if err := json.NewDecoder(b).Decode(&headers); err != nil { + return nil, err + } + + keys := maps.Keys(headers) + slices.Sort(keys) + + for _, key := range keys { + if value := headers[key]; value.Type != "" { + ts = append(ts, safetensor{ + fs: fsys, + path: p, + dtype: value.Type, + offset: safetensorsPad(n, value.Offsets[0]), + size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]), + tensorBase: &tensorBase{ + name: key, + shape: value.Shape, + }, + }) + } + } + } + + return ts, nil +} + +// safetensorsPad returns the padded size of the safetensors file given a length n and offset s +func safetensorsPad(n, offset int64) int64 { + return 8 + n + offset +} + +type safetensor struct { + fs fs.FS + path string + dtype string + offset int64 + size int64 + *tensorBase +} + +func (st safetensor) WriteTo(w io.Writer) (int64, error) { + f, err := st.fs.Open(st.path) + if err != nil { + return 0, err + } + defer f.Close() + + if seeker, ok := f.(io.Seeker); ok { + if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil { + return 0, err + } + } else { + if _, err := io.CopyN(io.Discard, f, st.offset); err != nil { + return 0, err + } + } + + var f32s []float32 + switch st.dtype { + case "F32": + f32s = make([]float32, st.size/4) + if err = binary.Read(f, binary.LittleEndian, f32s); err != nil { + return 0, err + } + case "F16": + u16s := make([]uint16, st.size/2) + if err = binary.Read(f, binary.LittleEndian, u16s); err != nil { + return 0, err + } + + for _, b := range u16s { + f32s = append(f32s, float16.Frombits(b).Float32()) + } + + case "BF16": + u8s := make([]uint8, st.size) + if err = binary.Read(f, binary.LittleEndian, u8s); err != nil { + return 0, err + } + + f32s = bfloat16.DecodeFloat32(u8s) + default: + return 0, fmt.Errorf("unknown data type: %s", st.dtype) + } + + if st.repacker != nil { + f32s, err = st.repacker(st.Name(), f32s, st.Shape()) + if err != nil { + return 0, err + } + } + + switch st.Kind() { + case tensorKindF32: + return 0, binary.Write(w, binary.LittleEndian, f32s) + case tensorKindF16: + f16s := make([]uint16, len(f32s)) + for i := range f32s { + f16s[i] = float16.Fromfloat32(f32s[i]).Bits() + } + + return 0, binary.Write(w, binary.LittleEndian, f16s) + default: + return 0, fmt.Errorf("unknown storage type: %d", st.Kind()) + } +} diff --git a/convert/reader_torch.go b/convert/reader_torch.go new file mode 100644 index 00000000..531996bf --- /dev/null +++ b/convert/reader_torch.go @@ -0,0 +1,47 @@ +package convert + +import ( + "io" + "io/fs" + + "github.com/nlpodyssey/gopickle/pytorch" + "github.com/nlpodyssey/gopickle/types" +) + +func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) { + var ts []Tensor + for _, p := range ps { + pt, err := pytorch.Load(p) + if err != nil { + return nil, err + } + + for _, k := range pt.(*types.Dict).Keys() { + t := pt.(*types.Dict).MustGet(k) + + var shape []uint64 + for dim := range t.(*pytorch.Tensor).Size { + shape = append(shape, uint64(dim)) + } + + ts = append(ts, torch{ + storage: t.(*pytorch.Tensor).Source, + tensorBase: &tensorBase{ + name: k.(string), + shape: shape, + }, + }) + } + } + + return ts, nil +} + +type torch struct { + storage pytorch.StorageInterface + *tensorBase +} + +func (pt torch) WriteTo(w io.Writer) (int64, error) { + return 0, nil +} diff --git a/convert/safetensors.go b/convert/safetensors.go deleted file mode 100644 index f45687f1..00000000 --- a/convert/safetensors.go +++ /dev/null @@ -1,309 +0,0 @@ -package convert - -import ( - "bytes" - "encoding/binary" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "slices" - "strings" - - "github.com/d4l3k/go-bfloat16" - "github.com/x448/float16" - - "github.com/ollama/ollama/llm" -) - -type safetensorWriterTo struct { - t *llm.Tensor - - params *Params - bo ByteOrder - - filename string - dtype string - - offset, size int64 - repacker func(string, []float32, []uint64) ([]float32, error) -} - -type safetensorMetadata struct { - Type string `json:"dtype"` - Shape []uint64 `json:"shape"` - Offsets []int64 `json:"data_offsets"` -} - -type SafetensorFormat struct{} - -func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { - var tensors []llm.Tensor - matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors")) - if err != nil { - return nil, err - } - - var offset uint64 - for _, f := range matches { - var t []llm.Tensor - var err error - t, offset, err = m.readTensors(f, offset, params) - if err != nil { - return nil, err - } - - tensors = append(tensors, t...) - } - return tensors, nil -} - -func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) { - f, err := os.Open(fn) - if err != nil { - return nil, 0, err - } - defer f.Close() - - var n int64 - if err := binary.Read(f, binary.LittleEndian, &n); err != nil { - return nil, 0, err - } - - b := bytes.NewBuffer(make([]byte, 0, n)) - if _, err = io.CopyN(b, f, n); err != nil { - return nil, 0, err - } - - var headers map[string]safetensorMetadata - if err := json.NewDecoder(b).Decode(&headers); err != nil { - return nil, 0, err - } - - var keys []string - for key := range headers { - if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") { - keys = append(keys, key) - } - } - - slices.Sort(keys) - - var tensors []llm.Tensor - for _, key := range keys { - value := headers[key] - - var kind uint32 - switch len(value.Shape) { - case 0: - // valuedata - continue - case 2: - kind = 1 - } - - name, err := m.GetLayerName(key) - if err != nil { - return nil, 0, err - } - - shape := make([]uint64, len(value.Shape)) - copy(shape, value.Shape) - - pad := func(s int64) int64 { - return 8 + n + s - } - - t := llm.Tensor{ - Name: name, - Kind: kind, - Offset: offset, - Shape: shape, - } - - t.WriterTo = safetensorWriterTo{ - t: &t, - params: params, - bo: params.ByteOrder, - filename: fn, - dtype: value.Type, - offset: pad(value.Offsets[0]), - size: pad(value.Offsets[1]) - pad(value.Offsets[0]), - } - - offset += t.Size() - tensors = append(tensors, t) - } - - return tensors, offset, nil -} - -func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "config.json")) - if err != nil { - return nil, err - } - defer f.Close() - - var params Params - - if err := json.NewDecoder(f).Decode(¶ms); err != nil { - return nil, err - } - - params.ByteOrder = binary.LittleEndian - return ¶ms, nil -} - -func (m *SafetensorFormat) GetLayerName(n string) (string, error) { - directMap := map[string]string{ - "model.embed_tokens.weight": "token_embd.weight", - "lm_head.weight": "output.weight", - "model.norm.weight": "output_norm.weight", - } - - tMap := map[string]string{ - "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight", - "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight", - "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight", - "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight", - "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight", - "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight", - "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight", - "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight", - "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight", - "model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight", - } - - v, ok := directMap[n] - if ok { - return v, nil - } - - // quick hack to rename the layers to gguf format - for k, v := range tMap { - re := regexp.MustCompile(k) - newName := re.ReplaceAllString(n, v) - if newName != n { - return newName, nil - } - } - - return "", fmt.Errorf("couldn't find a layer name for '%s'", n) -} - -func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) { - f, err := os.Open(r.filename) - if err != nil { - return 0, err - } - defer f.Close() - - if _, err = f.Seek(r.offset, io.SeekStart); err != nil { - return 0, err - } - - var f32s []float32 - switch r.dtype { - case "F32": - f32s = make([]float32, r.size/4) - if err = binary.Read(f, r.bo, f32s); err != nil { - return 0, err - } - case "F16": - u16s := make([]uint16, r.size/2) - if err = binary.Read(f, r.bo, u16s); err != nil { - return 0, err - } - - for _, b := range u16s { - f32s = append(f32s, float16.Frombits(b).Float32()) - } - - case "BF16": - u8s := make([]uint8, r.size) - if err = binary.Read(f, r.bo, u8s); err != nil { - return 0, err - } - - f32s = bfloat16.DecodeFloat32(u8s) - default: - return 0, fmt.Errorf("unknown data type: %s", r.dtype) - } - - if r.repacker != nil { - f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape) - if err != nil { - return 0, err - } - } - - switch r.t.Kind { - case 0: - return 0, binary.Write(w, r.bo, f32s) - case 1: - f16s := make([]uint16, len(f32s)) - for i := range f32s { - f16s[i] = float16.Fromfloat32(f32s[i]).Bits() - } - - return 0, binary.Write(w, r.bo, f16s) - default: - return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind) - } -} - -func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) { - switch len(params.Architectures) { - case 0: - return nil, fmt.Errorf("No architecture specified to convert") - case 1: - switch params.Architectures[0] { - case "LlamaForCausalLM": - return &LlamaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "MistralForCausalLM": - return &MistralModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "MixtralForCausalLM": - return &MixtralModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "GemmaForCausalLM": - return &GemmaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - default: - return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0]) - } - } - - return nil, fmt.Errorf("Unknown error") -} diff --git a/convert/testdata/Meta-Llama-3-8B-Instruct.json b/convert/testdata/Meta-Llama-3-8B-Instruct.json new file mode 100644 index 00000000..808826bb --- /dev/null +++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json @@ -0,0 +1,313 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "8192", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.rope.dimension_count": "128", + "llama.rope.freq_base": "500000", + "llama.vocab_size": "128256", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "tokenizer.ggml.model": "gpt2", + "tokenizer.ggml.pre": "llama-bpe", + "tokenizer.ggml.bos_token_id": "128000", + "tokenizer.ggml.eos_token_id": "128009", + "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b", + "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978", + "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a", + "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698", + "blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee", + "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900", + "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd", + "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516", + "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb", + "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e", + "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc", + "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1", + "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8", + "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff", + "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0", + "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40", + "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f", + "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862", + "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d", + "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3", + "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c", + "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40", + "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539", + "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662", + "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718", + "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84", + "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe", + "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5", + "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343", + "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d", + "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02", + "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a", + "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4", + "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f", + "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e", + "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b", + "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291", + "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3", + "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8", + "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767", + "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217", + "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68", + "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee", + "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22", + "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9", + "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853", + "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b", + "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04", + "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d", + "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156", + "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9", + "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4", + "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0", + "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406", + "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb", + "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73", + "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409", + "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27", + "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942", + "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1", + "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04", + "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94", + "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8", + "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960", + "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3", + "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6", + "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36", + "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf", + "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f", + "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f", + "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e", + "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e", + "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44", + "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2", + "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff", + "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f", + "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec", + "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1", + "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc", + "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30", + "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e", + "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5", + "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4", + "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6", + "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e", + "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593", + "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9", + "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542", + "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e", + "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f", + "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f", + "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b", + "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476", + "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3", + "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94", + "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a", + "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226", + "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a", + "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3", + "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187", + "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a", + "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4", + "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2", + "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc", + "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898", + "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f", + "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be", + "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3", + "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855", + "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b", + "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f", + "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7", + "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6", + "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200", + "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f", + "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df", + "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6", + "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6", + "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238", + "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb", + "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc", + "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82", + "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3", + "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4", + "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943", + "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2", + "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360", + "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3", + "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9", + "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706", + "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093", + "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5", + "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1", + "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae", + "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003", + "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8", + "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514", + "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b", + "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687", + "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4", + "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801", + "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76", + "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d", + "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500", + "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751", + "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429", + "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e", + "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec", + "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0", + "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333", + "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e", + "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e", + "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651", + "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439", + "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0", + "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925", + "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25", + "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80", + "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05", + "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d", + "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09", + "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9", + "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02", + "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4", + "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7", + "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a", + "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835", + "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c", + "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb", + "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7", + "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960", + "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4", + "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2", + "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164", + "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c", + "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72", + "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04", + "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf", + "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17", + "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa", + "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5", + "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f", + "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60", + "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84", + "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba", + "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790", + "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6", + "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e", + "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17", + "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a", + "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204", + "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c", + "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2", + "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298", + "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53", + "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f", + "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789", + "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9", + "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a", + "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac", + "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee", + "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864", + "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a", + "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3", + "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e", + "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296", + "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28", + "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12", + "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de", + "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6", + "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703", + "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c", + "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7", + "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8", + "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf", + "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9", + "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe", + "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df", + "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965", + "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c", + "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850", + "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5", + "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117", + "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa", + "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b", + "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e", + "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac", + "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb", + "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2", + "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b", + "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f", + "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0", + "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5", + "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346", + "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4", + "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666", + "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10", + "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371", + "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee", + "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833", + "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62", + "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b", + "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8", + "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a", + "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84", + "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604", + "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4", + "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2", + "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0", + "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64", + "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a", + "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0", + "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925", + "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a", + "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3", + "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6", + "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40", + "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45", + "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a", + "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b", + "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e", + "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8", + "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9", + "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f", + "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3", + "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d", + "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f", + "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967", + "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642", + "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc", + "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa", + "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48", + "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada", + "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c", + "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af", + "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b", + "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638", + "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0", + "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03", + "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa", + "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc", + "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1", + "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81", + "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566", + "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a", + "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f", + "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a", + "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f", + "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56", + "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d", + "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2", + "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b", + "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892", + "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e" +} diff --git a/convert/testdata/Mistral-7B-Instruct-v0.2.json b/convert/testdata/Mistral-7B-Instruct-v0.2.json new file mode 100644 index 00000000..88d447b3 --- /dev/null +++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json @@ -0,0 +1,313 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "32768", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "llama.rope.dimension_count": "128", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676", + "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e", + "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6", + "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81", + "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be", + "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b", + "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373", + "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25", + "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd", + "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d", + "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e", + "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287", + "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987", + "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712", + "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357", + "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740", + "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b", + "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e", + "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db", + "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346", + "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed", + "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2", + "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153", + "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24", + "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348", + "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a", + "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567", + "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397", + "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd", + "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1", + "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c", + "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075", + "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b", + "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610", + "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948", + "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38", + "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf", + "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432", + "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c", + "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88", + "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b", + "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd", + "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de", + "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa", + "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8", + "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf", + "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6", + "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65", + "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642", + "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316", + "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443", + "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d", + "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d", + "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e", + "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e", + "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5", + "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b", + "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f", + "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932", + "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085", + "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075", + "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90", + "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555", + "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1", + "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f", + "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392", + "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab", + "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a", + "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f", + "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe", + "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c", + "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8", + "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387", + "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0", + "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff", + "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5", + "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5", + "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d", + "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff", + "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369", + "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f", + "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa", + "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a", + "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f", + "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89", + "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645", + "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d", + "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995", + "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006", + "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e", + "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75", + "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57", + "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97", + "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a", + "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d", + "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f", + "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c", + "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278", + "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93", + "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08", + "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3", + "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6", + "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5", + "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677", + "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4", + "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950", + "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be", + "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403", + "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87", + "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60", + "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb", + "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42", + "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec", + "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761", + "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04", + "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66", + "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10", + "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a", + "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d", + "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0", + "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04", + "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3", + "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be", + "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a", + "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b", + "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd", + "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5", + "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0", + "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683", + "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba", + "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be", + "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54", + "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a", + "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd", + "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9", + "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11", + "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c", + "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca", + "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38", + "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522", + "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313", + "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a", + "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626", + "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6", + "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19", + "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9", + "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623", + "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15", + "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e", + "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad", + "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4", + "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048", + "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b", + "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee", + "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15", + "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9", + "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c", + "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07", + "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1", + "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece", + "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed", + "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18", + "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196", + "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392", + "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e", + "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de", + "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b", + "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995", + "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa", + "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899", + "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156", + "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d", + "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb", + "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a", + "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd", + "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c", + "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4", + "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe", + "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724", + "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd", + "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd", + "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9", + "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881", + "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd", + "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0", + "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308", + "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b", + "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185", + "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7", + "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8", + "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2", + "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d", + "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101", + "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e", + "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276", + "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e", + "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f", + "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca", + "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30", + "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4", + "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462", + "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c", + "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87", + "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76", + "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0", + "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842", + "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73", + "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5", + "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8", + "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9", + "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7", + "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2", + "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4", + "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831", + "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795", + "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101", + "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf", + "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061", + "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10", + "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801", + "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66", + "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb", + "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8", + "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9", + "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590", + "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf", + "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e", + "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab", + "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d", + "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6", + "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97", + "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b", + "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f", + "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074", + "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6", + "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4", + "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75", + "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3", + "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9", + "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc", + "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509", + "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581", + "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e", + "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31", + "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6", + "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af", + "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287", + "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b", + "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c", + "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840", + "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54", + "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add", + "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e", + "blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb", + "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8", + "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1", + "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba", + "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794", + "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d", + "blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1", + "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c", + "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83", + "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83", + "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80", + "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05", + "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae", + "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859", + "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b", + "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53", + "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36", + "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89", + "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036", + "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2", + "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c", + "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743", + "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65", + "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3", + "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e", + "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267", + "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993", + "blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46", + "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04", + "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6", + "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8", + "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101", + "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd", + "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda", + "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba", + "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec", + "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf", + "blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268", + "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb", + "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330", + "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3" +} diff --git a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json new file mode 100644 index 00000000..a1596532 --- /dev/null +++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json @@ -0,0 +1,348 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "32768", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.rope.dimension_count": "128", + "llama.rope.freq_base": "1e+06", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "llama.expert_count": "8", + "llama.expert_used_count": "2", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676", + "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e", + "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6", + "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4", + "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa", + "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5", + "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3", + "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314", + "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c", + "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2", + "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67", + "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227", + "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b", + "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366", + "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809", + "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd", + "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c", + "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33", + "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f", + "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a", + "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793", + "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1", + "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd", + "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c", + "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964", + "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c", + "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e", + "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2", + "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c", + "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6", + "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e", + "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9", + "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3", + "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1", + "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c", + "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497", + "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086", + "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210", + "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1", + "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86", + "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625", + "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75", + "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555", + "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e", + "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd", + "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4", + "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164", + "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6", + "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758", + "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790", + "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9", + "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12", + "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d", + "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb", + "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d", + "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff", + "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481", + "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e", + "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece", + "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd", + "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13", + "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba", + "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a", + "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca", + "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66", + "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470", + "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c", + "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b", + "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c", + "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424", + "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6", + "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84", + "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb", + "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1", + "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948", + "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc", + "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5", + "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a", + "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0", + "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0", + "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187", + "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948", + "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e", + "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2", + "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17", + "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983", + "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699", + "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0", + "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db", + "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7", + "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90", + "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5", + "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e", + "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b", + "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f", + "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333", + "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce", + "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0", + "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b", + "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c", + "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf", + "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351", + "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5", + "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99", + "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702", + "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39", + "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a", + "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5", + "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad", + "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd", + "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3", + "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac", + "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95", + "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482", + "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6", + "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea", + "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6", + "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325", + "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512", + "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47", + "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e", + "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89", + "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada", + "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee", + "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49", + "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829", + "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e", + "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a", + "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154", + "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97", + "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374", + "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5", + "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5", + "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52", + "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05", + "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace", + "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069", + "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc", + "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337", + "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899", + "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d", + "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec", + "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c", + "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d", + "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9", + "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78", + "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41", + "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4", + "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06", + "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8", + "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599", + "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab", + "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241", + "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211", + "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb", + "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98", + "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf", + "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e", + "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73", + "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef", + "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a", + "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c", + "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0", + "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c", + "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5", + "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa", + "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4", + "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6", + "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b", + "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62", + "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d", + "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc", + "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125", + "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248", + "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd", + "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5", + "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d", + "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940", + "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561", + "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac", + "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec", + "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3", + "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047", + "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298", + "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e", + "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8", + "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75", + "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f", + "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb", + "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4", + "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae", + "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78", + "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5", + "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057", + "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff", + "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8", + "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60", + "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd", + "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b", + "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0", + "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813", + "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226", + "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7", + "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39", + "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc", + "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4", + "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b", + "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5", + "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57", + "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765", + "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb", + "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2", + "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820", + "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6", + "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660", + "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c", + "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a", + "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93", + "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209", + "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa", + "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d", + "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12", + "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36", + "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369", + "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179", + "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb", + "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206", + "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237", + "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4", + "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a", + "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f", + "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32", + "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8", + "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2", + "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5", + "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779", + "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b", + "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd", + "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1", + "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e", + "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d", + "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c", + "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6", + "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8", + "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1", + "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192", + "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a", + "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79", + "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1", + "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33", + "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa", + "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643", + "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0", + "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61", + "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c", + "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c", + "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb", + "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7", + "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431", + "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b", + "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a", + "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7", + "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64", + "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e", + "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6", + "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781", + "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6", + "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff", + "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54", + "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82", + "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8", + "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765", + "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16", + "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd", + "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e", + "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2", + "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7", + "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61", + "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca", + "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4", + "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2", + "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1", + "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a", + "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4", + "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea", + "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b", + "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778", + "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74", + "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd", + "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2", + "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8", + "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc", + "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64", + "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805", + "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298", + "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e", + "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b", + "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78", + "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71", + "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7", + "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a", + "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705", + "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea", + "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9", + "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c", + "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb", + "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd", + "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044", + "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1", + "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70", + "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e", + "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646", + "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273", + "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e", + "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786", + "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40", + "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a", + "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b", + "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172", + "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7", + "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628", + "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802", + "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0", + "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8", + "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13", + "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564" +} diff --git a/convert/testdata/gemma-2b-it.json b/convert/testdata/gemma-2b-it.json new file mode 100644 index 00000000..0482f1e1 --- /dev/null +++ b/convert/testdata/gemma-2b-it.json @@ -0,0 +1,188 @@ +{ + "general.architecture": "gemma", + "general.file_type": "1", + "general.quantization_version": "2", + "gemma.block_count": "18", + "gemma.context_length": "8192", + "gemma.embedding_length": "2048", + "gemma.feed_forward_length": "16384", + "gemma.attention.head_count": "8", + "gemma.attention.head_count_kv": "1", + "gemma.attention.key_length": "256", + "gemma.attention.value_length": "256", + "gemma.attention.layer_norm_rms_epsilon": "1e-06", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "2", + "tokenizer.ggml.eos_token_id": "1", + "tokenizer.ggml.padding_token_id": "0", + "tokenizer.ggml.unknown_token_id": "3", + "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8", + "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d", + "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda", + "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07", + "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f", + "blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb", + "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265", + "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791", + "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13", + "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7", + "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf", + "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448", + "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4", + "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406", + "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626", + "blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee", + "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a", + "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00", + "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79", + "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33", + "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22", + "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315", + "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044", + "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455", + "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5", + "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5", + "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4", + "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f", + "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061", + "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687", + "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1", + "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21", + "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20", + "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce", + "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991", + "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68", + "blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e", + "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2", + "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2", + "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee", + "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db", + "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065", + "blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0", + "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde", + "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa", + "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec", + "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1", + "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3", + "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e", + "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5", + "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded", + "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536", + "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8", + "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9", + "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653", + "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557", + "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49", + "blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14", + "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4", + "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a", + "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b", + "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003", + "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a", + "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd", + "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168", + "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb", + "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f", + "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7", + "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d", + "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb", + "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f", + "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774", + "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10", + "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b", + "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2", + "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe", + "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69", + "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f", + "blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6", + "blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c", + "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c", + "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef", + "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908", + "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73", + "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211", + "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183", + "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee", + "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7", + "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7", + "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054", + "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764", + "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57", + "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923", + "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb", + "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa", + "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010", + "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff", + "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763", + "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf", + "blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b", + "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34", + "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f", + "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492", + "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df", + "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754", + "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea", + "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce", + "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789", + "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709", + "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e", + "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474", + "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0", + "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1", + "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b", + "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509", + "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae", + "blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e", + "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7", + "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104", + "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456", + "blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d", + "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3", + "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5", + "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f", + "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7", + "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9", + "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd", + "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1", + "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7", + "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd", + "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86", + "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0", + "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d", + "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8", + "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52", + "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c", + "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19", + "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d", + "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e", + "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c", + "blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec", + "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca", + "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91", + "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6", + "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b", + "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f", + "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767", + "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5", + "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b", + "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93", + "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec", + "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74", + "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f", + "blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948", + "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca", + "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672", + "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9", + "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c", + "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465", + "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555", + "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d", + "blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94", + "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3", + "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c", + "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab", + "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf", + "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4" +} diff --git a/convert/tokenizer.go b/convert/tokenizer.go index fd6df5f5..0d42a6d8 100644 --- a/convert/tokenizer.go +++ b/convert/tokenizer.go @@ -3,19 +3,150 @@ package convert import ( "cmp" "crypto/sha256" + "encoding/hex" "encoding/json" + "errors" "fmt" + "io/fs" "log/slog" "os" "slices" +) - "golang.org/x/exp/maps" +const ( + _ int32 = iota + tokenTypeNormal + tokenTypeUnknown + tokenTypeControl + tokenTypeUserDefined + tokenTypeUnused + tokenTypeByte ) type Tokenizer struct { - Version string `json:"version"` - AddedTokens []Token `json:"added_tokens"` - Model TokenizerModel `json:"model"` + *Vocabulary + SpecialVocabulary []*SpecialVocabulary + Merges []string + + Pre string + Template string +} + +func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) { + v, err := parseVocabulary(fsys) + if err != nil { + return nil, err + } + + t := &Tokenizer{ + Vocabulary: v, + Pre: "default", + } + + addedTokens := make(map[string]token) + if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) { + } else if err != nil { + return nil, err + } else { + defer f.Close() + + var tt tokenizer + if err := json.NewDecoder(f).Decode(&tt); err != nil { + return nil, err + } + + for _, t := range tt.AddedTokens { + addedTokens[t.Content] = t + } + + t.Merges = tt.Model.Merges + + sha256sum := sha256.New() + for _, pt := range tt.PreTokenizer.PreTokenizers { + switch pt.Type { + case "Split": + if pt.Pattern.Regex != "" { + // create a checksum of all Split pretokenizers which should be sufficient + // to identify the pretokenizer + sha256sum.Write([]byte(pt.Pattern.Regex)) + } + } + } + + switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest { + case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": + t.Pre = "llama-bpe" + case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": + t.Pre = "deepseek-llm" + case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": + t.Pre = "deepseek-coder" + case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855": + // noop, empty pretokenizer + default: + slog.Warn("unknown pretokenizer, using default", "digest", digest) + } + } + + if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) { + } else if err != nil { + return nil, err + } else { + defer f.Close() + + var p map[string]json.RawMessage + if err := json.NewDecoder(f).Decode(&p); err != nil { + return nil, err + } + + if template, ok := p["chat_template"]; ok { + if err := json.Unmarshal(template, &t.Template); err != nil { + return nil, err + } + } + + for _, st := range specialTokenTypes { + sv := SpecialVocabulary{Type: st} + if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok { + if err := json.Unmarshal(bts, &sv.AddToken); err != nil { + return nil, err + } + } + + if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok { + var content string + if err := json.Unmarshal(bts, &content); err != nil { + var mm map[string]any + if err := json.Unmarshal(bts, &mm); err != nil { + continue + } + + content, ok = mm["content"].(string) + if !ok { + continue + } + } + + sv.Content = content + } + + if id, ok := addedTokens[sv.Content]; ok { + sv.ID = id.ID + t.SpecialVocabulary = append(t.SpecialVocabulary, &sv) + } + } + } + + return t, nil +} + +type tokenizer struct { + Version string `json:"version"` + AddedTokens []token `json:"added_tokens"` + Model struct { + Type string `json:"type"` + Vocab map[string]int `json:"vocab"` + Merges []string `json:"merges"` + } `json:"model"` PreTokenizer struct { PreTokenizers []struct { @@ -27,80 +158,108 @@ type Tokenizer struct { } `json:"pre_tokenizer"` } -type TokenizerModel struct { - Type string `json:"type"` - Vocab map[string]int `json:"vocab"` - Merges []string `json:"merges"` - Tokens []Token -} - -type Token struct { +type token struct { ID int `json:"id"` Content string `json:"content"` Special bool `json:"special"` UserDefined bool } -func (t *Token) Type() int32 { - switch { - case t.Special: - return tokenTypeControl - case t.UserDefined: - return tokenTypeUserDefined - default: - return tokenTypeNormal - } +type Vocabulary struct { + Model string + Tokens []string + Scores []float32 + Types []int32 } -func (t *Tokenizer) maxID() int { - return max( - slices.Max(maps.Values(t.Model.Vocab)), - slices.MaxFunc(t.AddedTokens, func(a, b Token) int { - return cmp.Compare(a.ID, b.ID) - }).ID, - ) -} - -func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) { - f, err := os.Open(dirpath) +func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) { + f, err := fsys.Open("tokenizer.json") if err != nil { - panic(err) + return nil, err } defer f.Close() - var t Tokenizer + var t tokenizer if err := json.NewDecoder(f).Decode(&t); err != nil { - return "", nil, nil, err + return nil, err } - tokens = make([]Token, t.maxID()+1) + var tokens []token for k, v := range t.Model.Vocab { - tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false} + tokens = append(tokens, token{ + ID: v, + Content: k, + }) } - for _, v := range t.AddedTokens { - v.UserDefined = true - tokens[v.ID] = v + for _, t := range t.AddedTokens { + t.UserDefined = true + tokens = append(tokens, t) } - sha256sum := sha256.New() - for _, pt := range t.PreTokenizer.PreTokenizers { - if pt.Type == "Split" && pt.Pattern.Regex != "" { - sha256sum.Write([]byte(pt.Pattern.Regex)) + slices.SortFunc(tokens, func(i, j token) int { + return cmp.Compare(i.ID, j.ID) + }) + + v := Vocabulary{Model: "gpt2"} + for _, t := range tokens { + v.Tokens = append(v.Tokens, t.Content) + v.Scores = append(v.Scores, float32(t.ID)) + + switch { + case t.Special: + v.Types = append(v.Types, tokenTypeControl) + case t.UserDefined: + v.Types = append(v.Types, tokenTypeUserDefined) + default: + v.Types = append(v.Types, tokenTypeNormal) } } - switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest { - case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": - pre = "llama-bpe" - case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": - pre = "deepseek-llm" - case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": - pre = "deepseek-coder" - default: - slog.Warn("unknown pretokenizer, using default", "digest", digest) - pre = "default" + return &v, nil +} + +func parseVocabulary(fsys fs.FS) (*Vocabulary, error) { + patterns := []struct { + Pattern string + Func func(fs.FS) (*Vocabulary, error) + }{ + {"tokenizer.model", parseSentencePiece}, + {"tokenizer.json", parseVocabularyFromTokenizer}, } - return pre, tokens, t.Model.Merges, nil + for _, pattern := range patterns { + if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) { + continue + } else if err != nil { + return nil, err + } + + return pattern.Func(fsys) + } + + return nil, errors.New("unknown tensor format") +} + +type SpecialVocabulary struct { + Type string + ID int + Content string + AddToken bool +} + +func (sv SpecialVocabulary) Key() string { + switch t := sv.Type; t { + case "bos", "eos", "cls", "mask": + return t + case "unk": + return "unknown" + case "sep": + //nolint:misspell // this is an upstream typo + return "seperator" + case "pad": + return "padding" + } + + panic("unknown special vocabulary type") } diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go new file mode 100644 index 00000000..babf702c --- /dev/null +++ b/convert/tokenizer_spm.go @@ -0,0 +1,83 @@ +package convert + +import ( + "cmp" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "slices" + + "google.golang.org/protobuf/proto" + + "github.com/ollama/ollama/convert/sentencepiece" +) + +func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) { + bts, err := fs.ReadFile(fsys, "tokenizer.model") + if err != nil { + return nil, err + } + + var spm sentencepiece.ModelProto + if err := proto.Unmarshal(bts, &spm); err != nil { + return nil, err + } + + v := Vocabulary{Model: "llama"} + for _, piece := range spm.GetPieces() { + v.Tokens = append(v.Tokens, piece.GetPiece()) + v.Scores = append(v.Scores, piece.GetScore()) + + switch t := piece.GetType(); t { + case sentencepiece.ModelProto_SentencePiece_UNKNOWN, + sentencepiece.ModelProto_SentencePiece_CONTROL, + sentencepiece.ModelProto_SentencePiece_UNUSED, + sentencepiece.ModelProto_SentencePiece_BYTE: + v.Types = append(v.Types, int32(t)) + default: + v.Types = append(v.Types, int32(sentencepiece.ModelProto_SentencePiece_NORMAL)) + } + } + + f, err := fsys.Open("added_tokens.json") + if errors.Is(err, os.ErrNotExist) { + return &v, nil + } else if err != nil { + return nil, err + } + defer f.Close() + + var atm map[string]int + if err := json.NewDecoder(f).Decode(&atm); err != nil { + return nil, err + } + + type t struct { + id int + content string + } + + var ts []t + for content, id := range atm { + ts = append(ts, t{id, content}) + } + + slices.SortFunc(ts, func(i, j t) int { + return cmp.Compare(i.id, j.id) + }) + + n := len(v.Tokens) + for i, t := range ts { + if t.id != i+n { + return nil, fmt.Errorf("invalid token id: %d", t.id) + } + + v.Tokens = append(v.Tokens, t.content) + v.Scores = append(v.Scores, -1000.0) + v.Types = append(v.Types, tokenTypeUserDefined) + } + + return &v, nil +} diff --git a/convert/torch.go b/convert/torch.go deleted file mode 100644 index 55414adc..00000000 --- a/convert/torch.go +++ /dev/null @@ -1,287 +0,0 @@ -package convert - -import ( - "encoding/binary" - "encoding/json" - "fmt" - "io" - "log/slog" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/nlpodyssey/gopickle/pytorch" - "github.com/nlpodyssey/gopickle/types" - "github.com/x448/float16" - - "github.com/ollama/ollama/llm" -) - -type torchWriterTo struct { - t *llm.Tensor - - params *Params - bo ByteOrder - - storage pytorch.StorageInterface - repacker func(string, []float32, []uint64) ([]float32, error) -} - -type TorchFormat struct{} - -func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { - slog.Debug("getting torch tensors") - - var files []string - if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 { - files = append(files, pt...) - } else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 { - files = append(files, pt...) - } - - var offset uint64 - var tensors []llm.Tensor - for _, fn := range files { - m, err := pytorch.Load(fn) - if err != nil { - slog.Error(fmt.Sprintf("error unpickling: %q", err)) - return []llm.Tensor{}, err - } - - for _, k := range m.(*types.Dict).Keys() { - if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") { - continue - } - - t, _ := m.(*types.Dict).Get(k) - tshape := t.(*pytorch.Tensor).Size - - var size uint64 - var kind uint32 - switch len(tshape) { - case 0: - continue - case 1: - // convert to float32 - kind = 0 - size = uint64(tshape[0] * 4) - case 2: - // convert to float16 - kind = 1 - size = uint64(tshape[0] * tshape[1] * 2) - } - - ggufName, err := tf.GetLayerName(k.(string)) - if err != nil { - slog.Error(err.Error()) - return nil, err - } - slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape)) - - shape := []uint64{0, 0, 0, 0} - for i := range tshape { - shape[i] = uint64(tshape[i]) - } - - tensor := llm.Tensor{ - Name: ggufName, - Kind: kind, - Offset: offset, // calculate the offset - Shape: shape, - } - - tensor.WriterTo = torchWriterTo{ - t: &tensor, - params: params, - bo: params.ByteOrder, - storage: t.(*pytorch.Tensor).Source, - } - - tensors = append(tensors, tensor) - offset += size - } - } - - return tensors, nil -} - -func getAltParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "params.json")) - if err != nil { - slog.Error("no params.json") - return nil, err - } - defer f.Close() - - type TorchParams struct { - HiddenSize int `json:"dim"` - AttentionHeads int `json:"n_heads"` - KeyValHeads int `json:"n_kv_heads"` - HiddenLayers int `json:"n_layers"` - RopeTheta float64 `json:"rope_theta"` - NormEPS float64 `json:"norm_eps"` - } - - var tparams TorchParams - - d := json.NewDecoder(f) - err = d.Decode(&tparams) - if err != nil { - return nil, err - } - - params := &Params{ - Architectures: []string{"LlamaForCausalLM"}, - HiddenSize: tparams.HiddenSize, - AttentionHeads: tparams.AttentionHeads, - KeyValHeads: tparams.KeyValHeads, - HiddenLayers: tparams.HiddenLayers, - NormEPS: tparams.NormEPS, - } - - switch { - case tparams.RopeTheta == 1000000: - // Codellama - params.ContextSize = 16384 - case tparams.NormEPS == 1e-06: - // llama2 - slog.Debug("Found llama2 - setting context size to 4096") - params.ContextSize = 4096 - default: - params.ContextSize = 2048 - } - - params.ByteOrder = binary.LittleEndian - return params, nil -} - -func (m *TorchFormat) GetParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "config.json")) - if err != nil { - if os.IsNotExist(err) { - // try params.json instead - return getAltParams(dirpath) - } else { - return nil, err - } - } - - var params Params - d := json.NewDecoder(f) - err = d.Decode(¶ms) - if err != nil { - return nil, err - } - - params.ByteOrder = binary.LittleEndian - return ¶ms, nil -} - -func (m *TorchFormat) GetLayerName(n string) (string, error) { - directMap := map[string]string{ - "tok_embeddings.weight": "token_embd.weight", - "output.weight": "output.weight", - "norm.weight": "output_norm.weight", - "rope.freqs": "rope_freqs.weight", - "model.embed_tokens.weight": "token_embd.weight", - "lm_head.weight": "output.weight", - "model.norm.weight": "output_norm.weight", - } - - lMap := map[string]string{ - "layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight", - "layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight", - "layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight", - "layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight", - "layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight", - "layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight", - "layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight", - "layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight", - "layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight", - "layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight", - "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight", - "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight", - "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight", - "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight", - "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight", - "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight", - "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight", - "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight", - "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight", - } - - v, ok := directMap[n] - if ok { - return v, nil - } - - // quick hack to rename the layers to gguf format - for k, v := range lMap { - re := regexp.MustCompile(k) - newName := re.ReplaceAllString(n, v) - if newName != n { - return newName, nil - } - } - - return "", fmt.Errorf("couldn't find a layer name for '%s'", n) -} - -func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) { - var f32s []float32 - switch s := r.storage.(type) { - case *pytorch.FloatStorage: - f32s = s.Data - case *pytorch.HalfStorage: - f32s = s.Data - case *pytorch.BFloat16Storage: - f32s = s.Data - default: - return 0, fmt.Errorf("unknown data type: %T", s) - } - - if r.repacker != nil { - f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape) - if err != nil { - return 0, err - } - } - - switch r.t.Kind { - case 0: - return 0, binary.Write(w, r.bo, f32s) - case 1: - f16s := make([]uint16, len(f32s)) - for i := range f32s { - f16s[i] = float16.Fromfloat32(f32s[i]).Bits() - } - - return 0, binary.Write(w, r.bo, f16s) - default: - return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind) - } -} - -func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) { - switch len(params.Architectures) { - case 0: - return nil, fmt.Errorf("No architecture specified to convert") - case 1: - switch params.Architectures[0] { - case "LlamaForCausalLM": - return &LlamaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - default: - return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0]) - } - } - - return nil, fmt.Errorf("Unknown error") -} diff --git a/docs/openai.md b/docs/openai.md index fee30f71..b4443cb0 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -27,6 +27,15 @@ chat_completion = client.chat.completions.create( ], model='llama3', ) + +list_completion = client.models.list() + +model = client.models.retrieve("llama3") + +embeddings = client.embeddings.create( + model="all-minilm", + input=["why is the sky blue?", "why is the grass green?"] +) ``` ### OpenAI JavaScript library @@ -45,6 +54,15 @@ const chatCompletion = await openai.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], model: 'llama3', }) + +const listCompletion = await openai.models.list() + +const model = await openai.models.retrieve("llama3"); + +const embedding = await openai.embeddings.create({ + model: "all-minilm", + input: ["why is the sky blue?", "why is the grass green?"], +}); ``` ### `curl` @@ -66,6 +84,16 @@ curl http://localhost:11434/v1/chat/completions \ ] }' +curl http://localhost:11434/v1/models + +curl http://localhost:11434/v1/models/llama3 + +curl http://localhost:11434/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{ + "model": "all-minilm", + "input": ["why is the sky blue?", "why is the grass green?"] + }' ``` ## Endpoints @@ -103,6 +131,34 @@ curl http://localhost:11434/v1/chat/completions \ - [ ] `user` - [ ] `n` +### `/v1/models` + +#### Notes + +- `created` corresponds to when the model was last modified +- `owned_by` corresponds to the ollama username, defaulting to `"library"` + +### `/v1/models/{model}` + +#### Notes + +- `created` corresponds to when the model was last modified +- `owned_by` corresponds to the ollama username, defaulting to `"library"` + +### `/v1/embeddings` + +#### Supported request fields + +- [x] `model` +- [x] `input` + - [x] string + - [x] array of strings + - [ ] array of tokens + - [ ] array of token arrays +- [ ] `encoding format` +- [ ] `dimensions` +- [ ] `user` + ## Models Before using a model, pull it locally `ollama pull`: diff --git a/envconfig/config.go b/envconfig/config.go index 0abc6968..b82b773d 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -1,11 +1,11 @@ package envconfig import ( - "errors" "fmt" "log/slog" "math" "net" + "net/url" "os" "path/filepath" "runtime" @@ -14,296 +14,16 @@ import ( "time" ) -type OllamaHost struct { - Scheme string - Host string - Port string -} - -func (o OllamaHost) String() string { - return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port) -} - -var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST") - -var ( - // Set via OLLAMA_ORIGINS in the environment - AllowOrigins []string - // Set via OLLAMA_DEBUG in the environment - Debug bool - // Experimental flash attention - FlashAttention bool - // Set via OLLAMA_HOST in the environment - Host *OllamaHost - // Set via OLLAMA_KEEP_ALIVE in the environment - KeepAlive time.Duration - // Set via OLLAMA_LLM_LIBRARY in the environment - LLMLibrary string - // Set via OLLAMA_MAX_LOADED_MODELS in the environment - MaxRunners int - // Set via OLLAMA_MAX_QUEUE in the environment - MaxQueuedRequests int - // Set via OLLAMA_MODELS in the environment - ModelsDir string - // Set via OLLAMA_NOHISTORY in the environment - NoHistory bool - // Set via OLLAMA_NOPRUNE in the environment - NoPrune bool - // Set via OLLAMA_NUM_PARALLEL in the environment - NumParallel int - // Set via OLLAMA_RUNNERS_DIR in the environment - RunnersDir string - // Set via OLLAMA_SCHED_SPREAD in the environment - SchedSpread bool - // Set via OLLAMA_TMPDIR in the environment - TmpDir string - // Set via OLLAMA_INTEL_GPU in the environment - IntelGpu bool - - // Set via CUDA_VISIBLE_DEVICES in the environment - CudaVisibleDevices string - // Set via HIP_VISIBLE_DEVICES in the environment - HipVisibleDevices string - // Set via ROCR_VISIBLE_DEVICES in the environment - RocrVisibleDevices string - // Set via GPU_DEVICE_ORDINAL in the environment - GpuDeviceOrdinal string - // Set via HSA_OVERRIDE_GFX_VERSION in the environment - HsaOverrideGfxVersion string -) - -type EnvVar struct { - Name string - Value any - Description string -} - -func AsMap() map[string]EnvVar { - ret := map[string]EnvVar{ - "OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"}, - "OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"}, - "OLLAMA_HOST": {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"}, - "OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"}, - "OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"}, - "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"}, - "OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"}, - "OLLAMA_MODELS": {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"}, - "OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"}, - "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"}, - "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"}, - "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"}, - "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"}, - "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"}, - "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"}, - } - if runtime.GOOS != "darwin" { - ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"} - ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"} - ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"} - ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"} - ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"} - ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"} - } - return ret -} - -func Values() map[string]string { - vals := make(map[string]string) - for k, v := range AsMap() { - vals[k] = fmt.Sprintf("%v", v.Value) - } - return vals -} - -var defaultAllowOrigins = []string{ - "localhost", - "127.0.0.1", - "0.0.0.0", -} - -// Clean quotes and spaces from the value -func clean(key string) string { - return strings.Trim(os.Getenv(key), "\"' ") -} - -func init() { - // default values - NumParallel = 0 // Autoselect - MaxRunners = 0 // Autoselect - MaxQueuedRequests = 512 - KeepAlive = 5 * time.Minute - - LoadConfig() -} - -func LoadConfig() { - if debug := clean("OLLAMA_DEBUG"); debug != "" { - d, err := strconv.ParseBool(debug) - if err == nil { - Debug = d - } else { - Debug = true - } - } - - if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" { - d, err := strconv.ParseBool(fa) - if err == nil { - FlashAttention = d - } - } - - RunnersDir = clean("OLLAMA_RUNNERS_DIR") - if runtime.GOOS == "windows" && RunnersDir == "" { - // On Windows we do not carry the payloads inside the main executable - appExe, err := os.Executable() - if err != nil { - slog.Error("failed to lookup executable path", "error", err) - } - - cwd, err := os.Getwd() - if err != nil { - slog.Error("failed to lookup working directory", "error", err) - } - - var paths []string - for _, root := range []string{filepath.Dir(appExe), cwd} { - paths = append(paths, - root, - filepath.Join(root, "windows-"+runtime.GOARCH), - filepath.Join(root, "dist", "windows-"+runtime.GOARCH), - ) - } - - // Try a few variations to improve developer experience when building from source in the local tree - for _, p := range paths { - candidate := filepath.Join(p, "ollama_runners") - _, err := os.Stat(candidate) - if err == nil { - RunnersDir = candidate - break - } - } - if RunnersDir == "" { - slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") - } - } - - TmpDir = clean("OLLAMA_TMPDIR") - - LLMLibrary = clean("OLLAMA_LLM_LIBRARY") - - if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" { - val, err := strconv.Atoi(onp) - if err != nil { - slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err) - } else { - NumParallel = val - } - } - - if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" { - NoHistory = true - } - - if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" { - s, err := strconv.ParseBool(spread) - if err == nil { - SchedSpread = s - } else { - SchedSpread = true - } - } - - if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" { - NoPrune = true - } - - if origins := clean("OLLAMA_ORIGINS"); origins != "" { - AllowOrigins = strings.Split(origins, ",") - } - for _, allowOrigin := range defaultAllowOrigins { - AllowOrigins = append(AllowOrigins, - fmt.Sprintf("http://%s", allowOrigin), - fmt.Sprintf("https://%s", allowOrigin), - fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")), - fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")), - ) - } - - AllowOrigins = append(AllowOrigins, - "app://*", - "file://*", - "tauri://*", - ) - - maxRunners := clean("OLLAMA_MAX_LOADED_MODELS") - if maxRunners != "" { - m, err := strconv.Atoi(maxRunners) - if err != nil { - slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err) - } else { - MaxRunners = m - } - } - - if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" { - p, err := strconv.Atoi(onp) - if err != nil || p <= 0 { - slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err) - } else { - MaxQueuedRequests = p - } - } - - ka := clean("OLLAMA_KEEP_ALIVE") - if ka != "" { - loadKeepAlive(ka) - } - - var err error - ModelsDir, err = getModelsDir() - if err != nil { - slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err) - } - - Host, err = getOllamaHost() - if err != nil { - slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port) - } - - if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil { - IntelGpu = set - } - - CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES") - HipVisibleDevices = clean("HIP_VISIBLE_DEVICES") - RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES") - GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL") - HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION") -} - -func getModelsDir() (string, error) { - if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists { - return models, nil - } - home, err := os.UserHomeDir() - if err != nil { - return "", err - } - return filepath.Join(home, ".ollama", "models"), nil -} - -func getOllamaHost() (*OllamaHost, error) { +// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable. +// Default is scheme "http" and host "127.0.0.1:11434" +func Host() *url.URL { defaultPort := "11434" - hostVar := os.Getenv("OLLAMA_HOST") - hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'")) - - scheme, hostport, ok := strings.Cut(hostVar, "://") + s := strings.TrimSpace(Var("OLLAMA_HOST")) + scheme, hostport, ok := strings.Cut(s, "://") switch { case !ok: - scheme, hostport = "http", hostVar + scheme, hostport = "http", s case scheme == "http": defaultPort = "80" case scheme == "https": @@ -323,38 +43,242 @@ func getOllamaHost() (*OllamaHost, error) { } } - if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 { - return &OllamaHost{ + if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 { + slog.Warn("invalid port, using default", "port", port, "default", defaultPort) + return &url.URL{ Scheme: scheme, - Host: host, - Port: defaultPort, - }, ErrInvalidHostPort + Host: net.JoinHostPort(host, defaultPort), + } } - return &OllamaHost{ + return &url.URL{ Scheme: scheme, - Host: host, - Port: port, - }, nil + Host: net.JoinHostPort(host, port), + } } -func loadKeepAlive(ka string) { - v, err := strconv.Atoi(ka) +// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable. +func Origins() (origins []string) { + if s := Var("OLLAMA_ORIGINS"); s != "" { + origins = strings.Split(s, ",") + } + + for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} { + origins = append(origins, + fmt.Sprintf("http://%s", origin), + fmt.Sprintf("https://%s", origin), + fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")), + fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")), + ) + } + + origins = append(origins, + "app://*", + "file://*", + "tauri://*", + ) + + return origins +} + +// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable. +// Default is $HOME/.ollama/models +func Models() string { + if s := Var("OLLAMA_MODELS"); s != "" { + return s + } + + home, err := os.UserHomeDir() if err != nil { - d, err := time.ParseDuration(ka) - if err == nil { - if d < 0 { - KeepAlive = time.Duration(math.MaxInt64) + panic(err) + } + + return filepath.Join(home, ".ollama", "models") +} + +// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable. +// Negative values are treated as infinite. Zero is treated as no keep alive. +// Default is 5 minutes. +func KeepAlive() (keepAlive time.Duration) { + keepAlive = 5 * time.Minute + if s := Var("OLLAMA_KEEP_ALIVE"); s != "" { + if d, err := time.ParseDuration(s); err == nil { + keepAlive = d + } else if n, err := strconv.ParseInt(s, 10, 64); err == nil { + keepAlive = time.Duration(n) * time.Second + } + } + + if keepAlive < 0 { + return time.Duration(math.MaxInt64) + } + + return keepAlive +} + +func Bool(k string) func() bool { + return func() bool { + if s := Var(k); s != "" { + b, err := strconv.ParseBool(s) + if err != nil { + return true + } + + return b + } + + return false + } +} + +var ( + // Debug enabled additional debug information. + Debug = Bool("OLLAMA_DEBUG") + // FlashAttention enables the experimental flash attention feature. + FlashAttention = Bool("OLLAMA_FLASH_ATTENTION") + // NoHistory disables readline history. + NoHistory = Bool("OLLAMA_NOHISTORY") + // NoPrune disables pruning of model blobs on startup. + NoPrune = Bool("OLLAMA_NOPRUNE") + // SchedSpread allows scheduling models across all GPUs. + SchedSpread = Bool("OLLAMA_SCHED_SPREAD") + // IntelGPU enables experimental Intel GPU detection. + IntelGPU = Bool("OLLAMA_INTEL_GPU") +) + +func String(s string) func() string { + return func() string { + return Var(s) + } +} + +var ( + LLMLibrary = String("OLLAMA_LLM_LIBRARY") + TmpDir = String("OLLAMA_TMPDIR") + + CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES") + HipVisibleDevices = String("HIP_VISIBLE_DEVICES") + RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES") + GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL") + HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION") +) + +func RunnersDir() (p string) { + if p := Var("OLLAMA_RUNNERS_DIR"); p != "" { + return p + } + + if runtime.GOOS != "windows" { + return + } + + defer func() { + if p == "" { + slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") + } + }() + + // On Windows we do not carry the payloads inside the main executable + exe, err := os.Executable() + if err != nil { + return + } + + cwd, err := os.Getwd() + if err != nil { + return + } + + var paths []string + for _, root := range []string{filepath.Dir(exe), cwd} { + paths = append(paths, + root, + filepath.Join(root, "windows-"+runtime.GOARCH), + filepath.Join(root, "dist", "windows-"+runtime.GOARCH), + ) + } + + // Try a few variations to improve developer experience when building from source in the local tree + for _, path := range paths { + candidate := filepath.Join(path, "ollama_runners") + if _, err := os.Stat(candidate); err == nil { + p = candidate + break + } + } + + return p +} + +func Uint(key string, defaultValue uint) func() uint { + return func() uint { + if s := Var(key); s != "" { + if n, err := strconv.ParseUint(s, 10, 64); err != nil { + slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue) } else { - KeepAlive = d + return uint(n) } } - } else { - d := time.Duration(v) * time.Second - if d < 0 { - KeepAlive = time.Duration(math.MaxInt64) - } else { - KeepAlive = d - } + + return defaultValue } } + +var ( + // NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable. + NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0) + // MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable. + MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0) + // MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable. + MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512) + // MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable. + MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0) +) + +type EnvVar struct { + Name string + Value any + Description string +} + +func AsMap() map[string]EnvVar { + ret := map[string]EnvVar{ + "OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"}, + "OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"}, + "OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"}, + "OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"}, + "OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"}, + "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"}, + "OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"}, + "OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"}, + "OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"}, + "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"}, + "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"}, + "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"}, + "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"}, + "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"}, + "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"}, + } + if runtime.GOOS != "darwin" { + ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"} + ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"} + ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"} + ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"} + ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"} + ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"} + } + return ret +} + +func Values() map[string]string { + vals := make(map[string]string) + for k, v := range AsMap() { + vals[k] = fmt.Sprintf("%v", v.Value) + } + return vals +} + +// Var returns an environment variable stripped of leading and trailing quotes or spaces +func Var(key string) string { + return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'") +} diff --git a/envconfig/config_test.go b/envconfig/config_test.go index a5d73fd7..92a500f1 100644 --- a/envconfig/config_test.go +++ b/envconfig/config_test.go @@ -1,87 +1,234 @@ package envconfig import ( - "fmt" "math" - "net" "testing" "time" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/google/go-cmp/cmp" ) -func TestConfig(t *testing.T) { - Debug = false // Reset whatever was loaded in init() - t.Setenv("OLLAMA_DEBUG", "") - LoadConfig() - require.False(t, Debug) - t.Setenv("OLLAMA_DEBUG", "false") - LoadConfig() - require.False(t, Debug) - t.Setenv("OLLAMA_DEBUG", "1") - LoadConfig() - require.True(t, Debug) - t.Setenv("OLLAMA_FLASH_ATTENTION", "1") - LoadConfig() - require.True(t, FlashAttention) - t.Setenv("OLLAMA_KEEP_ALIVE", "") - LoadConfig() - require.Equal(t, 5*time.Minute, KeepAlive) - t.Setenv("OLLAMA_KEEP_ALIVE", "3") - LoadConfig() - require.Equal(t, 3*time.Second, KeepAlive) - t.Setenv("OLLAMA_KEEP_ALIVE", "1h") - LoadConfig() - require.Equal(t, 1*time.Hour, KeepAlive) - t.Setenv("OLLAMA_KEEP_ALIVE", "-1s") - LoadConfig() - require.Equal(t, time.Duration(math.MaxInt64), KeepAlive) - t.Setenv("OLLAMA_KEEP_ALIVE", "-1") - LoadConfig() - require.Equal(t, time.Duration(math.MaxInt64), KeepAlive) -} - -func TestClientFromEnvironment(t *testing.T) { - type testCase struct { +func TestHost(t *testing.T) { + cases := map[string]struct { value string expect string - err error + }{ + "empty": {"", "127.0.0.1:11434"}, + "only address": {"1.2.3.4", "1.2.3.4:11434"}, + "only port": {":1234", ":1234"}, + "address and port": {"1.2.3.4:1234", "1.2.3.4:1234"}, + "hostname": {"example.com", "example.com:11434"}, + "hostname and port": {"example.com:1234", "example.com:1234"}, + "zero port": {":0", ":0"}, + "too large port": {":66000", ":11434"}, + "too small port": {":-1", ":11434"}, + "ipv6 localhost": {"[::1]", "[::1]:11434"}, + "ipv6 world open": {"[::]", "[::]:11434"}, + "ipv6 no brackets": {"::1", "[::1]:11434"}, + "ipv6 + port": {"[::1]:1337", "[::1]:1337"}, + "extra space": {" 1.2.3.4 ", "1.2.3.4:11434"}, + "extra quotes": {"\"1.2.3.4\"", "1.2.3.4:11434"}, + "extra space+quotes": {" \" 1.2.3.4 \" ", "1.2.3.4:11434"}, + "extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"}, + "http": {"http://1.2.3.4", "1.2.3.4:80"}, + "http port": {"http://1.2.3.4:4321", "1.2.3.4:4321"}, + "https": {"https://1.2.3.4", "1.2.3.4:443"}, + "https port": {"https://1.2.3.4:4321", "1.2.3.4:4321"}, } - hostTestCases := map[string]*testCase{ - "empty": {value: "", expect: "127.0.0.1:11434"}, - "only address": {value: "1.2.3.4", expect: "1.2.3.4:11434"}, - "only port": {value: ":1234", expect: ":1234"}, - "address and port": {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"}, - "hostname": {value: "example.com", expect: "example.com:11434"}, - "hostname and port": {value: "example.com:1234", expect: "example.com:1234"}, - "zero port": {value: ":0", expect: ":0"}, - "too large port": {value: ":66000", err: ErrInvalidHostPort}, - "too small port": {value: ":-1", err: ErrInvalidHostPort}, - "ipv6 localhost": {value: "[::1]", expect: "[::1]:11434"}, - "ipv6 world open": {value: "[::]", expect: "[::]:11434"}, - "ipv6 no brackets": {value: "::1", expect: "[::1]:11434"}, - "ipv6 + port": {value: "[::1]:1337", expect: "[::1]:1337"}, - "extra space": {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"}, - "extra quotes": {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"}, - "extra space+quotes": {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"}, - "extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"}, - } - - for k, v := range hostTestCases { - t.Run(k, func(t *testing.T) { - t.Setenv("OLLAMA_HOST", v.value) - LoadConfig() - - oh, err := getOllamaHost() - if err != v.err { - t.Fatalf("expected %s, got %s", v.err, err) - } - - if err == nil { - host := net.JoinHostPort(oh.Host, oh.Port) - assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host)) + for name, tt := range cases { + t.Run(name, func(t *testing.T) { + t.Setenv("OLLAMA_HOST", tt.value) + if host := Host(); host.Host != tt.expect { + t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host) + } + }) + } +} + +func TestOrigins(t *testing.T) { + cases := []struct { + value string + expect []string + }{ + {"", []string{ + "http://localhost", + "https://localhost", + "http://localhost:*", + "https://localhost:*", + "http://127.0.0.1", + "https://127.0.0.1", + "http://127.0.0.1:*", + "https://127.0.0.1:*", + "http://0.0.0.0", + "https://0.0.0.0", + "http://0.0.0.0:*", + "https://0.0.0.0:*", + "app://*", + "file://*", + "tauri://*", + }}, + {"http://10.0.0.1", []string{ + "http://10.0.0.1", + "http://localhost", + "https://localhost", + "http://localhost:*", + "https://localhost:*", + "http://127.0.0.1", + "https://127.0.0.1", + "http://127.0.0.1:*", + "https://127.0.0.1:*", + "http://0.0.0.0", + "https://0.0.0.0", + "http://0.0.0.0:*", + "https://0.0.0.0:*", + "app://*", + "file://*", + "tauri://*", + }}, + {"http://172.16.0.1,https://192.168.0.1", []string{ + "http://172.16.0.1", + "https://192.168.0.1", + "http://localhost", + "https://localhost", + "http://localhost:*", + "https://localhost:*", + "http://127.0.0.1", + "https://127.0.0.1", + "http://127.0.0.1:*", + "https://127.0.0.1:*", + "http://0.0.0.0", + "https://0.0.0.0", + "http://0.0.0.0:*", + "https://0.0.0.0:*", + "app://*", + "file://*", + "tauri://*", + }}, + {"http://totally.safe,http://definitely.legit", []string{ + "http://totally.safe", + "http://definitely.legit", + "http://localhost", + "https://localhost", + "http://localhost:*", + "https://localhost:*", + "http://127.0.0.1", + "https://127.0.0.1", + "http://127.0.0.1:*", + "https://127.0.0.1:*", + "http://0.0.0.0", + "https://0.0.0.0", + "http://0.0.0.0:*", + "https://0.0.0.0:*", + "app://*", + "file://*", + "tauri://*", + }}, + } + for _, tt := range cases { + t.Run(tt.value, func(t *testing.T) { + t.Setenv("OLLAMA_ORIGINS", tt.value) + + if diff := cmp.Diff(Origins(), tt.expect); diff != "" { + t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff) + } + }) + } +} + +func TestBool(t *testing.T) { + cases := map[string]bool{ + "": false, + "true": true, + "false": false, + "1": true, + "0": false, + // invalid values + "random": true, + "something": true, + } + + for k, v := range cases { + t.Run(k, func(t *testing.T) { + t.Setenv("OLLAMA_BOOL", k) + if b := Bool("OLLAMA_BOOL")(); b != v { + t.Errorf("%s: expected %t, got %t", k, v, b) + } + }) + } +} + +func TestUint(t *testing.T) { + cases := map[string]uint{ + "0": 0, + "1": 1, + "1337": 1337, + // default values + "": 11434, + "-1": 11434, + "0o10": 11434, + "0x10": 11434, + "string": 11434, + } + + for k, v := range cases { + t.Run(k, func(t *testing.T) { + t.Setenv("OLLAMA_UINT", k) + if i := Uint("OLLAMA_UINT", 11434)(); i != v { + t.Errorf("%s: expected %d, got %d", k, v, i) + } + }) + } +} + +func TestKeepAlive(t *testing.T) { + cases := map[string]time.Duration{ + "": 5 * time.Minute, + "1s": time.Second, + "1m": time.Minute, + "1h": time.Hour, + "5m0s": 5 * time.Minute, + "1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second, + "0": time.Duration(0), + "60": 60 * time.Second, + "120": 2 * time.Minute, + "3600": time.Hour, + "-0": time.Duration(0), + "-1": time.Duration(math.MaxInt64), + "-1m": time.Duration(math.MaxInt64), + // invalid values + " ": 5 * time.Minute, + "???": 5 * time.Minute, + "1d": 5 * time.Minute, + "1y": 5 * time.Minute, + "1w": 5 * time.Minute, + } + + for tt, expect := range cases { + t.Run(tt, func(t *testing.T) { + t.Setenv("OLLAMA_KEEP_ALIVE", tt) + if actual := KeepAlive(); actual != expect { + t.Errorf("%s: expected %s, got %s", tt, expect, actual) + } + }) + } +} + +func TestVar(t *testing.T) { + cases := map[string]string{ + "value": "value", + " value ": "value", + " 'value' ": "value", + ` "value" `: "value", + " ' value ' ": " value ", + ` " value " `: " value ", + } + + for k, v := range cases { + t.Run(k, func(t *testing.T) { + t.Setenv("OLLAMA_VAR", k) + if s := Var("OLLAMA_VAR"); s != v { + t.Errorf("%s: expected %q, got %q", k, v, s) } }) } diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go index 6493af9e..1ad4b906 100644 --- a/gpu/amd_linux.go +++ b/gpu/amd_linux.go @@ -61,9 +61,9 @@ func AMDGetGPUInfo() []RocmGPUInfo { // Determine if the user has already pre-selected which GPUs to look at, then ignore the others var visibleDevices []string - hipVD := envconfig.HipVisibleDevices // zero based index only - rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID - gpuDO := envconfig.GpuDeviceOrdinal // zero based index + hipVD := envconfig.HipVisibleDevices() // zero based index only + rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID + gpuDO := envconfig.GpuDeviceOrdinal() // zero based index switch { // TODO is this priorty order right? case hipVD != "": @@ -76,7 +76,7 @@ func AMDGetGPUInfo() []RocmGPUInfo { visibleDevices = strings.Split(gpuDO, ",") } - gfxOverride := envconfig.HsaOverrideGfxVersion + gfxOverride := envconfig.HsaOverrideGfxVersion() var supported []string libDir := "" diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index 20aed447..a170dfdc 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -53,7 +53,7 @@ func AMDGetGPUInfo() []RocmGPUInfo { } var supported []string - gfxOverride := envconfig.HsaOverrideGfxVersion + gfxOverride := envconfig.HsaOverrideGfxVersion() if gfxOverride == "" { supported, err = GetSupportedGFX(libDir) if err != nil { diff --git a/gpu/assets.go b/gpu/assets.go index 073d2e81..39ff7c21 100644 --- a/gpu/assets.go +++ b/gpu/assets.go @@ -26,7 +26,7 @@ func PayloadsDir() (string, error) { defer lock.Unlock() var err error if payloadsDir == "" { - runnersDir := envconfig.RunnersDir + runnersDir := envconfig.RunnersDir() if runnersDir != "" { payloadsDir = runnersDir @@ -35,7 +35,7 @@ func PayloadsDir() (string, error) { // The remainder only applies on non-windows where we still carry payloads in the main executable cleanupTmpDirs() - tmpDir := envconfig.TmpDir + tmpDir := envconfig.TmpDir() if tmpDir == "" { tmpDir, err = os.MkdirTemp("", "ollama") if err != nil { @@ -105,7 +105,7 @@ func cleanupTmpDirs() { func Cleanup() { lock.Lock() defer lock.Unlock() - runnersDir := envconfig.RunnersDir + runnersDir := envconfig.RunnersDir() if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" { // We want to fully clean up the tmpdir parent of the payloads dir tmpDir := filepath.Clean(filepath.Join(payloadsDir, "..")) diff --git a/gpu/gpu.go b/gpu/gpu.go index 6e25cb46..acab1c8d 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -230,8 +230,8 @@ func GetGPUInfo() GpuInfoList { // On windows we bundle the nvidia library one level above the runner dir depPath := "" - if runtime.GOOS == "windows" && envconfig.RunnersDir != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda") + if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { + depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda") } // Load ALL libraries @@ -302,12 +302,12 @@ func GetGPUInfo() GpuInfoList { } // Intel - if envconfig.IntelGpu { + if envconfig.IntelGPU() { oHandles = initOneAPIHandles() // On windows we bundle the oneapi library one level above the runner dir depPath = "" - if runtime.GOOS == "windows" && envconfig.RunnersDir != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi") + if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { + depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi") } for d := range oHandles.oneapi.num_drivers { @@ -611,7 +611,7 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) { } func getVerboseState() C.uint16_t { - if envconfig.Debug { + if envconfig.Debug() { return C.uint16_t(1) } return C.uint16_t(0) diff --git a/integration/basic_test.go b/integration/basic_test.go index 6e632a1c..8e35b5c5 100644 --- a/integration/basic_test.go +++ b/integration/basic_test.go @@ -45,14 +45,7 @@ func TestUnicodeModelDir(t *testing.T) { defer os.RemoveAll(modelDir) slog.Info("unicode", "OLLAMA_MODELS", modelDir) - oldModelsDir := os.Getenv("OLLAMA_MODELS") - if oldModelsDir == "" { - defer os.Unsetenv("OLLAMA_MODELS") - } else { - defer os.Setenv("OLLAMA_MODELS", oldModelsDir) - } - err = os.Setenv("OLLAMA_MODELS", modelDir) - require.NoError(t, err) + t.Setenv("OLLAMA_MODELS", modelDir) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) defer cancel() diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go index 8593285b..81d0b587 100644 --- a/integration/concurrency_test.go +++ b/integration/concurrency_test.go @@ -5,14 +5,16 @@ package integration import ( "context" "log/slog" - "os" "strconv" "sync" "testing" "time" - "github.com/ollama/ollama/api" "github.com/stretchr/testify/require" + + "github.com/ollama/ollama/api" + "github.com/ollama/ollama/envconfig" + "github.com/ollama/ollama/format" ) func TestMultiModelConcurrency(t *testing.T) { @@ -106,13 +108,16 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) { // Stress the system if we know how much VRAM it has, and attempt to load more models than will fit func TestMultiModelStress(t *testing.T) { - vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM - if vram == "" { + s := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM + if s == "" { t.Skip("OLLAMA_MAX_VRAM not specified, can't pick the right models for the stress test") } - max, err := strconv.ParseUint(vram, 10, 64) - require.NoError(t, err) - const MB = uint64(1024 * 1024) + + maxVram, err := strconv.ParseUint(s, 10, 64) + if err != nil { + t.Fatal(err) + } + type model struct { name string size uint64 // Approximate amount of VRAM they typically use when fully loaded in VRAM @@ -121,83 +126,82 @@ func TestMultiModelStress(t *testing.T) { smallModels := []model{ { name: "orca-mini", - size: 2992 * MB, + size: 2992 * format.MebiByte, }, { name: "phi", - size: 2616 * MB, + size: 2616 * format.MebiByte, }, { name: "gemma:2b", - size: 2364 * MB, + size: 2364 * format.MebiByte, }, { name: "stable-code:3b", - size: 2608 * MB, + size: 2608 * format.MebiByte, }, { name: "starcoder2:3b", - size: 2166 * MB, + size: 2166 * format.MebiByte, }, } mediumModels := []model{ { name: "llama2", - size: 5118 * MB, + size: 5118 * format.MebiByte, }, { name: "mistral", - size: 4620 * MB, + size: 4620 * format.MebiByte, }, { name: "orca-mini:7b", - size: 5118 * MB, + size: 5118 * format.MebiByte, }, { name: "dolphin-mistral", - size: 4620 * MB, + size: 4620 * format.MebiByte, }, { name: "gemma:7b", - size: 5000 * MB, + size: 5000 * format.MebiByte, + }, + { + name: "codellama:7b", + size: 5118 * format.MebiByte, }, - // TODO - uncomment this once #3565 is merged and this is rebased on it - // { - // name: "codellama:7b", - // size: 5118 * MB, - // }, } // These seem to be too slow to be useful... // largeModels := []model{ // { // name: "llama2:13b", - // size: 7400 * MB, + // size: 7400 * format.MebiByte, // }, // { // name: "codellama:13b", - // size: 7400 * MB, + // size: 7400 * format.MebiByte, // }, // { // name: "orca-mini:13b", - // size: 7400 * MB, + // size: 7400 * format.MebiByte, // }, // { // name: "gemma:7b", - // size: 5000 * MB, + // size: 5000 * format.MebiByte, // }, // { // name: "starcoder2:15b", - // size: 9100 * MB, + // size: 9100 * format.MebiByte, // }, // } var chosenModels []model switch { - case max < 10000*MB: + case maxVram < 10000*format.MebiByte: slog.Info("selecting small models") chosenModels = smallModels - // case max < 30000*MB: + // case maxVram < 30000*format.MebiByte: default: slog.Info("selecting medium models") chosenModels = mediumModels @@ -226,15 +230,15 @@ func TestMultiModelStress(t *testing.T) { } var wg sync.WaitGroup - consumed := uint64(256 * MB) // Assume some baseline usage + consumed := uint64(256 * format.MebiByte) // Assume some baseline usage for i := 0; i < len(req); i++ { // Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long - if i > 1 && consumed > max { - slog.Info("achieved target vram exhaustion", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024) + if i > 1 && consumed > vram { + slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) break } consumed += chosenModels[i].size - slog.Info("target vram", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024) + slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) wg.Add(1) go func(i int) { diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go index dfa5eae0..b06197e1 100644 --- a/integration/max_queue_test.go +++ b/integration/max_queue_test.go @@ -5,7 +5,6 @@ package integration import ( "context" "errors" - "fmt" "log/slog" "os" "strconv" @@ -14,8 +13,10 @@ import ( "testing" "time" - "github.com/ollama/ollama/api" "github.com/stretchr/testify/require" + + "github.com/ollama/ollama/api" + "github.com/ollama/ollama/envconfig" ) func TestMaxQueue(t *testing.T) { @@ -27,13 +28,10 @@ func TestMaxQueue(t *testing.T) { // Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits threadCount := 32 - mq := os.Getenv("OLLAMA_MAX_QUEUE") - if mq != "" { - var err error - threadCount, err = strconv.Atoi(mq) - require.NoError(t, err) + if maxQueue := envconfig.MaxQueue(); maxQueue != 0 { + threadCount = maxQueue } else { - os.Setenv("OLLAMA_MAX_QUEUE", fmt.Sprintf("%d", threadCount)) + t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount)) } req := api.GenerateRequest{ diff --git a/llm/ggla.go b/llm/ggla.go index 34c4f6ca..831f6071 100644 --- a/llm/ggla.go +++ b/llm/ggla.go @@ -36,6 +36,8 @@ type ggla struct { kv KV tensors []*Tensor + + tensorOffset uint64 } func newGGLA(container *containerGGLA) *ggla { @@ -50,7 +52,10 @@ func (llm *ggla) KV() KV { } func (llm *ggla) Tensors() Tensors { - return llm.tensors + return Tensors{ + Items: llm.tensors, + Offset: llm.tensorOffset, + } } func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) { @@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) { } llm.kv["alpha"] = alpha + offset, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + llm.tensorOffset = uint64(offset) + for { var dims uint32 if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil { diff --git a/llm/ggml.go b/llm/ggml.go index fddb5039..d7f2eef7 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -112,11 +112,14 @@ func (kv KV) ChatTemplate() string { return s } -type Tensors []*Tensor +type Tensors struct { + Items []*Tensor + Offset uint64 +} func (ts Tensors) Layers() map[string]Layer { layers := make(map[string]Layer) - for _, t := range ts { + for _, t := range ts.Items { parts := strings.Split(t.Name, ".") if parts[0] == "blk" { // join first and second part, e.g. blk.%d diff --git a/llm/gguf.go b/llm/gguf.go index a8427aed..98158313 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -2,11 +2,16 @@ package llm import ( "bytes" + "cmp" "encoding/binary" "encoding/json" "fmt" "io" + "log/slog" + "slices" "strings" + + "golang.org/x/exp/maps" ) type containerGGUF struct { @@ -88,7 +93,8 @@ type gguf struct { kv KV tensors []*Tensor - parameters uint64 + parameters uint64 + tensorOffset uint64 scratch [16 << 10]byte } @@ -100,16 +106,15 @@ func newGGUF(container *containerGGUF) *gguf { } } -func NewGGUFV3(bo binary.ByteOrder) *gguf { - return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3}) -} - func (llm *gguf) KV() KV { return llm.kv } func (llm *gguf) Tensors() Tensors { - return llm.tensors + return Tensors{ + Items: llm.tensors, + Offset: llm.tensorOffset, + } } func (llm *gguf) numTensor() uint64 { @@ -199,7 +204,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { return fmt.Errorf("failed to read tensor dimensions: %w", err) } - shape := [4]uint64{1, 1, 1, 1} + shape := make([]uint64, dims) for i := 0; uint32(i) < dims; i++ { shape[i], err = readGGUF[uint64](llm, rs) if err != nil { @@ -236,13 +241,21 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { alignment = 32 } + offset, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + padding := ggufPadding(offset, int64(alignment)) + llm.tensorOffset = uint64(offset + padding) + for _, tensor := range llm.tensors { offset, err := rs.Seek(0, io.SeekCurrent) if err != nil { return fmt.Errorf("failed to get current offset: %w", err) } - padding := llm.padding(offset, int64(alignment)) + padding := ggufPadding(offset, int64(alignment)) if _, err := rs.Seek(padding, io.SeekCurrent); err != nil { return fmt.Errorf("failed to seek to init padding: %w", err) } @@ -261,12 +274,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) { return t, err } -func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error { - if err := binary.Write(w, llm.ByteOrder, t); err != nil { +func writeGGUF[V any](w io.Writer, t uint32, v V) error { + if err := binary.Write(w, binary.LittleEndian, t); err != nil { return err } - return binary.Write(w, llm.ByteOrder, v) + return binary.Write(w, binary.LittleEndian, v) } func readGGUFV1String(llm *gguf, r io.Reader) (string, error) { @@ -330,12 +343,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) { return string(buf), nil } -func writeGGUFString(llm *gguf, w io.Writer, s string) error { - if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil { +func writeGGUFString(w io.Writer, s string) error { + if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil { return err } - if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil { + if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil { return err } @@ -476,216 +489,72 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) { return a, nil } -func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error { - if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil { +// writeGGUFArray writes a slice s of type E to the write with a gguf type of t +func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error { + if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil { return err } - if err := binary.Write(w, llm.ByteOrder, t); err != nil { + if err := binary.Write(w, binary.LittleEndian, t); err != nil { return err } - if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil { + if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil { return err } - for _, e := range s { - if err := binary.Write(w, llm.ByteOrder, e); err != nil { - return err - } - } - - return nil + return binary.Write(w, binary.LittleEndian, s) } -var ggufKVOrder = map[string][]string{ - "llama": { - "general.architecture", - "general.name", - "llama.vocab_size", - "llama.context_length", - "llama.embedding_length", - "llama.block_count", - "llama.feed_forward_length", - "llama.attention.head_count", - "llama.attention.head_count_kv", - "llama.attention.layer_norm_rms_epsilon", - "llama.rope.freq_base", - "llama.rope.dimension_count", - "llama.expert_count", - "llama.expert_used_count", - "gemma.context_length", - "gemma.embedding_length", - "gemma.block_count", - "gemma.feed_forward_length", - "gemma.attention.head_count", - "gemma.attention.head_count_kv", - "gemma.attention.layer_norm_rms_epsilon", - "gemma.attention.key_length", - "gemma.attention.value_length", - "general.file_type", - "tokenizer.ggml.pre", - "tokenizer.ggml.model", - "tokenizer.ggml.tokens", - "tokenizer.ggml.scores", - "tokenizer.ggml.merges", - "tokenizer.ggml.token_type", - "tokenizer.ggml.bos_token_id", - "tokenizer.ggml.eos_token_id", - "tokenizer.ggml.unknown_token_id", - "tokenizer.ggml.padding_token_id", - "tokenizer.ggml.add_bos_token", - "tokenizer.ggml.add_eos_token", - "tokenizer.chat_template", - "bert.pooling_type", - }, -} - -func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error { - switch llm.Version { - case 3: - llm.V3.NumTensor = uint64(len(tensors)) - llm.V3.NumKV = uint64(len(kv)) - default: - return fmt.Errorf("not implemented: ggufv%d", llm.Version) - } - - if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil { +func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { + if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { return err } - if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil { + if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil { return err } - if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil { + if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil { return err } - if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil { + if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil { return err } - kvCheck := make(map[string]bool) - for k := range kv { - kvCheck[k] = false - } + keys := maps.Keys(kv) + slices.Sort(keys) - for _, k := range ggufKVOrder["llama"] { - v, ok := kv[k] - if !ok { - continue - } - kvCheck[k] = true - - if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil { - return err - } - - var err error - switch v := v.(type) { - case uint32: - err = writeGGUF(llm, ws, ggufTypeUint32, v) - case float32: - err = writeGGUF(llm, ws, ggufTypeFloat32, v) - case bool: - err = writeGGUF(llm, ws, ggufTypeBool, v) - case string: - err = writeGGUFString(llm, ws, v) - case []int32: - err = writeGGUFArray(llm, ws, ggufTypeInt32, v) - case []uint32: - err = writeGGUFArray(llm, ws, ggufTypeUint32, v) - case []float32: - err = writeGGUFArray(llm, ws, ggufTypeFloat32, v) - case []string: - if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil { - return err - } - - for _, e := range v { - if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil { - return err - } - } - default: - return fmt.Errorf("improper type for '%s'", k) - } - if err != nil { + for _, key := range keys { + if err := ggufWriteKV(ws, key, kv[key]); err != nil { return err } } - for k, v := range kvCheck { - if !v { - return fmt.Errorf("Didn't know how to write kv %s", k) + slices.SortFunc(ts, func(a, b Tensor) int { + var i, j int + if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 { + return cmp.Compare(a.Name, b.Name) + } else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 { + return cmp.Compare(a.Name, b.Name) } - } - for _, tensor := range tensors { - if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil { - return err - } - - var dims int - for cnt := range len(tensor.Shape) { - if tensor.Shape[cnt] > 0 { - dims++ - } - } - - if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil { - return err - } - - for i := range dims { - if err := binary.Write(ws, llm.ByteOrder, tensor.Shape[dims-1-i]); err != nil { - return err - } - } - - if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil { + return cmp.Compare(i, j) + }) + + var s uint64 + for _, t := range ts { + t.Offset = s + if err := ggufWriteTensorInfo(ws, t); err != nil { return err } + s += t.Size() } var alignment int64 = 32 - for _, tensor := range tensors { - offset, err := ws.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - - padding := llm.padding(offset, alignment) - if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil { - return err - } - - if _, err := tensor.WriteTo(ws); err != nil { + for _, t := range ts { + if err := ggufWriteTensor(ws, t, alignment); err != nil { return err } } @@ -693,6 +562,102 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error { return nil } -func (gguf) padding(offset, align int64) int64 { +func ggufWriteKV(ws io.WriteSeeker, k string, v any) error { + slog.Debug(k, "type", fmt.Sprintf("%T", v)) + if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil { + return err + } + + var err error + switch v := v.(type) { + case uint32: + err = writeGGUF(ws, ggufTypeUint32, v) + case float32: + err = writeGGUF(ws, ggufTypeFloat32, v) + case bool: + err = writeGGUF(ws, ggufTypeBool, v) + case string: + err = writeGGUFString(ws, v) + case []int32: + err = writeGGUFArray(ws, ggufTypeInt32, v) + case []uint32: + err = writeGGUFArray(ws, ggufTypeUint32, v) + case []float32: + err = writeGGUFArray(ws, ggufTypeFloat32, v) + case []string: + if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil { + return err + } + + for _, e := range v { + if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil { + return err + } + } + default: + return fmt.Errorf("improper type for '%s'", k) + } + + return err +} + +func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error { + slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset) + if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil { + return err + } + + for i := range len(t.Shape) { + if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil { + return err + } + } + + if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil { + return err + } + + return binary.Write(ws, binary.LittleEndian, t.Offset) +} + +func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error { + offset, err := ws.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil { + return err + } + + _, err = t.WriteTo(ws) + return err +} + +func ggufPadding(offset, align int64) int64 { return (align - offset%align) % align } diff --git a/llm/memory_test.go b/llm/memory_test.go index f972f927..3220c8df 100644 --- a/llm/memory_test.go +++ b/llm/memory_test.go @@ -2,25 +2,23 @@ package llm import ( "bytes" - "encoding/binary" "fmt" "os" "testing" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/gpu" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestEstimateGPULayers(t *testing.T) { - envconfig.Debug = true + t.Setenv("OLLAMA_DEBUG", "1") + modelName := "dummy" f, err := os.CreateTemp(t.TempDir(), modelName) require.NoError(t, err) defer f.Close() - gguf := NewGGUFV3(binary.LittleEndian) inputLayerCount := 5 tensors := []Tensor{ @@ -32,7 +30,7 @@ func TestEstimateGPULayers(t *testing.T) { {Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, } assert.Len(t, tensors, inputLayerCount+1) - err = gguf.Encode(f, KV{ + err = WriteGGUF(f, KV{ "general.architecture": "llama", "general.name": "name", "llama.context_length": uint32(32), diff --git a/llm/patches/11-phi3-sliding-window.diff b/llm/patches/11-phi3-sliding-window.diff new file mode 100644 index 00000000..fde3dd21 --- /dev/null +++ b/llm/patches/11-phi3-sliding-window.diff @@ -0,0 +1,43 @@ +From 6eedae4cf2fcc8015dac79cb3f28f61fcabacab2 Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Wed, 31 Jul 2024 14:57:04 -0700 +Subject: [PATCH] phi3 sliding window + +--- + src/llama.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/llama.cpp b/src/llama.cpp +index a207451f..f2872d4e 100644 +--- a/src/llama.cpp ++++ b/src/llama.cpp +@@ -4893,7 +4893,7 @@ static void llm_load_hparams( + } break; + case LLM_ARCH_PHI3: + { +- ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa); ++ ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false); + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + switch (hparams.n_layer) { +@@ -10762,7 +10762,7 @@ struct llm_build_context { + struct ggml_tensor * inp_pos = build_inp_pos(); + + // KQ_mask (mask for 1 head, it will be broadcasted to all heads) +- struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa(); ++ struct ggml_tensor * KQ_mask = hparams.n_swa > 0 ? build_inp_KQ_mask_swa() : build_inp_KQ_mask(); + + for (int il = 0; il < n_layer; ++il) { + auto residual = inpL; +@@ -10820,7 +10820,7 @@ struct llm_build_context { + + cur = llm_build_kv(ctx0, lctx, kv_self, gf, + model.layers[il].wo, model.layers[il].bo, +- Kcur, Vcur, Qcur, KQ_mask_swa, n_tokens, kv_head, n_kv, 1.0f, cb, il); ++ Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f, cb, il); + } + + if (il == n_layer - 1) { +-- +2.45.2 + diff --git a/llm/server.go b/llm/server.go index afde077e..7fadb0c9 100644 --- a/llm/server.go +++ b/llm/server.go @@ -163,7 +163,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } else { servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant } - demandLib := envconfig.LLMLibrary + demandLib := envconfig.LLMLibrary() if demandLib != "" { serverPath := availableServers[demandLib] if serverPath == "" { @@ -195,7 +195,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU)) } - if envconfig.Debug { + if envconfig.Debug() { params = append(params, "--verbose") } @@ -221,7 +221,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr params = append(params, "--memory-f32") } - flashAttnEnabled := envconfig.FlashAttention + flashAttnEnabled := envconfig.FlashAttention() for _, g := range gpus { // only cuda (compute capability 7+) and metal support flash attention @@ -382,7 +382,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } slog.Info("starting llama server", "cmd", s.cmd.String()) - if envconfig.Debug { + if envconfig.Debug() { filteredEnv := []string{} for _, ev := range s.cmd.Env { if strings.HasPrefix(ev, "CUDA_") || diff --git a/openai/openai.go b/openai/openai.go index 5bd80660..e66d9416 100644 --- a/openai/openai.go +++ b/openai/openai.go @@ -164,9 +164,15 @@ type ListCompletion struct { } type EmbeddingList struct { - Object string `json:"object"` - Data []Embedding `json:"data"` - Model string `json:"model"` + Object string `json:"object"` + Data []Embedding `json:"data"` + Model string `json:"model"` + Usage EmbeddingUsage `json:"usage,omitempty"` +} + +type EmbeddingUsage struct { + PromptTokens int `json:"prompt_tokens"` + TotalTokens int `json:"total_tokens"` } func NewError(code int, message string) ErrorResponse { @@ -332,6 +338,10 @@ func toEmbeddingList(model string, r api.EmbedResponse) EmbeddingList { Object: "list", Data: data, Model: model, + Usage: EmbeddingUsage{ + PromptTokens: r.PromptEvalCount, + TotalTokens: r.PromptEvalCount, + }, } } diff --git a/server/download.go b/server/download.go index 45483ba6..10074554 100644 --- a/server/download.go +++ b/server/download.go @@ -61,6 +61,36 @@ type blobDownloadPart struct { *blobDownload `json:"-"` } +type jsonBlobDownloadPart struct { + N int + Offset int64 + Size int64 + Completed int64 +} + +func (p *blobDownloadPart) MarshalJSON() ([]byte, error) { + return json.Marshal(jsonBlobDownloadPart{ + N: p.N, + Offset: p.Offset, + Size: p.Size, + Completed: p.Completed.Load(), + }) +} + +func (p *blobDownloadPart) UnmarshalJSON(b []byte) error { + var j jsonBlobDownloadPart + if err := json.Unmarshal(b, &j); err != nil { + return err + } + *p = blobDownloadPart{ + N: j.N, + Offset: j.Offset, + Size: j.Size, + } + p.Completed.Store(j.Completed) + return nil +} + const ( numDownloadParts = 64 minDownloadPartSize int64 = 100 * format.MegaByte diff --git a/server/images.go b/server/images.go index 836dbcc2..5f3eee88 100644 --- a/server/images.go +++ b/server/images.go @@ -70,7 +70,7 @@ type Model struct { License []string Digest string Options map[string]interface{} - Messages []Message + Messages []api.Message Template *template.Template } @@ -184,18 +184,13 @@ func (m *Model) String() string { for _, msg := range m.Messages { modelfile.Commands = append(modelfile.Commands, parser.Command{ Name: "message", - Args: fmt.Sprintf("%s %s", msg.Role, msg.Content), + Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content), }) } return modelfile.String() } -type Message struct { - Role string `json:"role"` - Content string `json:"content"` -} - type ConfigV2 struct { ModelFormat string `json:"model_format"` ModelFamily string `json:"model_family"` @@ -646,7 +641,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio return err } - if !envconfig.NoPrune && old != nil { + if !envconfig.NoPrune() && old != nil { if err := old.RemoveLayers(); err != nil { return err } @@ -885,7 +880,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu // build deleteMap to prune unused layers deleteMap := make(map[string]struct{}) - if !envconfig.NoPrune { + if !envconfig.NoPrune() { manifest, _, err = GetManifest(mp) if err != nil && !errors.Is(err, os.ErrNotExist) { return err diff --git a/server/manifest_test.go b/server/manifest_test.go index ca6c3d2e..a4af5d5e 100644 --- a/server/manifest_test.go +++ b/server/manifest_test.go @@ -7,7 +7,6 @@ import ( "slices" "testing" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/types/model" ) @@ -108,7 +107,6 @@ func TestManifests(t *testing.T) { t.Run(n, func(t *testing.T) { d := t.TempDir() t.Setenv("OLLAMA_MODELS", d) - envconfig.LoadConfig() for _, p := range wants.ps { createManifest(t, d, p) diff --git a/server/model.go b/server/model.go index c6d3078f..f2946a0b 100644 --- a/server/model.go +++ b/server/model.go @@ -81,112 +81,43 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe return layers, nil } -func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error { - stat, err := file.Stat() - if err != nil { - return err - } - - r, err := zip.NewReader(file, stat.Size()) - if err != nil { - return err - } - - fn(api.ProgressResponse{Status: "unpacking model metadata"}) - for _, f := range r.File { - if !filepath.IsLocal(f.Name) { - return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name) - } - - n := filepath.Join(p, f.Name) - if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil { - return err - } - - // TODO(mxyng): this should not write out all files to disk - outfile, err := os.Create(n) - if err != nil { - return err - } - defer outfile.Close() - - infile, err := f.Open() - if err != nil { - return err - } - defer infile.Close() - - if _, err = io.Copy(outfile, infile); err != nil { - return err - } - - if err := outfile.Close(); err != nil { - return err - } - - if err := infile.Close(); err != nil { - return err - } - } - - return nil -} - -func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) { - tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "") - if err != nil { - return nil, err - } - defer os.RemoveAll(tempDir) - - if err := extractFromZipFile(tempDir, file, fn); err != nil { - return nil, err - } - - mf, err := convert.GetModelFormat(tempDir) +func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) { + fi, err := f.Stat() if err != nil { return nil, err } - params, err := mf.GetParams(tempDir) + r, err := zip.NewReader(f, fi.Size()) if err != nil { return nil, err } - mArch, err := mf.GetModelArch("", tempDir, params) + p, err := os.MkdirTemp(filepath.Dir(f.Name()), "") if err != nil { return nil, err } - - fn(api.ProgressResponse{Status: "processing tensors"}) - if err := mArch.GetTensors(); err != nil { - return nil, err - } - - if err := mArch.LoadVocab(); err != nil { - return nil, err - } + defer os.RemoveAll(p) fn(api.ProgressResponse{Status: "converting model"}) - // TODO(mxyng): this should write directly into a layer // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model") - temp, err := os.CreateTemp(tempDir, "fp16") + t, err := os.CreateTemp(p, "fp16") if err != nil { return nil, err } - defer temp.Close() - defer os.Remove(temp.Name()) + defer t.Close() + defer os.Remove(t.Name()) - if err = mArch.WriteGGUF(temp); err != nil { + fn(api.ProgressResponse{Status: "converting model"}) + if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil { return nil, err } - if _, err := temp.Seek(0, io.SeekStart); err != nil { + if _, err := t.Seek(0, io.SeekStart); err != nil { return nil, err } - layer, err := NewLayer(temp, "application/vnd.ollama.image.model") + layer, err := NewLayer(t, "application/vnd.ollama.image.model") if err != nil { return nil, err } diff --git a/server/model_test.go b/server/model_test.go index 5829adfc..0a2225d5 100644 --- a/server/model_test.go +++ b/server/model_test.go @@ -1,16 +1,11 @@ package server import ( - "archive/zip" "bytes" "encoding/json" - "errors" "fmt" - "io" "os" "path/filepath" - "slices" - "strings" "testing" "github.com/google/go-cmp/cmp" @@ -18,103 +13,6 @@ import ( "github.com/ollama/ollama/template" ) -func createZipFile(t *testing.T, name string) *os.File { - t.Helper() - - f, err := os.CreateTemp(t.TempDir(), "") - if err != nil { - t.Fatal(err) - } - - zf := zip.NewWriter(f) - defer zf.Close() - - zh, err := zf.CreateHeader(&zip.FileHeader{Name: name}) - if err != nil { - t.Fatal(err) - } - - if _, err := io.Copy(zh, bytes.NewReader([]byte(""))); err != nil { - t.Fatal(err) - } - - return f -} - -func TestExtractFromZipFile(t *testing.T) { - cases := []struct { - name string - expect []string - err error - }{ - { - name: "good", - expect: []string{"good"}, - }, - { - name: strings.Join([]string{"path", "..", "to", "good"}, string(os.PathSeparator)), - expect: []string{filepath.Join("to", "good")}, - }, - { - name: strings.Join([]string{"path", "..", "to", "..", "good"}, string(os.PathSeparator)), - expect: []string{"good"}, - }, - { - name: strings.Join([]string{"path", "to", "..", "..", "good"}, string(os.PathSeparator)), - expect: []string{"good"}, - }, - { - name: strings.Join([]string{"..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "bad"}, string(os.PathSeparator)), - err: zip.ErrInsecurePath, - }, - { - name: strings.Join([]string{"path", "..", "..", "to", "bad"}, string(os.PathSeparator)), - err: zip.ErrInsecurePath, - }, - } - - for _, tt := range cases { - t.Run(tt.name, func(t *testing.T) { - f := createZipFile(t, tt.name) - defer f.Close() - - tempDir := t.TempDir() - if err := extractFromZipFile(tempDir, f, func(api.ProgressResponse) {}); !errors.Is(err, tt.err) { - t.Fatal(err) - } - - var matches []string - if err := filepath.Walk(tempDir, func(p string, fi os.FileInfo, err error) error { - if err != nil { - return err - } - - if !fi.IsDir() { - matches = append(matches, p) - } - - return nil - }); err != nil { - t.Fatal(err) - } - - var actual []string - for _, match := range matches { - rel, err := filepath.Rel(tempDir, match) - if err != nil { - t.Error(err) - } - - actual = append(actual, rel) - } - - if !slices.Equal(actual, tt.expect) { - t.Fatalf("expected %d files, got %d", len(tt.expect), len(matches)) - } - }) - } -} - func readFile(t *testing.T, base, name string) *bytes.Buffer { t.Helper() diff --git a/server/modelpath.go b/server/modelpath.go index 3fdb4238..354eeed7 100644 --- a/server/modelpath.go +++ b/server/modelpath.go @@ -105,9 +105,7 @@ func (mp ModelPath) GetShortTagname() string { // GetManifestPath returns the path to the manifest file for the given model path, it is up to the caller to create the directory if it does not exist. func (mp ModelPath) GetManifestPath() (string, error) { - dir := envconfig.ModelsDir - - return filepath.Join(dir, "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag), nil + return filepath.Join(envconfig.Models(), "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag), nil } func (mp ModelPath) BaseURL() *url.URL { @@ -118,9 +116,7 @@ func (mp ModelPath) BaseURL() *url.URL { } func GetManifestPath() (string, error) { - dir := envconfig.ModelsDir - - path := filepath.Join(dir, "manifests") + path := filepath.Join(envconfig.Models(), "manifests") if err := os.MkdirAll(path, 0o755); err != nil { return "", err } @@ -129,8 +125,6 @@ func GetManifestPath() (string, error) { } func GetBlobsPath(digest string) (string, error) { - dir := envconfig.ModelsDir - // only accept actual sha256 digests pattern := "^sha256[:-][0-9a-fA-F]{64}$" re := regexp.MustCompile(pattern) @@ -140,7 +134,7 @@ func GetBlobsPath(digest string) (string, error) { } digest = strings.ReplaceAll(digest, ":", "-") - path := filepath.Join(dir, "blobs", digest) + path := filepath.Join(envconfig.Models(), "blobs", digest) dirPath := filepath.Dir(path) if digest == "" { dirPath = path diff --git a/server/modelpath_test.go b/server/modelpath_test.go index 6c4dfbee..849e0fa7 100644 --- a/server/modelpath_test.go +++ b/server/modelpath_test.go @@ -7,8 +7,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - "github.com/ollama/ollama/envconfig" ) func TestGetBlobsPath(t *testing.T) { @@ -63,7 +61,6 @@ func TestGetBlobsPath(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { t.Setenv("OLLAMA_MODELS", dir) - envconfig.LoadConfig() got, err := GetBlobsPath(tc.digest) diff --git a/server/routes.go b/server/routes.go index a560f369..a745fb20 100644 --- a/server/routes.go +++ b/server/routes.go @@ -164,17 +164,6 @@ func (s *Server) GenerateHandler(c *gin.Context) { } } - var b bytes.Buffer - if req.Context != nil { - s, err := r.Detokenize(c.Request.Context(), req.Context) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - b.WriteString(s) - } - var values template.Values if req.Suffix != "" { values.Prompt = prompt @@ -187,6 +176,10 @@ func (s *Server) GenerateHandler(c *gin.Context) { msgs = append(msgs, api.Message{Role: "system", Content: m.System}) } + if req.Context == nil { + msgs = append(msgs, m.Messages...) + } + for _, i := range images { msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)}) } @@ -194,6 +187,16 @@ func (s *Server) GenerateHandler(c *gin.Context) { values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt}) } + var b bytes.Buffer + if req.Context != nil { + s, err := r.Detokenize(c.Request.Context(), req.Context) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + b.WriteString(s) + } + if err := tmpl.Execute(&b, values); err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -243,7 +246,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { ch <- gin.H{"error": err.Error()} return } - res.Context = append(req.Context, tokens...) + res.Context = tokens } } @@ -1053,7 +1056,7 @@ func (s *Server) GenerateRoutes() http.Handler { for _, prop := range openAIProperties { config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop) } - config.AllowOrigins = envconfig.AllowOrigins + config.AllowOrigins = envconfig.Origins() r := gin.Default() r.Use( @@ -1098,7 +1101,7 @@ func (s *Server) GenerateRoutes() http.Handler { func Serve(ln net.Listener) error { level := slog.LevelInfo - if envconfig.Debug { + if envconfig.Debug() { level = slog.LevelDebug } @@ -1126,7 +1129,7 @@ func Serve(ln net.Listener) error { return err } - if !envconfig.NoPrune { + if !envconfig.NoPrune() { // clean up unused layers and manifests if err := PruneLayers(); err != nil { return err @@ -1329,11 +1332,12 @@ func (s *Server) ChatHandler(c *gin.Context) { return } + msgs := append(m.Messages, req.Messages...) if req.Messages[0].Role != "system" && m.System != "" { - req.Messages = append([]api.Message{{Role: "system", Content: m.System}}, req.Messages...) + msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...) } - prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages, req.Tools) + prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return diff --git a/server/routes_create_test.go b/server/routes_create_test.go index e801a74f..9b7009df 100644 --- a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -2,7 +2,6 @@ package server import ( "bytes" - "encoding/binary" "encoding/json" "fmt" "io" @@ -15,7 +14,6 @@ import ( "github.com/gin-gonic/gin" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/llm" ) @@ -30,7 +28,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { } defer f.Close() - if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil { + if err := llm.WriteGGUF(f, kv, ti); err != nil { t.Fatal(err) } @@ -89,7 +87,6 @@ func TestCreateFromBin(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -117,7 +114,6 @@ func TestCreateFromModel(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -160,7 +156,6 @@ func TestCreateRemovesLayers(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -209,7 +204,6 @@ func TestCreateUnsetsSystem(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -267,7 +261,6 @@ func TestCreateMergeParameters(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -372,7 +365,6 @@ func TestCreateReplacesMessages(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -450,7 +442,6 @@ func TestCreateTemplateSystem(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -534,7 +525,6 @@ func TestCreateLicenses(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ @@ -582,7 +572,6 @@ func TestCreateDetectTemplate(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server t.Run("matched", func(t *testing.T) { diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go index 33a97a73..2354d730 100644 --- a/server/routes_delete_test.go +++ b/server/routes_delete_test.go @@ -10,7 +10,6 @@ import ( "github.com/gin-gonic/gin" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/types/model" ) @@ -19,7 +18,6 @@ func TestDelete(t *testing.T) { p := t.TempDir() t.Setenv("OLLAMA_MODELS", p) - envconfig.LoadConfig() var s Server diff --git a/server/routes_list_test.go b/server/routes_list_test.go index c2d9c113..29e3214c 100644 --- a/server/routes_list_test.go +++ b/server/routes_list_test.go @@ -9,14 +9,12 @@ import ( "github.com/gin-gonic/gin" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/envconfig" ) func TestList(t *testing.T) { gin.SetMode(gin.TestMode) t.Setenv("OLLAMA_MODELS", t.TempDir()) - envconfig.LoadConfig() expectNames := []string{ "mistral:7b-instruct-q4_0", diff --git a/server/routes_test.go b/server/routes_test.go index 97786ba2..17da2305 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -19,7 +19,6 @@ import ( "github.com/stretchr/testify/require" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/openai" "github.com/ollama/ollama/parser" @@ -347,7 +346,6 @@ func Test_Routes(t *testing.T) { } t.Setenv("OLLAMA_MODELS", t.TempDir()) - envconfig.LoadConfig() s := &Server{} router := s.GenerateRoutes() @@ -378,7 +376,6 @@ func Test_Routes(t *testing.T) { func TestCase(t *testing.T) { t.Setenv("OLLAMA_MODELS", t.TempDir()) - envconfig.LoadConfig() cases := []string{ "mistral", @@ -458,7 +455,6 @@ func TestCase(t *testing.T) { func TestShow(t *testing.T) { t.Setenv("OLLAMA_MODELS", t.TempDir()) - envconfig.LoadConfig() var s Server diff --git a/server/sched.go b/server/sched.go index 92b8d508..700642c6 100644 --- a/server/sched.go +++ b/server/sched.go @@ -5,9 +5,11 @@ import ( "errors" "fmt" "log/slog" + "os" "reflect" "runtime" "sort" + "strconv" "strings" "sync" "time" @@ -59,11 +61,12 @@ var defaultParallel = 4 var ErrMaxQueue = fmt.Errorf("server busy, please try again. maximum pending requests exceeded") func InitScheduler(ctx context.Context) *Scheduler { + maxQueue := envconfig.MaxQueue() sched := &Scheduler{ - pendingReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests), - finishedReqCh: make(chan *LlmRequest, envconfig.MaxQueuedRequests), - expiredCh: make(chan *runnerRef, envconfig.MaxQueuedRequests), - unloadedCh: make(chan interface{}, envconfig.MaxQueuedRequests), + pendingReqCh: make(chan *LlmRequest, maxQueue), + finishedReqCh: make(chan *LlmRequest, maxQueue), + expiredCh: make(chan *runnerRef, maxQueue), + unloadedCh: make(chan interface{}, maxQueue), loaded: make(map[string]*runnerRef), newServerFn: llm.NewLlamaServer, getGpuFn: gpu.GetGPUInfo, @@ -126,7 +129,7 @@ func (s *Scheduler) processPending(ctx context.Context) { slog.Debug("pending request cancelled or timed out, skipping scheduling") continue } - numParallel := envconfig.NumParallel + numParallel := int(envconfig.NumParallel()) // TODO (jmorganca): multimodal models don't support parallel yet // see https://github.com/ollama/ollama/issues/4165 if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 { @@ -148,7 +151,7 @@ func (s *Scheduler) processPending(ctx context.Context) { pending.useLoadedRunner(runner, s.finishedReqCh) break } - } else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners { + } else if envconfig.MaxRunners() > 0 && loadedCount >= int(envconfig.MaxRunners()) { slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount) runnerToExpire = s.findRunnerToUnload() } else { @@ -161,7 +164,7 @@ func (s *Scheduler) processPending(ctx context.Context) { gpus = s.getGpuFn() } - if envconfig.MaxRunners <= 0 { + if envconfig.MaxRunners() <= 0 { // No user specified MaxRunners, so figure out what automatic setting to use // If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs // if any GPU has unreliable free memory reporting, 1x the number of GPUs @@ -173,11 +176,13 @@ func (s *Scheduler) processPending(ctx context.Context) { } } if allReliable { - envconfig.MaxRunners = defaultModelsPerGPU * len(gpus) + // HACK + os.Setenv("OLLAMA_MAX_LOADED_MODELS", strconv.Itoa(defaultModelsPerGPU*len(gpus))) slog.Debug("updating default concurrency", "OLLAMA_MAX_LOADED_MODELS", envconfig.MaxRunners, "gpu_count", len(gpus)) } else { + // HACK + os.Setenv("OLLAMA_MAX_LOADED_MODELS", strconv.Itoa(len(gpus))) slog.Info("one or more GPUs detected that are unable to accurately report free memory - disabling default concurrency") - envconfig.MaxRunners = len(gpus) } } @@ -404,7 +409,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, if numParallel < 1 { numParallel = 1 } - sessionDuration := envconfig.KeepAlive + sessionDuration := envconfig.KeepAlive() if req.sessionDuration != nil { sessionDuration = req.sessionDuration.Duration } @@ -699,7 +704,7 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL // First attempt to fit the model into a single GPU for _, p := range numParallelToTry { req.opts.NumCtx = req.origNumCtx * p - if !envconfig.SchedSpread { + if !envconfig.SchedSpread() { for _, g := range sgl { if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok { slog.Info("new model will fit in available VRAM in single GPU, loading", "model", req.model.ModelPath, "gpu", g.ID, "parallel", p, "available", g.FreeMemory, "required", format.HumanBytes2(estimatedVRAM)) diff --git a/server/sched_test.go b/server/sched_test.go index 4f8789fa..80395714 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -3,7 +3,6 @@ package server import ( "bytes" "context" - "encoding/binary" "fmt" "log/slog" "os" @@ -12,7 +11,6 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/app/lifecycle" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" @@ -115,8 +113,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est require.NoError(t, err) defer f.Close() - gguf := llm.NewGGUFV3(binary.LittleEndian) - err = gguf.Encode(f, llm.KV{ + require.NoError(t, llm.WriteGGUF(f, llm.KV{ "general.architecture": "llama", "general.name": "name", "llama.context_length": uint32(32), @@ -130,7 +127,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est }, []llm.Tensor{ {Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, - }) + })) require.NoError(t, err) fname := f.Name() @@ -272,7 +269,7 @@ func TestRequestsMultipleLoadedModels(t *testing.T) { c.req.opts.NumGPU = 0 // CPU load, will be allowed d := newScenarioRequest(t, ctx, "ollama-model-3c", 30, nil) // Needs prior unloaded - envconfig.MaxRunners = 1 + t.Setenv("OLLAMA_MAX_LOADED_MODELS", "1") s.newServerFn = a.newServer slog.Info("a") s.pendingReqCh <- a.req @@ -291,7 +288,7 @@ func TestRequestsMultipleLoadedModels(t *testing.T) { require.Len(t, s.loaded, 1) s.loadedMu.Unlock() - envconfig.MaxRunners = 0 + t.Setenv("OLLAMA_MAX_LOADED_MODELS", "0") s.newServerFn = b.newServer slog.Info("b") s.pendingReqCh <- b.req @@ -362,7 +359,7 @@ func TestGetRunner(t *testing.T) { a := newScenarioRequest(t, ctx, "ollama-model-1a", 10, &api.Duration{Duration: 2 * time.Millisecond}) b := newScenarioRequest(t, ctx, "ollama-model-1b", 10, &api.Duration{Duration: 2 * time.Millisecond}) c := newScenarioRequest(t, ctx, "ollama-model-1c", 10, &api.Duration{Duration: 2 * time.Millisecond}) - envconfig.MaxQueuedRequests = 1 + t.Setenv("OLLAMA_MAX_QUEUE", "1") s := InitScheduler(ctx) s.getGpuFn = getGpuFn s.getCpuFn = getCpuFn