From 6b252918fb5e17f9be5975efe1681a92153b8379 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 3 Jun 2024 09:49:13 -0700 Subject: [PATCH 1/6] update convert test to check result data --- convert/convert_test.go | 111 +++++-- .../testdata/Meta-Llama-3-8B-Instruct.json | 313 ++++++++++++++++++ .../testdata/Mistral-7B-Instruct-v0.2.json | 313 ++++++++++++++++++ .../testdata/Mixtral-8x7B-Instruct-v0.1.json | 1 + convert/testdata/gemma-2b-it.json | 188 +++++++++++ llm/ggla.go | 14 +- llm/ggml.go | 7 +- llm/gguf.go | 14 +- 8 files changed, 924 insertions(+), 37 deletions(-) create mode 100644 convert/testdata/Meta-Llama-3-8B-Instruct.json create mode 100644 convert/testdata/Mistral-7B-Instruct-v0.2.json create mode 100644 convert/testdata/Mixtral-8x7B-Instruct-v0.1.json create mode 100644 convert/testdata/gemma-2b-it.json diff --git a/convert/convert_test.go b/convert/convert_test.go index 6aa33a49..a3727bed 100644 --- a/convert/convert_test.go +++ b/convert/convert_test.go @@ -1,29 +1,36 @@ -//go:build slow - package convert import ( + "crypto/sha256" + "encoding/json" + "flag" + "fmt" + "io" + "log/slog" + "math" "os" "path/filepath" + "slices" "testing" "github.com/ollama/ollama/llm" + "golang.org/x/exp/maps" ) -func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { +func convertFull(t *testing.T, d string) (*os.File, llm.KV, llm.Tensors) { t.Helper() - mf, err := GetModelFormat(p) + mf, err := GetModelFormat(d) if err != nil { t.Fatal(err) } - params, err := mf.GetParams(p) + params, err := mf.GetParams(d) if err != nil { t.Fatal(err) } - arch, err := mf.GetModelArch("", p, params) + arch, err := mf.GetModelArch("", d, params) if err != nil { t.Fatal(err) } @@ -50,53 +57,91 @@ func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { if err != nil { t.Fatal(err) } - defer r.Close() + t.Cleanup(func() { r.Close() }) - m, _, err := llm.DecodeGGML(r) + m, _, err := llm.DecodeGGML(r, math.MaxInt) if err != nil { t.Fatal(err) } - return m.KV(), 
m.Tensors() + if _, err := r.Seek(0, io.SeekStart); err != nil { + t.Fatal(err) + } + + return r, m.KV(), m.Tensors() +} + +func TestMain(m *testing.M) { + var level slog.Level + flag.TextVar(&level, "level", slog.LevelInfo, "log level") + flag.Parse() + slog.SetLogLoggerLevel(level) + os.Exit(m.Run()) } func TestConvertFull(t *testing.T) { - cases := []struct { - path string - arch string - tensors int - layers int - }{ - {"Meta-Llama-3-8B-Instruct", "llama", 291, 35}, - {"Mistral-7B-Instruct-v0.2", "llama", 291, 35}, - {"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35}, - {"gemma-2b-it", "gemma", 164, 20}, + cases := []string{ + "Meta-Llama-3-8B-Instruct", + "Mistral-7B-Instruct-v0.2", + "Mixtral-8x7B-Instruct-v0.1", + "gemma-2b-it", } - for _, tt := range cases { - t.Run(tt.path, func(t *testing.T) { - p := filepath.Join("testdata", tt.path) - if _, err := os.Stat(p); err != nil { + for i := range cases { + tt := cases[i] + t.Run(tt, func(t *testing.T) { + t.Parallel() + + p := filepath.Join("testdata", tt) + if testing.Short() { + t.Skip("skipping in short mode") + } else if _, err := os.Stat(p); err != nil { t.Skipf("%s not found", p) } - kv, tensors := convertFull(t, p) + f, kv, tensors := convertFull(t, p) + actual := make(map[string]string) + for k, v := range kv { + if s, ok := v.(json.Marshaler); !ok { + actual[k] = fmt.Sprintf("%v", v) + } else { + bts, err := json.Marshal(s) + if err != nil { + t.Fatal(err) + } - if kv.Architecture() != tt.arch { - t.Fatalf("expected llama, got %s", kv.Architecture()) + actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts)) + } } - if kv.FileType().String() != "F16" { - t.Fatalf("expected F16, got %s", kv.FileType()) + for _, tensor := range tensors.Items { + sha256sum := sha256.New() + sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size())) + if _, err := io.Copy(sha256sum, sr); err != nil { + t.Fatal(err) + } + + actual[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil)) } - if len(tensors) 
!= tt.tensors { - t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors)) + expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt))) + if err != nil { + t.Fatal(err) } - layers := tensors.Layers() - if len(layers) != tt.layers { - t.Fatalf("expected %d layers, got %d", tt.layers, len(layers)) + var expect map[string]string + if err := json.NewDecoder(expectFile).Decode(&expect); err != nil { + t.Fatal(err) + } + + keys := maps.Keys(expect) + slices.Sort(keys) + for _, k := range keys { + if v, ok := actual[k]; !ok { + t.Errorf("missing %s", k) + } else if v != expect[k] { + t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v) + } } }) } diff --git a/convert/testdata/Meta-Llama-3-8B-Instruct.json b/convert/testdata/Meta-Llama-3-8B-Instruct.json new file mode 100644 index 00000000..808826bb --- /dev/null +++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json @@ -0,0 +1,313 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "8192", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.rope.dimension_count": "128", + "llama.rope.freq_base": "500000", + "llama.vocab_size": "128256", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "tokenizer.ggml.model": "gpt2", + "tokenizer.ggml.pre": "llama-bpe", + "tokenizer.ggml.bos_token_id": "128000", + "tokenizer.ggml.eos_token_id": "128009", + "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b", + "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978", + "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a", + "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698", + "blk.0.attn_norm.weight": 
"7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee", + "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900", + "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd", + "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516", + "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb", + "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e", + "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc", + "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1", + "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8", + "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff", + "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0", + "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40", + "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f", + "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862", + "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d", + "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3", + "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c", + "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40", + "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539", + "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662", + "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718", + "blk.2.ffn_up.weight": 
"16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84", + "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe", + "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5", + "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343", + "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d", + "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02", + "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a", + "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4", + "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f", + "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e", + "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b", + "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291", + "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3", + "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8", + "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767", + "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217", + "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68", + "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee", + "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22", + "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9", + "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853", + "blk.4.attn_output.weight": 
"a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b", + "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04", + "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d", + "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156", + "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9", + "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4", + "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0", + "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406", + "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb", + "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73", + "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409", + "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27", + "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942", + "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1", + "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04", + "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94", + "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8", + "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960", + "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3", + "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6", + "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36", + "blk.7.attn_norm.weight": 
"3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf", + "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f", + "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f", + "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e", + "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e", + "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44", + "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2", + "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff", + "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f", + "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec", + "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1", + "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc", + "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30", + "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e", + "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5", + "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4", + "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6", + "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e", + "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593", + "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9", + "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542", + "blk.9.ffn_up.weight": 
"f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e", + "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f", + "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f", + "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b", + "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476", + "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3", + "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94", + "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a", + "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226", + "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a", + "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3", + "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187", + "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a", + "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4", + "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2", + "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc", + "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898", + "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f", + "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be", + "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3", + "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855", + 
"blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b", + "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f", + "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7", + "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6", + "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200", + "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f", + "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df", + "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6", + "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6", + "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238", + "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb", + "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc", + "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82", + "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3", + "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4", + "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943", + "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2", + "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360", + "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3", + "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9", + "blk.13.attn_v.weight": 
"ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706", + "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093", + "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5", + "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1", + "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae", + "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003", + "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8", + "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514", + "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b", + "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687", + "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4", + "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801", + "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76", + "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d", + "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500", + "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751", + "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429", + "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e", + "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec", + "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0", + "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333", + 
"blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e", + "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e", + "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651", + "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439", + "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0", + "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925", + "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25", + "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80", + "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05", + "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d", + "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09", + "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9", + "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02", + "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4", + "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7", + "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a", + "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835", + "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c", + "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb", + "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7", + "blk.18.ffn_norm.weight": 
"8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960", + "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4", + "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2", + "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164", + "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c", + "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72", + "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04", + "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf", + "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17", + "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa", + "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5", + "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f", + "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60", + "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84", + "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba", + "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790", + "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6", + "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e", + "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17", + "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a", + "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204", + 
"blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c", + "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2", + "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298", + "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53", + "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f", + "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789", + "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9", + "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a", + "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac", + "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee", + "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864", + "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a", + "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3", + "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e", + "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296", + "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28", + "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12", + "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de", + "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6", + "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703", + "blk.23.attn_norm.weight": 
"2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c", + "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7", + "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8", + "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf", + "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9", + "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe", + "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df", + "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965", + "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c", + "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850", + "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5", + "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117", + "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa", + "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b", + "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e", + "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac", + "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb", + "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2", + "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b", + "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f", + "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0", + 
"blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5", + "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346", + "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4", + "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666", + "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10", + "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371", + "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee", + "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833", + "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62", + "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b", + "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8", + "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a", + "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84", + "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604", + "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4", + "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2", + "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0", + "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64", + "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a", + "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0", + "blk.27.attn_k.weight": 
"cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925", + "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a", + "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3", + "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6", + "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40", + "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45", + "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a", + "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b", + "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e", + "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8", + "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9", + "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f", + "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3", + "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d", + "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f", + "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967", + "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642", + "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc", + "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa", + "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48", + "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada", + 
"blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c", + "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af", + "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b", + "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638", + "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0", + "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03", + "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa", + "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc", + "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1", + "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81", + "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566", + "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a", + "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f", + "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a", + "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f", + "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56", + "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d", + "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2", + "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b", + "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892", + "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e" +} diff 
--git a/convert/testdata/Mistral-7B-Instruct-v0.2.json b/convert/testdata/Mistral-7B-Instruct-v0.2.json new file mode 100644 index 00000000..1da4d2ad --- /dev/null +++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json @@ -0,0 +1,313 @@ +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "32768", + "llama.embedding_length": "", + "llama.feed_forward_length": "14336", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "llama.rope.dimension_count": "128", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676", + "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e", + "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6", + "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81", + "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be", + "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b", + "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373", + "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25", + "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd", + "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d", + "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e", + "blk.0.ffn_norm.weight": 
"43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287", + "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987", + "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712", + "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357", + "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740", + "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b", + "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e", + "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db", + "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346", + "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed", + "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2", + "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153", + "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24", + "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348", + "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a", + "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567", + "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397", + "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd", + "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1", + "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c", + "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075", + "blk.3.attn_norm.weight": 
"9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b", + "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610", + "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948", + "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38", + "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf", + "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432", + "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c", + "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88", + "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b", + "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd", + "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de", + "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa", + "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8", + "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf", + "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6", + "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65", + "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642", + "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316", + "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443", + "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d", + "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d", + "blk.5.attn_v.weight": 
"c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e", + "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e", + "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5", + "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b", + "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f", + "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932", + "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085", + "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075", + "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90", + "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555", + "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1", + "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f", + "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392", + "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab", + "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a", + "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f", + "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe", + "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c", + "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8", + "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387", + "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0", + "blk.7.ffn_norm.weight": 
"42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff", + "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5", + "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5", + "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d", + "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff", + "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369", + "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f", + "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa", + "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a", + "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f", + "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89", + "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645", + "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d", + "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995", + "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006", + "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e", + "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75", + "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57", + "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97", + "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a", + "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d", + "blk.10.attn_norm.weight": 
"59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f", + "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c", + "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278", + "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93", + "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08", + "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3", + "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6", + "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5", + "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677", + "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4", + "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950", + "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be", + "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403", + "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87", + "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60", + "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb", + "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42", + "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec", + "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761", + "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04", + "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66", + 
"blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10", + "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a", + "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d", + "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0", + "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04", + "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3", + "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be", + "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a", + "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b", + "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd", + "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5", + "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0", + "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683", + "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba", + "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be", + "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54", + "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a", + "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd", + "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9", + "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11", + "blk.14.ffn_gate.weight": 
"727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c", + "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca", + "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38", + "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522", + "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313", + "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a", + "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626", + "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6", + "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19", + "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9", + "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623", + "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15", + "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e", + "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad", + "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4", + "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048", + "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b", + "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee", + "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15", + "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9", + "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c", + 
"blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07", + "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1", + "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece", + "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed", + "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18", + "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196", + "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392", + "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e", + "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de", + "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b", + "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995", + "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa", + "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899", + "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156", + "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d", + "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb", + "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a", + "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd", + "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c", + "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4", + "blk.19.attn_output.weight": 
"ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe", + "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724", + "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd", + "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd", + "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9", + "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881", + "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd", + "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0", + "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308", + "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b", + "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185", + "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7", + "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8", + "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2", + "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d", + "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101", + "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e", + "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276", + "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e", + "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f", + "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca", + 
"blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30", + "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4", + "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462", + "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c", + "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87", + "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76", + "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0", + "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842", + "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73", + "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5", + "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8", + "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9", + "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7", + "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2", + "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4", + "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831", + "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795", + "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101", + "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf", + "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061", + "blk.23.ffn_norm.weight": 
"cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10", + "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801", + "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66", + "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb", + "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8", + "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9", + "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590", + "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf", + "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e", + "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab", + "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d", + "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6", + "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97", + "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b", + "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f", + "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074", + "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6", + "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4", + "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75", + "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3", + "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9", + 
"blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc", + "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509", + "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581", + "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e", + "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31", + "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6", + "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af", + "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287", + "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b", + "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c", + "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840", + "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54", + "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add", + "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e", + "blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb", + "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8", + "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1", + "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba", + "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794", + "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d", + "blk.28.attn_q.weight": 
"c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1", + "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c", + "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83", + "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83", + "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80", + "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05", + "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae", + "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859", + "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b", + "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53", + "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36", + "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89", + "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036", + "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2", + "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c", + "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743", + "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65", + "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3", + "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e", + "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267", + "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993", + 
"blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46", + "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04", + "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6", + "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8", + "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101", + "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd", + "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda", + "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba", + "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec", + "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf", + "blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268", + "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb", + "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330", + "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3" +} diff --git a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json @@ -0,0 +1 @@ +{} diff --git a/convert/testdata/gemma-2b-it.json b/convert/testdata/gemma-2b-it.json new file mode 100644 index 00000000..0482f1e1 --- /dev/null +++ b/convert/testdata/gemma-2b-it.json @@ -0,0 +1,188 @@ +{ + "general.architecture": "gemma", + "general.file_type": "1", + "general.quantization_version": "2", + "gemma.block_count": "18", + "gemma.context_length": "8192", + "gemma.embedding_length": "2048", + 
"gemma.feed_forward_length": "16384", + "gemma.attention.head_count": "8", + "gemma.attention.head_count_kv": "1", + "gemma.attention.key_length": "256", + "gemma.attention.value_length": "256", + "gemma.attention.layer_norm_rms_epsilon": "1e-06", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "2", + "tokenizer.ggml.eos_token_id": "1", + "tokenizer.ggml.padding_token_id": "0", + "tokenizer.ggml.unknown_token_id": "3", + "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8", + "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d", + "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda", + "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07", + "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f", + "blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb", + "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265", + "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791", + "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13", + "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7", + "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf", + "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448", + "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4", + "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406", + "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626", + "blk.1.attn_output.weight": 
"f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee", + "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a", + "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00", + "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79", + "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33", + "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22", + "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315", + "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044", + "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455", + "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5", + "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5", + "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4", + "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f", + "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061", + "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687", + "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1", + "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21", + "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20", + "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce", + "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991", + "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68", + "blk.3.ffn_down.weight": 
"58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e", + "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2", + "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2", + "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee", + "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db", + "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065", + "blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0", + "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde", + "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa", + "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec", + "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1", + "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3", + "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e", + "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5", + "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded", + "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536", + "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8", + "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9", + "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653", + "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557", + "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49", + "blk.5.ffn_up.weight": 
"555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14", + "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4", + "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a", + "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b", + "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003", + "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a", + "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd", + "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168", + "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb", + "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f", + "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7", + "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d", + "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb", + "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f", + "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774", + "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10", + "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b", + "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2", + "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe", + "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69", + "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f", + "blk.8.attn_output.weight": 
"cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6", + "blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c", + "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c", + "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef", + "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908", + "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73", + "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211", + "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183", + "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee", + "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7", + "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7", + "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054", + "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764", + "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57", + "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923", + "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb", + "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa", + "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010", + "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff", + "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763", + "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf", + "blk.10.ffn_down.weight": 
"bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b", + "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34", + "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f", + "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492", + "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df", + "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754", + "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea", + "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce", + "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789", + "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709", + "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e", + "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474", + "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0", + "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1", + "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b", + "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509", + "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae", + "blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e", + "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7", + "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104", + "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456", + 
"blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d", + "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3", + "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5", + "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f", + "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7", + "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9", + "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd", + "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1", + "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7", + "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd", + "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86", + "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0", + "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d", + "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8", + "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52", + "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c", + "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19", + "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d", + "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e", + "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c", + "blk.15.attn_norm.weight": 
"0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec", + "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca", + "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91", + "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6", + "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b", + "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f", + "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767", + "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5", + "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b", + "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93", + "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec", + "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74", + "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f", + "blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948", + "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca", + "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672", + "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9", + "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c", + "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465", + "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555", + "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d", + 
"blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94", + "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3", + "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c", + "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab", + "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf", + "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4" +} diff --git a/llm/ggla.go b/llm/ggla.go index 34c4f6ca..831f6071 100644 --- a/llm/ggla.go +++ b/llm/ggla.go @@ -36,6 +36,8 @@ type ggla struct { kv KV tensors []*Tensor + + tensorOffset uint64 } func newGGLA(container *containerGGLA) *ggla { @@ -50,7 +52,10 @@ func (llm *ggla) KV() KV { } func (llm *ggla) Tensors() Tensors { - return llm.tensors + return Tensors{ + Items: llm.tensors, + Offset: llm.tensorOffset, + } } func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) { @@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) { } llm.kv["alpha"] = alpha + offset, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + llm.tensorOffset = uint64(offset) + for { var dims uint32 if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil { diff --git a/llm/ggml.go b/llm/ggml.go index fddb5039..d7f2eef7 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -112,11 +112,14 @@ func (kv KV) ChatTemplate() string { return s } -type Tensors []*Tensor +type Tensors struct { + Items []*Tensor + Offset uint64 +} func (ts Tensors) Layers() map[string]Layer { layers := make(map[string]Layer) - for _, t := range ts { + for _, t := range ts.Items { parts := strings.Split(t.Name, ".") if parts[0] == "blk" { // join first and second part, e.g. 
blk.%d diff --git a/llm/gguf.go b/llm/gguf.go index a8427aed..aadfc4ba 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -89,6 +89,7 @@ type gguf struct { tensors []*Tensor parameters uint64 + tensorOffset uint64 scratch [16 << 10]byte } @@ -109,7 +110,10 @@ func (llm *gguf) KV() KV { } func (llm *gguf) Tensors() Tensors { - return llm.tensors + return Tensors{ + Items: llm.tensors, + Offset: llm.tensorOffset, + } } func (llm *gguf) numTensor() uint64 { @@ -236,6 +240,14 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { alignment = 32 } + offset, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + padding := llm.padding(offset, int64(alignment)) + llm.tensorOffset = uint64(offset + padding) + for _, tensor := range llm.tensors { offset, err := rs.Seek(0, io.SeekCurrent) if err != nil { From 5e9db9fb0bcefbe599734b02dd030f4a347ce576 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 31 May 2024 20:00:49 -0700 Subject: [PATCH 2/6] refactor convert --- convert/convert.go | 243 +++++------- convert/convert_gemma.go | 103 ++++++ convert/convert_llama.go | 182 +++++++++ convert/convert_mixtral.go | 89 +++++ convert/convert_test.go | 25 +- convert/gemma.go | 102 ----- convert/llama.go | 159 -------- convert/mistral.go | 84 ----- convert/mixtral.go | 87 ----- convert/reader.go | 74 ++++ convert/reader_safetensors.go | 140 +++++++ convert/reader_torch.go | 46 +++ convert/safetensors.go | 309 ---------------- .../testdata/Mistral-7B-Instruct-v0.2.json | 2 +- .../testdata/Mixtral-8x7B-Instruct-v0.1.json | 349 +++++++++++++++++- convert/tokenizer.go | 265 ++++++++++--- convert/tokenizer_spm.go | 83 +++++ convert/torch.go | 287 -------------- llm/gguf.go | 326 +++++++--------- llm/memory_test.go | 6 +- server/model.go | 26 +- server/routes_create_test.go | 5 +- server/routes_generate_test.go | 8 +- server/sched_test.go | 8 +- 24 files changed, 1514 insertions(+), 1494 deletions(-) create mode 100644 convert/convert_gemma.go create mode 100644 
convert/convert_llama.go create mode 100644 convert/convert_mixtral.go delete mode 100644 convert/gemma.go delete mode 100644 convert/llama.go delete mode 100644 convert/mistral.go delete mode 100644 convert/mixtral.go create mode 100644 convert/reader.go create mode 100644 convert/reader_safetensors.go create mode 100644 convert/reader_torch.go delete mode 100644 convert/safetensors.go create mode 100644 convert/tokenizer_spm.go delete mode 100644 convert/torch.go diff --git a/convert/convert.go b/convert/convert.go index 103de457..4ad64d72 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -1,200 +1,123 @@ package convert import ( - "cmp" - "encoding/binary" "encoding/json" + "errors" "fmt" "io" "log/slog" "os" "path/filepath" - "slices" - "strings" - "google.golang.org/protobuf/proto" - - "github.com/ollama/ollama/convert/sentencepiece" "github.com/ollama/ollama/llm" ) -const ( - _ int32 = iota - tokenTypeNormal - tokenTypeUnknown - tokenTypeControl - tokenTypeUserDefined - tokenTypeUnused - tokenTypeByte -) - -type Params struct { - Architectures []string `json:"architectures"` - VocabSize int `json:"vocab_size"` - HiddenSize int `json:"hidden_size"` // n_embd - HiddenLayers int `json:"num_hidden_layers"` // n_layer - ContextSize int `json:"max_position_embeddings"` - IntermediateSize int `json:"intermediate_size"` - AttentionHeads int `json:"num_attention_heads"` // n_head - KeyValHeads int `json:"num_key_value_heads"` - NormEPS float64 `json:"rms_norm_eps"` - BoSTokenID int `json:"bos_token_id"` - EoSTokenID int `json:"eos_token_id"` - HeadDimension int `json:"head_dim"` - PaddingTokenID int `json:"pad_token_id"` - RopeFrequencyBase float64 `json:"rope_theta"` - - Experts int `json:"num_local_experts"` - ExpertsUsed int `json:"num_experts_per_tok"` - - PreTokenizer string - - ByteOrder +type Parameters struct { + Architectures []string `json:"architectures"` + VocabSize uint32 `json:"vocab_size"` } -type ByteOrder interface { - binary.ByteOrder - 
binary.AppendByteOrder +func (Parameters) KV(t *Tokenizer) llm.KV { + kv := llm.KV{ + "general.file_type": uint32(1), + "general.quantization_version": uint32(2), + "tokenizer.ggml.pre": t.Pre, + "tokenizer.ggml.model": t.Vocabulary.Model, + "tokenizer.ggml.tokens": t.Vocabulary.Tokens, + "tokenizer.ggml.scores": t.Vocabulary.Scores, + "tokenizer.ggml.token_type": t.Vocabulary.Types, + } + + if t.Template != "" { + kv["tokenizer.chat_template"] = t.Template + } + + for _, sv := range t.SpecialVocabulary { + kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID) + kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken + } + + return kv } -type ModelArch interface { - GetTensors() error - LoadVocab() error - WriteGGUF(io.WriteSeeker) error +func (Parameters) specialTypes() []string { + return []string{ + "bos", "eos", "unk", "sep", "pad", "cls", "mask", + } } -type ModelFormat interface { - GetLayerName(string) (string, error) - GetTensors(string, *Params) ([]llm.Tensor, error) - GetParams(string) (*Params, error) - GetModelArch(string, string, *Params) (ModelArch, error) +func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []*llm.Tensor) error { + return llm.WriteGGUF(ws, kv, ts) } -type ModelData struct { - Path string - Name string - Params *Params - Vocab *Vocab - Tensors []llm.Tensor - Format ModelFormat +type Converter interface { + // KV maps parameters to LLM key-values + KV(*Tokenizer) llm.KV + // Tensors maps input tensors to LLM tensors. Model specific modifications can be done here. 
+ Tensors([]Tensor) []*llm.Tensor + + // tensorName returns the LLM tensor name for a specific input name + tensorName(string) string + // specialTypes returns any special token types the model uses + specialTypes() []string + writeFile(io.WriteSeeker, llm.KV, []*llm.Tensor) error } -func GetModelFormat(dirname string) (ModelFormat, error) { - files, err := filepath.Glob(filepath.Join(dirname, "*")) +func Convert(d string, ws io.WriteSeeker) error { + f, err := os.Open(filepath.Join(d, "config.json")) if err != nil { - return nil, err + return err + } + defer f.Close() + + var p Parameters + if err := json.NewDecoder(f).Decode(&p); err != nil { + return err } - for _, fn := range files { - if strings.HasSuffix(fn, ".safetensors") { - return &SafetensorFormat{}, nil - } else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") { - slog.Debug("model is torch") - return &TorchFormat{}, nil - } + if len(p.Architectures) < 1 { + return errors.New("unknown architecture") } - return nil, fmt.Errorf("couldn't determine model format") -} + var c Converter + switch p.Architectures[0] { + case "LlamaForCausalLM", "MistralForCausalLM": + c = &llama{} + case "MixtralForCausalLM": + c = &mixtral{} + case "GemmaForCausalLM": + c = &gemma{} + default: + return errors.New("unsupported architecture") + } -// Details on gguf's tokenizer can be found at: -// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer -type Vocab struct { - Tokens []string - Scores []float32 - Types []int32 - Merges []string -} - -func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) { - slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model"))) - in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model")) + bts, err := os.ReadFile(filepath.Join(d, "config.json")) if err != nil { - return nil, err + return err } - // To regenerate sentencepiece from the protobufs use: - // protoc -I=./ --go_out=./ 
sentencepiece_model.proto - modelProto := &sentencepiece.ModelProto{} - if err := proto.Unmarshal(in, modelProto); err != nil { - return nil, err + if err := json.Unmarshal(bts, c); err != nil { + return err } - v := &Vocab{ - Tokens: make([]string, 0), - Scores: make([]float32, 0), - Types: make([]int32, 0), + t, err := parseTokenizer(d, c.specialTypes()) + if err != nil { + return err } - pieces := modelProto.GetPieces() - for _, p := range pieces { - v.Tokens = append(v.Tokens, p.GetPiece()) - v.Scores = append(v.Scores, p.GetScore()) - t := p.GetType() - switch t { - case sentencepiece.ModelProto_SentencePiece_UNKNOWN: - case sentencepiece.ModelProto_SentencePiece_CONTROL: - case sentencepiece.ModelProto_SentencePiece_UNUSED: - case sentencepiece.ModelProto_SentencePiece_BYTE: - default: - t = sentencepiece.ModelProto_SentencePiece_NORMAL - } - v.Types = append(v.Types, int32(t)) - } - - slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens))) - - // add any additional tokens - addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json")) - if os.IsNotExist(err) { - return v, nil - } else if err != nil { - return nil, err - } - - slog.Info("reading user defined tokens") - - var extraTokenData map[string]int - if err := json.Unmarshal(addIn, &extraTokenData); err != nil { - return nil, err - } - - type token struct { - key string - pos int - } - - extraTokens := make([]token, 0) - for k, id := range extraTokenData { - extraTokens = append(extraTokens, token{k, id}) - } - - slices.SortFunc(extraTokens, func(a, b token) int { - return cmp.Compare(a.pos, b.pos) - }) - - numToks := len(v.Tokens) - - for cnt, t := range extraTokens { - // the token id should match the specific index for the total number of tokens - if t.pos != cnt+numToks { - return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key) - } - v.Tokens = append(v.Tokens, t.key) - v.Scores = append(v.Scores, -1000.0) - v.Types = append(v.Types, 
tokenTypeUserDefined) - } - slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens))) - - if params.VocabSize > len(v.Tokens) { - missingTokens := params.VocabSize - len(v.Tokens) - slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens)) - for cnt := range missingTokens { - v.Tokens = append(v.Tokens, fmt.Sprintf("", cnt+1)) - v.Scores = append(v.Scores, -1) - v.Types = append(v.Types, tokenTypeUserDefined) + if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) { + slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens)) + for i := range vocabSize - len(t.Vocabulary.Tokens) { + t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i)) + t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1) + t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined) } } - return v, nil + ts, err := parseTensors(d) + if err != nil { + return err + } + + return c.writeFile(ws, c.KV(t), c.Tensors(ts)) } diff --git a/convert/convert_gemma.go b/convert/convert_gemma.go new file mode 100644 index 00000000..332fee7f --- /dev/null +++ b/convert/convert_gemma.go @@ -0,0 +1,103 @@ +package convert + +import ( + "strings" + + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" + + "github.com/ollama/ollama/llm" +) + +type gemma struct { + Parameters + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + HiddenSize uint32 `json:"hidden_size"` + HiddenLayers uint32 `json:"num_hidden_layers"` + IntermediateSize uint32 `json:"intermediate_size"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RMSNormEPS float32 `json:"rms_norm_eps"` + HeadDim uint32 `json:"head_dim"` +} + +var _ Converter = (*gemma)(nil) + +func (p *gemma) KV(t *Tokenizer) llm.KV { + kv := p.Parameters.KV(t) + kv["general.architecture"] = "gemma" + kv["general.name"] = "gemma" + 
kv["gemma.context_length"] = p.MaxPositionEmbeddings + kv["gemma.embedding_length"] = p.HiddenSize + kv["gemma.block_count"] = p.HiddenLayers + kv["gemma.feed_forward_length"] = p.IntermediateSize + kv["gemma.attention.head_count"] = p.NumAttentionHeads + kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads + kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + kv["gemma.attention.key_length"] = p.HeadDim + kv["gemma.attention.value_length"] = p.HeadDim + kv["tokenizer.ggml.eot_token_id"] = uint32(107) + kv["tokenizer.ggml.middle_token_id"] = uint32(68) + kv["tokenizer.ggml.prefix_token_id"] = uint32(67) + kv["tokenizer.ggml.suffix_token_id"] = uint32(69) + return kv +} + +func (p *gemma) Tensors(ts []Tensor) []*llm.Tensor { + var out []*llm.Tensor + for _, t := range ts { + name := p.tensorName(t.Name()) + if strings.HasSuffix(name, "_norm.weight") { + t.SetRepacker(p.addOne) + } + + out = append(out, &llm.Tensor{ + Name: name, + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *gemma) tensorName(n string) string { + return strings.NewReplacer( + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + "block_sparse_moe.gate", "ffn_inp", + ).Replace(n) +} + +func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) { + n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data)) + ones := tensor.Ones(tensor.Float32, int(shape[0])) + + n, err := n.Add(ones) + if err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 0) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) 
+ } + + return f32s, nil +} diff --git a/convert/convert_llama.go b/convert/convert_llama.go new file mode 100644 index 00000000..700049d3 --- /dev/null +++ b/convert/convert_llama.go @@ -0,0 +1,182 @@ +package convert + +import ( + "cmp" + "fmt" + "strings" + + "github.com/ollama/ollama/llm" + "github.com/pdevine/tensor" + "github.com/pdevine/tensor/native" +) + +type llama struct { + Parameters + NLayers uint32 `json:"n_layers"` + NumHiddenLayers uint32 `json:"num_hidden_layers"` + NLayer uint32 `json:"n_layer"` + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + NCtx uint32 `json:"n_ctx"` + HiddenSize uint32 `json:"hidden_size"` + NEmbd uint32 `json:"n_embd"` + IntermediateSize uint32 `json:"intermediate_size"` + NInner uint32 `json:"n_inner"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NHead uint32 `json:"n_head"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RopeTheta float32 `json:"rope_theta"` + RopeScaling struct { + Type string `json:"type"` + Factor float32 `json:"factor"` + } `json:"rope_scaling"` + RMSNormEPS float32 `json:"rms_norm_eps"` + LayerNormEPS float32 `json:"layer_norm_eps"` + LayerNormEpsilon float32 `json:"layer_norm_epsilon"` + NormEpsilon float32 `json:"norm_epsilon"` + HeadDim uint32 `json:"head_dim"` +} + +var _ Converter = (*llama)(nil) + +func (p *llama) KV(t *Tokenizer) llm.KV { + kv := p.Parameters.KV(t) + kv["general.architecture"] = "llama" + kv["general.name"] = "llama" + kv["llama.vocab_size"] = p.VocabSize + + kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) + + if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 { + kv["llama.context_length"] = contextLength + } + + if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 { + kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd) + } + + if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 { + kv["llama.feed_forward_length"] = 
cmp.Or(p.IntermediateSize, p.NInner) + } + + if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 { + kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead) + kv["llama.rope.dimension_count"] = p.HiddenSize / headCount + } + + if p.RopeTheta > 0 { + kv["llama.rope.freq_base"] = p.RopeTheta + } + + if p.RopeScaling.Type == "linear" { + kv["llama.rope.scaling.type"] = p.RopeScaling.Type + kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor + } + + if p.NumKeyValueHeads > 0 { + kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads + } + + if p.RMSNormEPS > 0 { + kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS + } + + if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 { + kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon + } + + if p.HeadDim > 0 { + kv["llama.attention.key_length"] = p.HeadDim + kv["llama.attention.value_length"] = p.HeadDim + } + + if len(t.Merges) > 0 { + kv["tokenizer.ggml.merges"] = t.Merges + } + + return kv +} + +func (p *llama) Tensors(ts []Tensor) []*llm.Tensor { + var out []*llm.Tensor + for _, t := range ts { + name := p.tensorName(t.Name()) + if strings.HasSuffix(name, "attn_q.weight") || + strings.HasSuffix(name, "attn_k.weight") { + t.SetRepacker(p.repack) + } + + out = append(out, &llm.Tensor{ + Name: name, + Kind: t.Kind(), + Shape: t.Shape(), + WriterTo: t, + }) + } + + return out +} + +func (p *llama) tensorName(n string) string { + return strings.NewReplacer( + "lm_head", "output", + "model.embed_tokens", "token_embd", + "model.norm", "output_norm", + "model.layers", "blk", + "input_layernorm", "attn_norm", + "self_attn.q_proj", "attn_q", + "self_attn.k_proj", "attn_k", + "self_attn.v_proj", "attn_v", + "self_attn.o_proj", "attn_output", + "mlp.gate_proj", "ffn_gate", + "mlp.down_proj", "ffn_down", + "mlp.up_proj", "ffn_up", + "post_attention_layernorm", "ffn_norm", + // mixtral + "block_sparse_moe.gate", "ffn_gate_inp", + 
).Replace(n) +} + +func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) { + var dims []int + for _, dim := range shape { + dims = append(dims, int(dim)) + } + + var heads uint32 + if strings.HasSuffix(name, "q_proj.weight") { + heads = p.NumAttentionHeads + } else if strings.HasSuffix(name, "k_proj.weight") { + heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) + } else { + return nil, fmt.Errorf("unknown tensor for repack: %s", name) + } + + n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) + if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil { + return nil, err + } + + if err := n.T(0, 2, 1, 3); err != nil { + return nil, err + } + + if err := n.Reshape(dims...); err != nil { + return nil, err + } + + if err := n.Transpose(); err != nil { + return nil, err + } + + ts, err := native.SelectF32(n, 1) + if err != nil { + return nil, err + } + + var f32s []float32 + for _, t := range ts { + f32s = append(f32s, t...) 
+ } + + return f32s, nil +} diff --git a/convert/convert_mixtral.go b/convert/convert_mixtral.go new file mode 100644 index 00000000..c55a27f8 --- /dev/null +++ b/convert/convert_mixtral.go @@ -0,0 +1,89 @@ +package convert + +import ( + "fmt" + "io" + "slices" + "strings" + + "github.com/ollama/ollama/llm" +) + +type mixtral struct { + llama + NumLocalExperts uint32 `json:"num_local_experts"` + NumExpertsPerToken uint32 `json:"num_experts_per_tok"` +} + +var _ Converter = (*mixtral)(nil) + +func (p *mixtral) KV(t *Tokenizer) llm.KV { + kv := p.llama.KV(t) + + if p.NumLocalExperts > 0 { + kv["llama.expert_count"] = p.NumLocalExperts + } + + if p.NumExpertsPerToken > 0 { + kv["llama.expert_used_count"] = p.NumExpertsPerToken + } + + return kv +} + +func (p *mixtral) Tensors(ts []Tensor) []*llm.Tensor { + oldnew := []string{ + "model.layers", "blk", + "w1", "ffn_gate_exps", + "w2", "ffn_down_exps", + "w3", "ffn_up_exps", + } + + for i := range p.NumLocalExperts { + oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".") + } + + // group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor + namer := strings.NewReplacer(oldnew...) + experts := make(map[string]experts) + + // merge experts into a single tensor while removing them from ts + ts = slices.DeleteFunc(ts, func(t Tensor) bool { + if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") { + return false + } + + name := namer.Replace(t.Name()) + experts[name] = append(experts[name], t) + return true + }) + + var out []*llm.Tensor + for n, e := range experts { + // TODO(mxyng): sanity check experts + out = append(out, &llm.Tensor{ + Name: n, + Kind: e[0].Kind(), + Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...), + WriterTo: e, + }) + } + + return append(out, p.llama.Tensors(ts)...) 
+} + +type experts []Tensor + +func (e experts) WriteTo(w io.Writer) (int64, error) { + // TODO(mxyng): experts _should_ be numerically sorted by expert but this should check + for _, t := range e { + // the canonical merged experts tensor stacks all experts along a new, 0 axis, + // e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers + // this accomplishes the same thing by writing each expert tensor in sequence + if _, err := t.WriteTo(w); err != nil { + return 0, err + } + } + + return 0, nil +} diff --git a/convert/convert_test.go b/convert/convert_test.go index a3727bed..0fbd436f 100644 --- a/convert/convert_test.go +++ b/convert/convert_test.go @@ -20,36 +20,13 @@ import ( func convertFull(t *testing.T, d string) (*os.File, llm.KV, llm.Tensors) { t.Helper() - mf, err := GetModelFormat(d) - if err != nil { - t.Fatal(err) - } - - params, err := mf.GetParams(d) - if err != nil { - t.Fatal(err) - } - - arch, err := mf.GetModelArch("", d, params) - if err != nil { - t.Fatal(err) - } - - if err := arch.LoadVocab(); err != nil { - t.Fatal(err) - } - - if err := arch.GetTensors(); err != nil { - t.Fatal(err) - } - f, err := os.CreateTemp(t.TempDir(), "f16") if err != nil { t.Fatal(err) } defer f.Close() - if err := arch.WriteGGUF(f); err != nil { + if err := Convert(d, f); err != nil { t.Fatal(err) } diff --git a/convert/gemma.go b/convert/gemma.go deleted file mode 100644 index d01ffedf..00000000 --- a/convert/gemma.go +++ /dev/null @@ -1,102 +0,0 @@ -package convert - -import ( - "fmt" - "io" - "log/slog" - "strings" - - "github.com/pdevine/tensor" - "github.com/pdevine/tensor/native" - - "github.com/ollama/ollama/llm" -) - -type GemmaModel struct { - ModelData -} - -func addOnes(data []float32, vectorSize int) ([]float32, error) { - n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data)) - ones := tensor.Ones(tensor.Float32, vectorSize) - - n, err := n.Add(ones) - if err != nil { - return nil, err - } - - ts, err := 
native.SelectF32(n, 0) - if err != nil { - return nil, err - } - - var f32s []float32 - for _, t := range ts { - f32s = append(f32s, t...) - } - - return f32s, nil -} - -func (m *GemmaModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - slog.Debug(fmt.Sprintf("Total tensors: %d", len(t))) - for _, l := range t { - if strings.HasSuffix(l.Name, "norm.weight") { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *GemmaModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) { - return addOnes(data, int(shape[0])) -} - -func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "gemma", - "general.name": m.Name, - "gemma.context_length": uint32(m.Params.ContextSize), - "gemma.embedding_length": uint32(m.Params.HiddenSize), - "gemma.block_count": uint32(m.Params.HiddenLayers), - "gemma.feed_forward_length": uint32(m.Params.IntermediateSize), - "gemma.attention.head_count": uint32(m.Params.AttentionHeads), - "gemma.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "gemma.attention.key_length": uint32(m.Params.HeadDimension), - "gemma.attention.value_length": uint32(m.Params.HeadDimension), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID), - 
"tokenizer.ggml.unknown_token_id": uint32(3), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} diff --git a/convert/llama.go b/convert/llama.go deleted file mode 100644 index b4211b02..00000000 --- a/convert/llama.go +++ /dev/null @@ -1,159 +0,0 @@ -package convert - -import ( - "cmp" - "errors" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/pdevine/tensor" - "github.com/pdevine/tensor/native" - - "github.com/ollama/ollama/llm" -) - -type LlamaModel struct { - ModelData -} - -func (m *LlamaModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - switch m.Format.(type) { - case *TorchFormat: - wt := l.WriterTo.(torchWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - case *SafetensorFormat: - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *LlamaModel) LoadVocab() (err error) { - pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json")) - if errors.Is(err, os.ErrNotExist) { - return nil - } else if err != nil { - return err - } - - m.Vocab = &Vocab{} - for _, t := range ts { - m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content) - m.Vocab.Types = append(m.Vocab.Types, t.Type()) - } - - m.Vocab.Merges = merges - m.Params.PreTokenizer = pre - return nil -} - -func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.vocab_size": uint32(len(m.Vocab.Tokens)), - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": 
uint32(m.Params.HiddenSize), - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "general.file_type": uint32(1), - "tokenizer.ggml.model": "gpt2", - - "tokenizer.ggml.pre": m.Params.PreTokenizer, - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.unknown_token_id": uint32(0), - } - - if len(m.Vocab.Merges) > 0 { - kv["tokenizer.ggml.merges"] = m.Vocab.Merges - } else { - kv["tokenizer.ggml.scores"] = m.Vocab.Scores - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} - -func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) { - var dims []int - for _, dim := range shape { - if dim != 0 { - dims = append(dims, int(dim)) - } - } - - var heads int - switch { - case strings.HasSuffix(name, "attn_q.weight"): - heads = params.AttentionHeads - case strings.HasSuffix(name, "attn_k.weight"): - heads = cmp.Or(params.KeyValHeads, params.AttentionHeads) - default: - return nil, fmt.Errorf("unknown tensor name: %s", name) - } - - n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data)) - if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil { - return nil, err - } - - if err := n.T(0, 2, 1, 3); err != nil { - return nil, 
err - } - - if err := n.Reshape(dims...); err != nil { - return nil, err - } - - if err := n.Transpose(); err != nil { - return nil, err - } - - ts, err := native.SelectF32(n, 1) - if err != nil { - return nil, err - } - - var f32s []float32 - for _, t := range ts { - f32s = append(f32s, t...) - } - - return f32s, nil -} diff --git a/convert/mistral.go b/convert/mistral.go deleted file mode 100644 index 8fe066d6..00000000 --- a/convert/mistral.go +++ /dev/null @@ -1,84 +0,0 @@ -package convert - -import ( - "io" - "regexp" - - "github.com/ollama/ollama/llm" -) - -type MistralModel struct { - ModelData -} - -func (m *MistralModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *MistralModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.context_length": uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - "general.file_type": uint32(1), - "tokenizer.ggml.model": 
"llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - "tokenizer.ggml.unknown_token_id": uint32(0), - } - - if m.Params.HeadDimension > 0 { - kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension) - kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension) - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} diff --git a/convert/mixtral.go b/convert/mixtral.go deleted file mode 100644 index baea68cd..00000000 --- a/convert/mixtral.go +++ /dev/null @@ -1,87 +0,0 @@ -package convert - -import ( - "io" - "regexp" - - "github.com/ollama/ollama/llm" -) - -type MixtralModel struct { - ModelData -} - -func (m *MixtralModel) GetTensors() error { - t, err := m.Format.GetTensors(m.Path, m.Params) - if err != nil { - return err - } - - pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` - re, err := regexp.Compile(pattern) - if err != nil { - return err - } - - for _, l := range t { - matches := re.FindAllStringSubmatch(l.Name, -1) - if len(matches) > 0 { - wt := l.WriterTo.(safetensorWriterTo) - wt.repacker = m.Repack - l.WriterTo = wt - } - m.Tensors = append(m.Tensors, l) - } - - return nil -} - -func (m *MixtralModel) LoadVocab() error { - v, err := LoadSentencePieceTokens(m.Path, m.Params) - if err != nil { - return err - } - m.Vocab = v - return nil -} - -func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error { - kv := llm.KV{ - "general.architecture": "llama", - "general.name": m.Name, - "llama.block_count": uint32(m.Params.HiddenLayers), - "llama.context_length": 
uint32(m.Params.ContextSize), - "llama.embedding_length": uint32(m.Params.HiddenSize), - "llama.feed_forward_length": uint32(m.Params.IntermediateSize), - "llama.attention.head_count": uint32(m.Params.AttentionHeads), - "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), - - "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), - "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), - - "llama.expert_count": uint32(m.Params.Experts), - "llama.expert_used_count": uint32(m.Params.ExpertsUsed), - - "llama.vocab_size": uint32(len(m.Vocab.Tokens)), - "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), - - "general.file_type": uint32(1), - "tokenizer.ggml.model": "llama", - - "tokenizer.ggml.tokens": m.Vocab.Tokens, - "tokenizer.ggml.scores": m.Vocab.Scores, - "tokenizer.ggml.token_type": m.Vocab.Types, - - "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), - "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), - "tokenizer.ggml.unknown_token_id": uint32(0), - "tokenizer.ggml.add_bos_token": true, - "tokenizer.ggml.add_eos_token": false, - } - - return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) -} - -func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { - return llamaRepack(name, m.Params, data, shape) -} diff --git a/convert/reader.go b/convert/reader.go new file mode 100644 index 00000000..9be8ac2e --- /dev/null +++ b/convert/reader.go @@ -0,0 +1,74 @@ +package convert + +import ( + "errors" + "io" + "path/filepath" + "strings" +) + +type Tensor interface { + Name() string + Shape() []uint64 + Kind() uint32 + SetRepacker(repacker) + WriteTo(io.Writer) (int64, error) +} + +type tensorBase struct { + name string + shape []uint64 + repacker +} + +func (t tensorBase) Name() string { + return t.name +} + +func (t tensorBase) Shape() []uint64 { + return t.shape +} + +func (t tensorBase) Kind() uint32 { + if strings.HasSuffix(t.name, 
".block_sparse_moe.gate.weight") { + return 0 + } + + switch len(t.shape) { + case 0: + panic("invalid tensor shape") + case 1: + return 0 + default: + return 1 + } +} + +func (t *tensorBase) SetRepacker(fn repacker) { + t.repacker = fn +} + +type repacker func(string, []float32, []uint64) ([]float32, error) + +func parseTensors(d string) ([]Tensor, error) { + patterns := map[string]func(...string) ([]Tensor, error){ + "model-*-of-*.safetensors": parseSafetensors, + "model.safetensors": parseSafetensors, + "pytorch_model-*-of-*.bin": parseTorch, + "pytorch_model.bin": parseTorch, + "consolidated.*.pth": parseTorch, + } + + for pattern, parseFn := range patterns { + matches, err := filepath.Glob(filepath.Join(d, pattern)) + if err != nil { + return nil, err + } + + if len(matches) > 0 { + return parseFn(matches...) + } + } + + return nil, errors.New("unknown tensor format") +} diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go new file mode 100644 index 00000000..440581af --- /dev/null +++ b/convert/reader_safetensors.go @@ -0,0 +1,140 @@ +package convert + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "os" + "slices" + + "github.com/d4l3k/go-bfloat16" + "github.com/x448/float16" + "golang.org/x/exp/maps" +) + +type safetensorMetadata struct { + Type string `json:"dtype"` + Shape []uint64 `json:"shape"` + Offsets []int64 `json:"data_offsets"` +} + +func parseSafetensors(ps ...string) ([]Tensor, error) { + var ts []Tensor + for _, p := range ps { + f, err := os.Open(p) + if err != nil { + return nil, err + } + defer f.Close() + + var n int64 + if err := binary.Read(f, binary.LittleEndian, &n); err != nil { + return nil, err + } + + b := bytes.NewBuffer(make([]byte, 0, n)) + if _, err = io.CopyN(b, f, n); err != nil { + return nil, err + } + + var headers map[string]safetensorMetadata + if err := json.NewDecoder(b).Decode(&headers); err != nil { + return nil, err + } + + keys := maps.Keys(headers) + 
slices.Sort(keys) + + for _, key := range keys { + if value := headers[key]; value.Type != "" { + ts = append(ts, safetensor{ + path: p, + dtype: value.Type, + offset: safetensorsPad(n, value.Offsets[0]), + size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]), + tensorBase: &tensorBase{ + name: key, + shape: value.Shape, + }, + }) + } + } + } + + return ts, nil +} + +func safetensorsPad(n, s int64) int64 { + return 8 + n + s +} + +type safetensor struct { + path string + dtype string + offset int64 + size int64 + *tensorBase +} + +func (st safetensor) WriteTo(w io.Writer) (int64, error) { + f, err := os.Open(st.path) + if err != nil { + return 0, err + } + defer f.Close() + + if _, err = f.Seek(st.offset, io.SeekStart); err != nil { + return 0, err + } + + var f32s []float32 + switch st.dtype { + case "F32": + f32s = make([]float32, st.size/4) + if err = binary.Read(f, binary.LittleEndian, f32s); err != nil { + return 0, err + } + case "F16": + u16s := make([]uint16, st.size/2) + if err = binary.Read(f, binary.LittleEndian, u16s); err != nil { + return 0, err + } + + for _, b := range u16s { + f32s = append(f32s, float16.Frombits(b).Float32()) + } + + case "BF16": + u8s := make([]uint8, st.size) + if err = binary.Read(f, binary.LittleEndian, u8s); err != nil { + return 0, err + } + + f32s = bfloat16.DecodeFloat32(u8s) + default: + return 0, fmt.Errorf("unknown data type: %s", st.dtype) + } + + if st.repacker != nil { + f32s, err = st.repacker(st.Name(), f32s, st.Shape()) + if err != nil { + return 0, err + } + } + + switch st.Kind() { + case 0: + return 0, binary.Write(w, binary.LittleEndian, f32s) + case 1: + f16s := make([]uint16, len(f32s)) + for i := range f32s { + f16s[i] = float16.Fromfloat32(f32s[i]).Bits() + } + + return 0, binary.Write(w, binary.LittleEndian, f16s) + default: + return 0, fmt.Errorf("unknown storage type: %d", st.Kind()) + } +} diff --git a/convert/reader_torch.go b/convert/reader_torch.go new file mode 100644 
index 00000000..1428706e --- /dev/null +++ b/convert/reader_torch.go @@ -0,0 +1,46 @@ +package convert + +import ( + "io" + + "github.com/nlpodyssey/gopickle/pytorch" + "github.com/nlpodyssey/gopickle/types" +) + +func parseTorch(ps ...string) ([]Tensor, error) { + var ts []Tensor + for _, p := range ps { + pt, err := pytorch.Load(p) + if err != nil { + return nil, err + } + + for _, k := range pt.(*types.Dict).Keys() { + t := pt.(*types.Dict).MustGet(k) + + var shape []uint64 + for dim := range t.(*pytorch.Tensor).Size { + shape = append(shape, uint64(dim)) + } + + ts = append(ts, torch{ + storage: t.(*pytorch.Tensor).Source, + tensorBase: &tensorBase{ + name: k.(string), + shape: shape, + }, + }) + } + } + + return ts, nil +} + +type torch struct { + storage pytorch.StorageInterface + *tensorBase +} + +func (pt torch) WriteTo(w io.Writer) (int64, error) { + return 0, nil +} diff --git a/convert/safetensors.go b/convert/safetensors.go deleted file mode 100644 index f45687f1..00000000 --- a/convert/safetensors.go +++ /dev/null @@ -1,309 +0,0 @@ -package convert - -import ( - "bytes" - "encoding/binary" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "slices" - "strings" - - "github.com/d4l3k/go-bfloat16" - "github.com/x448/float16" - - "github.com/ollama/ollama/llm" -) - -type safetensorWriterTo struct { - t *llm.Tensor - - params *Params - bo ByteOrder - - filename string - dtype string - - offset, size int64 - repacker func(string, []float32, []uint64) ([]float32, error) -} - -type safetensorMetadata struct { - Type string `json:"dtype"` - Shape []uint64 `json:"shape"` - Offsets []int64 `json:"data_offsets"` -} - -type SafetensorFormat struct{} - -func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { - var tensors []llm.Tensor - matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors")) - if err != nil { - return nil, err - } - - var offset uint64 - for _, f := range matches { - var t 
[]llm.Tensor - var err error - t, offset, err = m.readTensors(f, offset, params) - if err != nil { - return nil, err - } - - tensors = append(tensors, t...) - } - return tensors, nil -} - -func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) { - f, err := os.Open(fn) - if err != nil { - return nil, 0, err - } - defer f.Close() - - var n int64 - if err := binary.Read(f, binary.LittleEndian, &n); err != nil { - return nil, 0, err - } - - b := bytes.NewBuffer(make([]byte, 0, n)) - if _, err = io.CopyN(b, f, n); err != nil { - return nil, 0, err - } - - var headers map[string]safetensorMetadata - if err := json.NewDecoder(b).Decode(&headers); err != nil { - return nil, 0, err - } - - var keys []string - for key := range headers { - if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") { - keys = append(keys, key) - } - } - - slices.Sort(keys) - - var tensors []llm.Tensor - for _, key := range keys { - value := headers[key] - - var kind uint32 - switch len(value.Shape) { - case 0: - // valuedata - continue - case 2: - kind = 1 - } - - name, err := m.GetLayerName(key) - if err != nil { - return nil, 0, err - } - - shape := make([]uint64, len(value.Shape)) - copy(shape, value.Shape) - - pad := func(s int64) int64 { - return 8 + n + s - } - - t := llm.Tensor{ - Name: name, - Kind: kind, - Offset: offset, - Shape: shape, - } - - t.WriterTo = safetensorWriterTo{ - t: &t, - params: params, - bo: params.ByteOrder, - filename: fn, - dtype: value.Type, - offset: pad(value.Offsets[0]), - size: pad(value.Offsets[1]) - pad(value.Offsets[0]), - } - - offset += t.Size() - tensors = append(tensors, t) - } - - return tensors, offset, nil -} - -func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "config.json")) - if err != nil { - return nil, err - } - defer f.Close() - - var params Params - - if err := json.NewDecoder(f).Decode(¶ms); err != nil { - return nil, 
err - } - - params.ByteOrder = binary.LittleEndian - return ¶ms, nil -} - -func (m *SafetensorFormat) GetLayerName(n string) (string, error) { - directMap := map[string]string{ - "model.embed_tokens.weight": "token_embd.weight", - "lm_head.weight": "output.weight", - "model.norm.weight": "output_norm.weight", - } - - tMap := map[string]string{ - "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight", - "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight", - "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight", - "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight", - "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight", - "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight", - "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight", - "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight", - "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight", - "model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight", - "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight", - } - - v, ok := directMap[n] - if ok { - return v, nil - } - - // quick hack to rename the layers to gguf format - for k, v := range tMap { - re := regexp.MustCompile(k) - newName := re.ReplaceAllString(n, v) - if newName != n { - return newName, nil - } - } - - return "", fmt.Errorf("couldn't find a layer name for '%s'", n) -} - -func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) { - f, err := os.Open(r.filename) - if err != nil { - return 0, err - } - defer f.Close() - - if _, err = f.Seek(r.offset, io.SeekStart); err != nil { - return 0, err - } - - var f32s []float32 - switch r.dtype { 
- case "F32": - f32s = make([]float32, r.size/4) - if err = binary.Read(f, r.bo, f32s); err != nil { - return 0, err - } - case "F16": - u16s := make([]uint16, r.size/2) - if err = binary.Read(f, r.bo, u16s); err != nil { - return 0, err - } - - for _, b := range u16s { - f32s = append(f32s, float16.Frombits(b).Float32()) - } - - case "BF16": - u8s := make([]uint8, r.size) - if err = binary.Read(f, r.bo, u8s); err != nil { - return 0, err - } - - f32s = bfloat16.DecodeFloat32(u8s) - default: - return 0, fmt.Errorf("unknown data type: %s", r.dtype) - } - - if r.repacker != nil { - f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape) - if err != nil { - return 0, err - } - } - - switch r.t.Kind { - case 0: - return 0, binary.Write(w, r.bo, f32s) - case 1: - f16s := make([]uint16, len(f32s)) - for i := range f32s { - f16s[i] = float16.Fromfloat32(f32s[i]).Bits() - } - - return 0, binary.Write(w, r.bo, f16s) - default: - return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind) - } -} - -func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) { - switch len(params.Architectures) { - case 0: - return nil, fmt.Errorf("No architecture specified to convert") - case 1: - switch params.Architectures[0] { - case "LlamaForCausalLM": - return &LlamaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "MistralForCausalLM": - return &MistralModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "MixtralForCausalLM": - return &MixtralModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - case "GemmaForCausalLM": - return &GemmaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - default: - return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0]) - } - } - - return nil, fmt.Errorf("Unknown error") -} diff --git 
a/convert/testdata/Mistral-7B-Instruct-v0.2.json b/convert/testdata/Mistral-7B-Instruct-v0.2.json index 1da4d2ad..88d447b3 100644 --- a/convert/testdata/Mistral-7B-Instruct-v0.2.json +++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json @@ -4,7 +4,7 @@ "general.quantization_version": "2", "llama.block_count": "32", "llama.context_length": "32768", - "llama.embedding_length": "", + "llama.embedding_length": "4096", "llama.feed_forward_length": "14336", "llama.attention.head_count": "32", "llama.attention.head_count_kv": "8", diff --git a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json index 0967ef42..a1596532 100644 --- a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json +++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json @@ -1 +1,348 @@ -{} +{ + "general.architecture": "llama", + "general.file_type": "1", + "general.quantization_version": "2", + "llama.block_count": "32", + "llama.context_length": "32768", + "llama.embedding_length": "4096", + "llama.feed_forward_length": "14336", + "llama.rope.dimension_count": "128", + "llama.rope.freq_base": "1e+06", + "llama.attention.head_count": "32", + "llama.attention.head_count_kv": "8", + "llama.attention.layer_norm_rms_epsilon": "1e-05", + "llama.expert_count": "8", + "llama.expert_used_count": "2", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.unknown_token_id": "0", + "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676", + "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e", + "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6", + "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4", + "blk.0.ffn_gate_exps.weight": 
"2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa", + "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5", + "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3", + "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314", + "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c", + "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2", + "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67", + "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227", + "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b", + "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366", + "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809", + "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd", + "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c", + "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33", + "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f", + "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a", + "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793", + "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1", + "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd", + "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c", + "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964", + 
"blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c", + "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e", + "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2", + "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c", + "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6", + "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e", + "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9", + "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3", + "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1", + "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c", + "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497", + "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086", + "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210", + "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1", + "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86", + "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625", + "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75", + "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555", + "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e", + "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd", + "blk.4.ffn_down_exps.weight": 
"d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4", + "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164", + "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6", + "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758", + "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790", + "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9", + "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12", + "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d", + "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb", + "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d", + "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff", + "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481", + "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e", + "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece", + "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd", + "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13", + "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba", + "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a", + "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca", + "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66", + "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470", + 
"blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c", + "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b", + "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c", + "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424", + "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6", + "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84", + "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb", + "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1", + "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948", + "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc", + "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5", + "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a", + "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0", + "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0", + "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187", + "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948", + "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e", + "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2", + "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17", + "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983", + "blk.8.attn_output.weight": 
"2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699", + "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0", + "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db", + "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7", + "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90", + "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5", + "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e", + "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b", + "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f", + "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333", + "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce", + "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0", + "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b", + "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c", + "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf", + "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351", + "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5", + "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99", + "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702", + "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39", + "blk.10.attn_output.weight": 
"5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a", + "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5", + "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad", + "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd", + "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3", + "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac", + "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95", + "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482", + "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6", + "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea", + "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6", + "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325", + "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512", + "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47", + "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e", + "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89", + "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada", + "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee", + "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49", + "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829", + "blk.12.ffn_up_exps.weight": 
"9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e", + "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a", + "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154", + "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97", + "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374", + "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5", + "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5", + "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52", + "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05", + "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace", + "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069", + "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc", + "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337", + "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899", + "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d", + "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec", + "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c", + "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d", + "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9", + "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78", + "blk.14.ffn_up_exps.weight": 
"ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41", + "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4", + "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06", + "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8", + "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599", + "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab", + "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241", + "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211", + "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb", + "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98", + "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf", + "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e", + "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73", + "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef", + "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a", + "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c", + "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0", + "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c", + "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5", + "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa", + "blk.16.attn_output.weight": 
"687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4", + "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6", + "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b", + "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62", + "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d", + "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc", + "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125", + "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248", + "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd", + "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5", + "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d", + "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940", + "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561", + "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac", + "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec", + "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3", + "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047", + "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298", + "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e", + "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8", + "blk.18.attn_output.weight": 
"f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75", + "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f", + "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb", + "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4", + "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae", + "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78", + "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5", + "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057", + "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff", + "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8", + "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60", + "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd", + "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b", + "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0", + "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813", + "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226", + "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7", + "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39", + "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc", + "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4", + "blk.20.attn_output.weight": 
"6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b", + "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5", + "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57", + "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765", + "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb", + "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2", + "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820", + "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6", + "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660", + "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c", + "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a", + "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93", + "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209", + "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa", + "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d", + "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12", + "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36", + "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369", + "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179", + "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb", + "blk.22.attn_output.weight": 
"0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206", + "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237", + "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4", + "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a", + "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f", + "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32", + "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8", + "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2", + "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5", + "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779", + "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b", + "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd", + "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1", + "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e", + "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d", + "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c", + "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6", + "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8", + "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1", + "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192", + "blk.24.ffn_up_exps.weight": 
"fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a", + "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79", + "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1", + "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33", + "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa", + "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643", + "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0", + "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61", + "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c", + "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c", + "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb", + "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7", + "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431", + "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b", + "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a", + "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7", + "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64", + "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e", + "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6", + "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781", + "blk.26.ffn_up_exps.weight": 
"e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6", + "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff", + "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54", + "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82", + "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8", + "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765", + "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16", + "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd", + "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e", + "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2", + "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7", + "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61", + "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca", + "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4", + "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2", + "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1", + "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a", + "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4", + "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea", + "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b", + "blk.28.attn_output.weight": 
"9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778", + "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74", + "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd", + "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2", + "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8", + "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc", + "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64", + "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805", + "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298", + "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e", + "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b", + "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78", + "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71", + "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7", + "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a", + "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705", + "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea", + "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9", + "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c", + "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb", + "blk.30.attn_output.weight": 
"9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd", + "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044", + "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1", + "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70", + "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e", + "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646", + "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273", + "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e", + "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786", + "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40", + "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a", + "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b", + "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172", + "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7", + "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628", + "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802", + "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0", + "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8", + "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13", + "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564" +} diff --git a/convert/tokenizer.go b/convert/tokenizer.go index fd6df5f5..baee04aa 100644 --- 
a/convert/tokenizer.go +++ b/convert/tokenizer.go @@ -3,19 +3,148 @@ package convert import ( "cmp" "crypto/sha256" + "encoding/hex" "encoding/json" + "errors" "fmt" "log/slog" "os" + "path/filepath" "slices" +) - "golang.org/x/exp/maps" +const ( + _ int32 = iota + tokenTypeNormal + tokenTypeUnknown + tokenTypeControl + tokenTypeUserDefined + tokenTypeUnused + tokenTypeByte ) type Tokenizer struct { - Version string `json:"version"` - AddedTokens []Token `json:"added_tokens"` - Model TokenizerModel `json:"model"` + *Vocabulary + SpecialVocabulary []*SpecialVocabulary + Merges []string + + Pre string + Template string +} + +func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) { + v, err := parseVocabulary(d) + if err != nil { + return nil, err + } + + t := &Tokenizer{ + Vocabulary: v, + Pre: "default", + } + + addedTokens := make(map[string]token) + if f, err := os.Open(filepath.Join(d, "tokenizer.json")); errors.Is(err, os.ErrNotExist) { + } else if err != nil { + return nil, err + } else { + defer f.Close() + + var tt tokenizer + if err := json.NewDecoder(f).Decode(&tt); err != nil { + return nil, err + } + + for _, t := range tt.AddedTokens { + addedTokens[t.Content] = t + } + + t.Merges = tt.Model.Merges + + sha256sum := sha256.New() + for _, pt := range tt.PreTokenizer.PreTokenizers { + switch pt.Type { + case "Split": + if pt.Pattern.Regex != "" { + sha256sum.Write([]byte(pt.Pattern.Regex)) + } + } + } + + switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest { + case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": + t.Pre = "llama-bpe" + case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": + t.Pre = "deepseek-llm" + case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": + t.Pre = "deepseek-coder" + case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855": + // noop, empty pretokenizer + default: + slog.Warn("unknown pretokenizer, using default", "digest", 
digest) + } + } + + if f, err := os.Open(filepath.Join(d, "tokenizer_config.json")); errors.Is(err, os.ErrNotExist) { + } else if err != nil { + return nil, err + } else { + defer f.Close() + + var p map[string]json.RawMessage + if err := json.NewDecoder(f).Decode(&p); err != nil { + return nil, err + } + + if template, ok := p["chat_template"]; ok { + if err := json.Unmarshal(template, &t.Template); err != nil { + return nil, err + } + } + + for _, st := range specialTypes { + sv := SpecialVocabulary{Type: st} + if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok { + if err := json.Unmarshal(bts, &sv.AddToken); err != nil { + return nil, err + } + } + + if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok { + var content string + if err := json.Unmarshal(bts, &content); err != nil { + var mm map[string]any + if err := json.Unmarshal(bts, &mm); err != nil { + continue + } + + content, ok = mm["content"].(string) + if !ok { + continue + } + } + + sv.Content = content + } + + if id, ok := addedTokens[sv.Content]; ok { + sv.ID = id.ID + t.SpecialVocabulary = append(t.SpecialVocabulary, &sv) + } + } + } + + return t, nil +} + +type tokenizer struct { + Version string `json:"version"` + AddedTokens []token `json:"added_tokens"` + Model struct { + Type string `json:"type"` + Vocab map[string]int `json:"vocab"` + Merges []string `json:"merges"` + } `json:"model"` PreTokenizer struct { PreTokenizers []struct { @@ -27,80 +156,106 @@ type Tokenizer struct { } `json:"pre_tokenizer"` } -type TokenizerModel struct { - Type string `json:"type"` - Vocab map[string]int `json:"vocab"` - Merges []string `json:"merges"` - Tokens []Token -} - -type Token struct { +type token struct { ID int `json:"id"` Content string `json:"content"` Special bool `json:"special"` UserDefined bool } -func (t *Token) Type() int32 { - switch { - case t.Special: - return tokenTypeControl - case t.UserDefined: - return tokenTypeUserDefined - default: - return tokenTypeNormal - } +type Vocabulary struct { + 
Model string + Tokens []string + Scores []float32 + Types []int32 } -func (t *Tokenizer) maxID() int { - return max( - slices.Max(maps.Values(t.Model.Vocab)), - slices.MaxFunc(t.AddedTokens, func(a, b Token) int { - return cmp.Compare(a.ID, b.ID) - }).ID, - ) -} - -func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) { - f, err := os.Open(dirpath) +func parseVocabularyFromTokenizer(p string) (*Vocabulary, error) { + f, err := os.Open(filepath.Join(p, "tokenizer.json")) if err != nil { - panic(err) + return nil, err } defer f.Close() - var t Tokenizer + var t tokenizer if err := json.NewDecoder(f).Decode(&t); err != nil { - return "", nil, nil, err + return nil, err } - tokens = make([]Token, t.maxID()+1) + var tokens []token for k, v := range t.Model.Vocab { - tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false} + tokens = append(tokens, token{ + ID: v, + Content: k, + }) } - for _, v := range t.AddedTokens { - v.UserDefined = true - tokens[v.ID] = v + for _, t := range t.AddedTokens { + t.UserDefined = true + tokens = append(tokens, t) } - sha256sum := sha256.New() - for _, pt := range t.PreTokenizer.PreTokenizers { - if pt.Type == "Split" && pt.Pattern.Regex != "" { - sha256sum.Write([]byte(pt.Pattern.Regex)) + slices.SortFunc(tokens, func(i, j token) int { + return cmp.Compare(i.ID, j.ID) + }) + + v := Vocabulary{Model: "gpt2"} + for _, t := range tokens { + v.Tokens = append(v.Tokens, t.Content) + v.Scores = append(v.Scores, float32(t.ID)) + + switch { + case t.Special: + v.Types = append(v.Types, tokenTypeControl) + case t.UserDefined: + v.Types = append(v.Types, tokenTypeUserDefined) + default: + v.Types = append(v.Types, tokenTypeNormal) } } - switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest { - case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": - pre = "llama-bpe" - case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": - pre = "deepseek-llm" - 
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": - pre = "deepseek-coder" - default: - slog.Warn("unknown pretokenizer, using default", "digest", digest) - pre = "default" + return &v, nil +} + +func parseVocabulary(d string) (*Vocabulary, error) { + patterns := map[string]func(string) (*Vocabulary, error){ + "tokenizer.model": parseSentencePiece, + "tokenizer.json": parseVocabularyFromTokenizer, } - return pre, tokens, t.Model.Merges, nil + for pattern, parseFn := range patterns { + matches, err := filepath.Glob(filepath.Join(d, pattern)) + if err != nil { + return nil, err + } + + if len(matches) > 0 { + return parseFn(d) + } + } + + return nil, errors.New("unknown tensor format") +} + +type SpecialVocabulary struct { + Type string + ID int + Content string + AddToken bool +} + +func (sv SpecialVocabulary) Key() string { + switch t := sv.Type; t { + case "bos", "eos", "cls", "mask": + return t + case "unk": + return "unknown" + case "sep": + //nolint:misspell // this is an upstream typo + return "seperator" + case "pad": + return "padding" + } + + panic("unknown special vocabulary type") } diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go new file mode 100644 index 00000000..75d9fe26 --- /dev/null +++ b/convert/tokenizer_spm.go @@ -0,0 +1,83 @@ +package convert + +import ( + "cmp" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "slices" + + "google.golang.org/protobuf/proto" + + "github.com/ollama/ollama/convert/sentencepiece" +) + +func parseSentencePiece(d string) (*Vocabulary, error) { + bts, err := os.ReadFile(filepath.Join(d, "tokenizer.model")) + if err != nil { + return nil, err + } + + var spm sentencepiece.ModelProto + if err := proto.Unmarshal(bts, &spm); err != nil { + return nil, err + } + + v := Vocabulary{Model: "llama"} + for _, piece := range spm.GetPieces() { + v.Tokens = append(v.Tokens, piece.GetPiece()) + v.Scores = append(v.Scores, piece.GetScore()) + + switch t := piece.GetType(); t { + 
case sentencepiece.ModelProto_SentencePiece_UNKNOWN, + sentencepiece.ModelProto_SentencePiece_CONTROL, + sentencepiece.ModelProto_SentencePiece_UNUSED, + sentencepiece.ModelProto_SentencePiece_BYTE: + v.Types = append(v.Types, int32(t)) + default: + v.Types = append(v.Types, int32(sentencepiece.ModelProto_SentencePiece_NORMAL)) + } + } + + f, err := os.Open(filepath.Join(d, "added_tokens.json")) + if errors.Is(err, os.ErrNotExist) { + return &v, nil + } else if err != nil { + return nil, err + } + defer f.Close() + + var atm map[string]int + if err := json.NewDecoder(f).Decode(&atm); err != nil { + return nil, err + } + + type t struct { + id int + content string + } + + var ts []t + for content, id := range atm { + ts = append(ts, t{id, content}) + } + + slices.SortFunc(ts, func(i, j t) int { + return cmp.Compare(i.id, j.id) + }) + + n := len(v.Tokens) + for i, t := range ts { + if t.id != i+n { + return nil, fmt.Errorf("invalid token id: %d", t.id) + } + + v.Tokens = append(v.Tokens, t.content) + v.Scores = append(v.Scores, -1000.0) + v.Types = append(v.Types, tokenTypeUserDefined) + } + + return &v, nil +} diff --git a/convert/torch.go b/convert/torch.go deleted file mode 100644 index 55414adc..00000000 --- a/convert/torch.go +++ /dev/null @@ -1,287 +0,0 @@ -package convert - -import ( - "encoding/binary" - "encoding/json" - "fmt" - "io" - "log/slog" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/nlpodyssey/gopickle/pytorch" - "github.com/nlpodyssey/gopickle/types" - "github.com/x448/float16" - - "github.com/ollama/ollama/llm" -) - -type torchWriterTo struct { - t *llm.Tensor - - params *Params - bo ByteOrder - - storage pytorch.StorageInterface - repacker func(string, []float32, []uint64) ([]float32, error) -} - -type TorchFormat struct{} - -func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { - slog.Debug("getting torch tensors") - - var files []string - if pt, _ := filepath.Glob(filepath.Join(dirpath, 
"consolidated*.pth")); len(pt) > 0 { - files = append(files, pt...) - } else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 { - files = append(files, pt...) - } - - var offset uint64 - var tensors []llm.Tensor - for _, fn := range files { - m, err := pytorch.Load(fn) - if err != nil { - slog.Error(fmt.Sprintf("error unpickling: %q", err)) - return []llm.Tensor{}, err - } - - for _, k := range m.(*types.Dict).Keys() { - if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") { - continue - } - - t, _ := m.(*types.Dict).Get(k) - tshape := t.(*pytorch.Tensor).Size - - var size uint64 - var kind uint32 - switch len(tshape) { - case 0: - continue - case 1: - // convert to float32 - kind = 0 - size = uint64(tshape[0] * 4) - case 2: - // convert to float16 - kind = 1 - size = uint64(tshape[0] * tshape[1] * 2) - } - - ggufName, err := tf.GetLayerName(k.(string)) - if err != nil { - slog.Error(err.Error()) - return nil, err - } - slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape)) - - shape := []uint64{0, 0, 0, 0} - for i := range tshape { - shape[i] = uint64(tshape[i]) - } - - tensor := llm.Tensor{ - Name: ggufName, - Kind: kind, - Offset: offset, // calculate the offset - Shape: shape, - } - - tensor.WriterTo = torchWriterTo{ - t: &tensor, - params: params, - bo: params.ByteOrder, - storage: t.(*pytorch.Tensor).Source, - } - - tensors = append(tensors, tensor) - offset += size - } - } - - return tensors, nil -} - -func getAltParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "params.json")) - if err != nil { - slog.Error("no params.json") - return nil, err - } - defer f.Close() - - type TorchParams struct { - HiddenSize int `json:"dim"` - AttentionHeads int `json:"n_heads"` - KeyValHeads int `json:"n_kv_heads"` - HiddenLayers int `json:"n_layers"` - RopeTheta float64 `json:"rope_theta"` - NormEPS float64 `json:"norm_eps"` - } - - var tparams TorchParams - 
- - d := json.NewDecoder(f) - err = d.Decode(&tparams) - if err != nil { - return nil, err - } - - params := &Params{ - Architectures: []string{"LlamaForCausalLM"}, - HiddenSize: tparams.HiddenSize, - AttentionHeads: tparams.AttentionHeads, - KeyValHeads: tparams.KeyValHeads, - HiddenLayers: tparams.HiddenLayers, - NormEPS: tparams.NormEPS, - } - - switch { - case tparams.RopeTheta == 1000000: - // Codellama - params.ContextSize = 16384 - case tparams.NormEPS == 1e-06: - // llama2 - slog.Debug("Found llama2 - setting context size to 4096") - params.ContextSize = 4096 - default: - params.ContextSize = 2048 - } - - params.ByteOrder = binary.LittleEndian - return params, nil -} - -func (m *TorchFormat) GetParams(dirpath string) (*Params, error) { - f, err := os.Open(filepath.Join(dirpath, "config.json")) - if err != nil { - if os.IsNotExist(err) { - // try params.json instead - return getAltParams(dirpath) - } else { - return nil, err - } - } - - var params Params - d := json.NewDecoder(f) - err = d.Decode(&params) - if err != nil { - return nil, err - } - - params.ByteOrder = binary.LittleEndian - return &params, nil -} - -func (m *TorchFormat) GetLayerName(n string) (string, error) { - directMap := map[string]string{ - "tok_embeddings.weight": "token_embd.weight", - "output.weight": "output.weight", - "norm.weight": "output_norm.weight", - "rope.freqs": "rope_freqs.weight", - "model.embed_tokens.weight": "token_embd.weight", - "lm_head.weight": "output.weight", - "model.norm.weight": "output_norm.weight", - } - - lMap := map[string]string{ - "layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight", - "layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight", - "layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight", - "layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight", - "layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight", - "layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight", - "layers.(\\d+).attention.wk.weight":
"blk.$1.attn_k.weight", - "layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight", - "layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight", - "layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight", - "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight", - "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight", - "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight", - "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight", - "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight", - "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight", - "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight", - "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight", - "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight", - } - - v, ok := directMap[n] - if ok { - return v, nil - } - - // quick hack to rename the layers to gguf format - for k, v := range lMap { - re := regexp.MustCompile(k) - newName := re.ReplaceAllString(n, v) - if newName != n { - return newName, nil - } - } - - return "", fmt.Errorf("couldn't find a layer name for '%s'", n) -} - -func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) { - var f32s []float32 - switch s := r.storage.(type) { - case *pytorch.FloatStorage: - f32s = s.Data - case *pytorch.HalfStorage: - f32s = s.Data - case *pytorch.BFloat16Storage: - f32s = s.Data - default: - return 0, fmt.Errorf("unknown data type: %T", s) - } - - if r.repacker != nil { - f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape) - if err != nil { - return 0, err - } - } - - switch r.t.Kind { - case 0: - return 0, binary.Write(w, r.bo, f32s) - case 1: - f16s := make([]uint16, len(f32s)) - for i := range f32s { - f16s[i] = float16.Fromfloat32(f32s[i]).Bits() - } - - return 0, binary.Write(w, r.bo, f16s) - default: - return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind) - } -} - -func (m 
*TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) { - switch len(params.Architectures) { - case 0: - return nil, fmt.Errorf("No architecture specified to convert") - case 1: - switch params.Architectures[0] { - case "LlamaForCausalLM": - return &LlamaModel{ - ModelData{ - Name: name, - Path: dirPath, - Params: params, - Format: m, - }, - }, nil - default: - return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0]) - } - } - - return nil, fmt.Errorf("Unknown error") -} diff --git a/llm/gguf.go b/llm/gguf.go index aadfc4ba..e61babf2 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -2,11 +2,16 @@ package llm import ( "bytes" + "cmp" "encoding/binary" "encoding/json" "fmt" "io" + "log/slog" + "slices" "strings" + + "golang.org/x/exp/maps" ) type containerGGUF struct { @@ -88,7 +93,7 @@ type gguf struct { kv KV tensors []*Tensor - parameters uint64 + parameters uint64 tensorOffset uint64 scratch [16 << 10]byte @@ -101,10 +106,6 @@ func newGGUF(container *containerGGUF) *gguf { } } -func NewGGUFV3(bo binary.ByteOrder) *gguf { - return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3}) -} - func (llm *gguf) KV() KV { return llm.kv } @@ -203,7 +204,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { return fmt.Errorf("failed to read tensor dimensions: %w", err) } - shape := [4]uint64{1, 1, 1, 1} + shape := make([]uint64, dims) for i := 0; uint32(i) < dims; i++ { shape[i], err = readGGUF[uint64](llm, rs) if err != nil { @@ -245,7 +246,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { return err } - padding := llm.padding(offset, int64(alignment)) + padding := ggufPadding(offset, int64(alignment)) llm.tensorOffset = uint64(offset + padding) for _, tensor := range llm.tensors { @@ -254,7 +255,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { return fmt.Errorf("failed to get current offset: %w", err) } - padding := llm.padding(offset, int64(alignment)) + padding := ggufPadding(offset, 
int64(alignment)) if _, err := rs.Seek(padding, io.SeekCurrent); err != nil { return fmt.Errorf("failed to seek to init padding: %w", err) } @@ -273,12 +274,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) { return t, err } -func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error { - if err := binary.Write(w, llm.ByteOrder, t); err != nil { +func writeGGUF[V any](w io.Writer, t uint32, v V) error { + if err := binary.Write(w, binary.LittleEndian, t); err != nil { return err } - return binary.Write(w, llm.ByteOrder, v) + return binary.Write(w, binary.LittleEndian, v) } func readGGUFV1String(llm *gguf, r io.Reader) (string, error) { @@ -342,12 +343,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) { return string(buf), nil } -func writeGGUFString(llm *gguf, w io.Writer, s string) error { - if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil { +func writeGGUFString(w io.Writer, s string) error { + if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil { return err } - if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil { + if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil { return err } @@ -488,21 +489,21 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) { return a, nil } -func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error { - if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil { +func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error { + if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil { return err } - if err := binary.Write(w, llm.ByteOrder, t); err != nil { + if err := binary.Write(w, binary.LittleEndian, t); err != nil { return err } - if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil { + if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil { return err } for _, e := range s { - if err := binary.Write(w, 
llm.ByteOrder, e); err != nil { + if err := binary.Write(w, binary.LittleEndian, e); err != nil { return err } } @@ -510,194 +511,55 @@ func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error return nil } -var ggufKVOrder = map[string][]string{ - "llama": { - "general.architecture", - "general.name", - "llama.vocab_size", - "llama.context_length", - "llama.embedding_length", - "llama.block_count", - "llama.feed_forward_length", - "llama.attention.head_count", - "llama.attention.head_count_kv", - "llama.attention.layer_norm_rms_epsilon", - "llama.rope.freq_base", - "llama.rope.dimension_count", - "llama.expert_count", - "llama.expert_used_count", - "gemma.context_length", - "gemma.embedding_length", - "gemma.block_count", - "gemma.feed_forward_length", - "gemma.attention.head_count", - "gemma.attention.head_count_kv", - "gemma.attention.layer_norm_rms_epsilon", - "gemma.attention.key_length", - "gemma.attention.value_length", - "general.file_type", - "tokenizer.ggml.pre", - "tokenizer.ggml.model", - "tokenizer.ggml.tokens", - "tokenizer.ggml.scores", - "tokenizer.ggml.merges", - "tokenizer.ggml.token_type", - "tokenizer.ggml.bos_token_id", - "tokenizer.ggml.eos_token_id", - "tokenizer.ggml.unknown_token_id", - "tokenizer.ggml.padding_token_id", - "tokenizer.ggml.add_bos_token", - "tokenizer.ggml.add_eos_token", - "tokenizer.chat_template", - "bert.pooling_type", - }, -} - -func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error { - switch llm.Version { - case 3: - llm.V3.NumTensor = uint64(len(tensors)) - llm.V3.NumKV = uint64(len(kv)) - default: - return fmt.Errorf("not implemented: ggufv%d", llm.Version) - } - - if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil { +func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error { + if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { return err } - if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil { + if err := 
binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil { return err } - if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil { + if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil { return err } - if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil { + if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil { return err } - kvCheck := make(map[string]bool) - for k := range kv { - kvCheck[k] = false - } + keys := maps.Keys(kv) + slices.Sort(keys) - for _, k := range ggufKVOrder["llama"] { - v, ok := kv[k] - if !ok { - continue - } - kvCheck[k] = true - - if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil { - return err - } - - var err error - switch v := v.(type) { - case uint32: - err = writeGGUF(llm, ws, ggufTypeUint32, v) - case float32: - err = writeGGUF(llm, ws, ggufTypeFloat32, v) - case bool: - err = writeGGUF(llm, ws, ggufTypeBool, v) - case string: - err = writeGGUFString(llm, ws, v) - case []int32: - err = writeGGUFArray(llm, ws, ggufTypeInt32, v) - case []uint32: - err = writeGGUFArray(llm, ws, ggufTypeUint32, v) - case []float32: - err = writeGGUFArray(llm, ws, ggufTypeFloat32, v) - case []string: - if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil { - return err - } - - for _, e := range v { - if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil { - return err - } - } - default: - return fmt.Errorf("improper type for '%s'", k) - } - if err != nil { + for _, key := range keys { + if err := ggufWriteKV(ws, key, kv[key]); err != nil { return err } } - for k, v := 
range kvCheck { - if !v { - return fmt.Errorf("Didn't know how to write kv %s", k) + slices.SortFunc(ts, func(a, b *Tensor) int { + var i, j int + if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 { + return cmp.Compare(a.Name, b.Name) + } else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 { + return cmp.Compare(a.Name, b.Name) } - } - for _, tensor := range tensors { - if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil { - return err - } - - var dims int - for cnt := range len(tensor.Shape) { - if tensor.Shape[cnt] > 0 { - dims++ - } - } - - if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil { - return err - } - - for i := range dims { - if err := binary.Write(ws, llm.ByteOrder, tensor.Shape[dims-1-i]); err != nil { - return err - } - } - - if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil { - return err - } - - if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil { + return cmp.Compare(i, j) + }) + + var s uint64 + for _, t := range ts { + t.Offset = s + if err := ggufWriteTensorInfo(ws, t); err != nil { return err } + s += t.Size() } var alignment int64 = 32 - for _, tensor := range tensors { - offset, err := ws.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - - padding := llm.padding(offset, alignment) - if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil { - return err - } - - if _, err := tensor.WriteTo(ws); err != nil { + for _, t := range ts { + if err := ggufWriteTensor(ws, t, alignment); err != nil { return err } } @@ -705,6 +567,102 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error { return nil } -func (gguf) padding(offset, align int64) int64 { +func ggufWriteKV(ws io.WriteSeeker, k string, v any) error { + slog.Debug(k, "type", fmt.Sprintf("%T", v)) + if err := 
binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil { + return err + } + + var err error + switch v := v.(type) { + case uint32: + err = writeGGUF(ws, ggufTypeUint32, v) + case float32: + err = writeGGUF(ws, ggufTypeFloat32, v) + case bool: + err = writeGGUF(ws, ggufTypeBool, v) + case string: + err = writeGGUFString(ws, v) + case []int32: + err = writeGGUFArray(ws, ggufTypeInt32, v) + case []uint32: + err = writeGGUFArray(ws, ggufTypeUint32, v) + case []float32: + err = writeGGUFArray(ws, ggufTypeFloat32, v) + case []string: + if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil { + return err + } + + for _, e := range v { + if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil { + return err + } + } + default: + return fmt.Errorf("improper type for '%s'", k) + } + + return err +} + +func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error { + slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset) + if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil { + return err + } + + for i := range len(t.Shape) { + if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil { + return err + } + } + + if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil { + return err + } + + return binary.Write(ws, binary.LittleEndian, t.Offset) +} + +func 
ggufWriteTensor(ws io.WriteSeeker, t *Tensor, alignment int64) error { + offset, err := ws.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + + if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil { + return err + } + + _, err = t.WriteTo(ws) + return err +} + +func ggufPadding(offset, align int64) int64 { return (align - offset%align) % align } diff --git a/llm/memory_test.go b/llm/memory_test.go index 06ae7438..18c797ee 100644 --- a/llm/memory_test.go +++ b/llm/memory_test.go @@ -2,7 +2,6 @@ package llm import ( "bytes" - "encoding/binary" "fmt" "os" "testing" @@ -20,10 +19,9 @@ func TestEstimateGPULayers(t *testing.T) { f, err := os.CreateTemp(t.TempDir(), modelName) require.NoError(t, err) defer f.Close() - gguf := NewGGUFV3(binary.LittleEndian) inputLayerCount := 5 - tensors := []Tensor{ + tensors := []*Tensor{ {Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, @@ -32,7 +30,7 @@ func TestEstimateGPULayers(t *testing.T) { {Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, } assert.Len(t, tensors, inputLayerCount+1) - err = gguf.Encode(f, KV{ + err = WriteGGUF(f, KV{ "general.architecture": "llama", "general.name": "name", "llama.context_length": uint32(32), diff --git a/server/model.go b/server/model.go index c6d3078f..81272a34 100644 --- a/server/model.go +++ b/server/model.go @@ -143,30 +143,6 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(a return nil, err } - mf, err := convert.GetModelFormat(tempDir) - 
if err != nil { - return nil, err - } - - params, err := mf.GetParams(tempDir) - if err != nil { - return nil, err - } - - mArch, err := mf.GetModelArch("", tempDir, params) - if err != nil { - return nil, err - } - - fn(api.ProgressResponse{Status: "processing tensors"}) - if err := mArch.GetTensors(); err != nil { - return nil, err - } - - if err := mArch.LoadVocab(); err != nil { - return nil, err - } - fn(api.ProgressResponse{Status: "converting model"}) // TODO(mxyng): this should write directly into a layer @@ -178,7 +154,7 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(a defer temp.Close() defer os.Remove(temp.Name()) - if err = mArch.WriteGGUF(temp); err != nil { + if err := convert.Convert(tempDir, temp); err != nil { return nil, err } diff --git a/server/routes_create_test.go b/server/routes_create_test.go index 8c714209..4d616d8d 100644 --- a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -2,7 +2,6 @@ package server import ( "bytes" - "encoding/binary" "encoding/json" "fmt" "io" @@ -20,7 +19,7 @@ import ( var stream bool = false -func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { +func createBinFile(t *testing.T, kv map[string]any, ti []*llm.Tensor) string { t.Helper() f, err := os.CreateTemp(t.TempDir(), "") @@ -29,7 +28,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { } defer f.Close() - if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil { + if err := llm.WriteGGUF(f, kv, ti); err != nil { t.Fatal(err) } diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go index 5c0caff1..02f95dd2 100644 --- a/server/routes_generate_test.go +++ b/server/routes_generate_test.go @@ -101,7 +101,7 @@ func TestGenerateChat(t *testing.T) { "tokenizer.ggml.tokens": []string{""}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, []llm.Tensor{ + }, []*llm.Tensor{ {Name: 
"token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, @@ -149,7 +149,7 @@ func TestGenerateChat(t *testing.T) { Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ "general.architecture": "bert", "bert.pooling_type": uint32(0), - }, []llm.Tensor{})), + }, []*llm.Tensor{})), Stream: &stream, }) @@ -399,7 +399,7 @@ func TestGenerate(t *testing.T) { "tokenizer.ggml.tokens": []string{""}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, []llm.Tensor{ + }, []*llm.Tensor{ {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, @@ -447,7 +447,7 @@ func TestGenerate(t *testing.T) { Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ "general.architecture": "bert", "bert.pooling_type": uint32(0), - }, []llm.Tensor{})), + }, []*llm.Tensor{})), Stream: &stream, }) diff --git a/server/sched_test.go b/server/sched_test.go index 6959dace..f3c55514 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -3,7 +3,6 @@ package server import ( "bytes" "context" - "encoding/binary" "fmt" "log/slog" "os" @@ -114,8 +113,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est require.NoError(t, err) defer f.Close() - gguf := llm.NewGGUFV3(binary.LittleEndian) - err = gguf.Encode(f, llm.KV{ + require.NoError(t, llm.WriteGGUF(f, llm.KV{ "general.architecture": "llama", "general.name": "name", "llama.context_length": uint32(32), @@ -126,10 +124,10 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est "tokenizer.ggml.tokens": 
[]string{" "}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, []llm.Tensor{ + }, []*llm.Tensor{ {Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, - }) + })) require.NoError(t, err) fname := f.Name() From df993fa37bde19039231001be9f852386a12a860 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 8 Jul 2024 16:59:48 -0700 Subject: [PATCH 3/6] comments --- convert/convert.go | 46 +++++++++++++++++----------------- convert/convert_gemma.go | 6 ++--- convert/convert_llama.go | 6 ++--- convert/convert_mixtral.go | 6 ++--- convert/reader.go | 9 +++++-- convert/reader_safetensors.go | 5 ++-- convert/tokenizer.go | 15 +++++------ llm/gguf.go | 17 +++++-------- llm/memory_test.go | 2 +- server/routes_create_test.go | 2 +- server/routes_generate_test.go | 8 +++--- server/sched_test.go | 2 +- 12 files changed, 63 insertions(+), 61 deletions(-) diff --git a/convert/convert.go b/convert/convert.go index 4ad64d72..30c5a53f 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -40,13 +40,13 @@ func (Parameters) KV(t *Tokenizer) llm.KV { return kv } -func (Parameters) specialTypes() []string { +func (Parameters) specialTokenTypes() []string { return []string{ "bos", "eos", "unk", "sep", "pad", "cls", "mask", } } -func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []*llm.Tensor) error { +func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { return llm.WriteGGUF(ws, kv, ts) } @@ -54,24 +54,27 @@ type Converter interface { // KV maps parameters to LLM key-values KV(*Tokenizer) llm.KV // Tensors maps input tensors to LLM tensors. Model specific modifications can be done here. 
- Tensors([]Tensor) []*llm.Tensor + Tensors([]Tensor) []llm.Tensor // tensorName returns the LLM tensor name for a specific input name tensorName(string) string - // specialTypes returns any special token types the model uses - specialTypes() []string - writeFile(io.WriteSeeker, llm.KV, []*llm.Tensor) error + // specialTokenTypes returns any special token types the model uses + specialTokenTypes() []string + writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error } -func Convert(d string, ws io.WriteSeeker) error { - f, err := os.Open(filepath.Join(d, "config.json")) +// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations +// and files it finds in the input path. +// Supported input model formats include safetensors. +// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. +func Convert(path string, ws io.WriteSeeker) error { + bts, err := os.ReadFile(filepath.Join(path, "config.json")) if err != nil { return err } - defer f.Close() var p Parameters - if err := json.NewDecoder(f).Decode(&p); err != nil { + if err := json.Unmarshal(bts, &p); err != nil { return err } @@ -79,28 +82,23 @@ func Convert(d string, ws io.WriteSeeker) error { return errors.New("unknown architecture") } - var c Converter + var conv Converter switch p.Architectures[0] { case "LlamaForCausalLM", "MistralForCausalLM": - c = &llama{} + conv = &llama{} case "MixtralForCausalLM": - c = &mixtral{} + conv = &mixtral{} case "GemmaForCausalLM": - c = &gemma{} + conv = &gemma{} default: return errors.New("unsupported architecture") } - bts, err := os.ReadFile(filepath.Join(d, "config.json")) - if err != nil { + if err := json.Unmarshal(bts, conv); err != nil { return err } - if err := json.Unmarshal(bts, c); err != nil { - return err - } - - t, err := parseTokenizer(d, c.specialTypes()) + t, err := parseTokenizer(path, conv.specialTokenTypes()) if err != nil { return err } @@ -112,12 +110,14 @@ func Convert(d string, ws 
io.WriteSeeker) error { t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1) t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined) } + } else { + slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens)) } - ts, err := parseTensors(d) + ts, err := parseTensors(path) if err != nil { return err } - return c.writeFile(ws, c.KV(t), c.Tensors(ts)) + return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) } diff --git a/convert/convert_gemma.go b/convert/convert_gemma.go index 332fee7f..9213e157 100644 --- a/convert/convert_gemma.go +++ b/convert/convert_gemma.go @@ -43,15 +43,15 @@ func (p *gemma) KV(t *Tokenizer) llm.KV { return kv } -func (p *gemma) Tensors(ts []Tensor) []*llm.Tensor { - var out []*llm.Tensor +func (p *gemma) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor for _, t := range ts { name := p.tensorName(t.Name()) if strings.HasSuffix(name, "_norm.weight") { t.SetRepacker(p.addOne) } - out = append(out, &llm.Tensor{ + out = append(out, llm.Tensor{ Name: name, Kind: t.Kind(), Shape: t.Shape(), diff --git a/convert/convert_llama.go b/convert/convert_llama.go index 700049d3..ed6469c5 100644 --- a/convert/convert_llama.go +++ b/convert/convert_llama.go @@ -96,8 +96,8 @@ func (p *llama) KV(t *Tokenizer) llm.KV { return kv } -func (p *llama) Tensors(ts []Tensor) []*llm.Tensor { - var out []*llm.Tensor +func (p *llama) Tensors(ts []Tensor) []llm.Tensor { + var out []llm.Tensor for _, t := range ts { name := p.tensorName(t.Name()) if strings.HasSuffix(name, "attn_q.weight") || @@ -105,7 +105,7 @@ func (p *llama) Tensors(ts []Tensor) []*llm.Tensor { t.SetRepacker(p.repack) } - out = append(out, &llm.Tensor{ + out = append(out, llm.Tensor{ Name: name, Kind: t.Kind(), Shape: t.Shape(), diff --git a/convert/convert_mixtral.go b/convert/convert_mixtral.go index c55a27f8..3263a27b 100644 --- a/convert/convert_mixtral.go +++ b/convert/convert_mixtral.go @@ -31,7 +31,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV { return kv } -func (p 
*mixtral) Tensors(ts []Tensor) []*llm.Tensor { +func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor { oldnew := []string{ "model.layers", "blk", "w1", "ffn_gate_exps", @@ -58,10 +58,10 @@ func (p *mixtral) Tensors(ts []Tensor) []*llm.Tensor { return true }) - var out []*llm.Tensor + var out []llm.Tensor for n, e := range experts { // TODO(mxyng): sanity check experts - out = append(out, &llm.Tensor{ + out = append(out, llm.Tensor{ Name: n, Kind: e[0].Kind(), Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...), diff --git a/convert/reader.go b/convert/reader.go index 9be8ac2e..11ccaa81 100644 --- a/convert/reader.go +++ b/convert/reader.go @@ -29,6 +29,11 @@ func (t tensorBase) Shape() []uint64 { return t.shape } +const ( + tensorKindF32 uint32 = iota + tensorKindF16 +) + func (t tensorBase) Kind() uint32 { if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") { return 0 @@ -38,9 +43,9 @@ func (t tensorBase) Kind() uint32 { case 0: panic("invalid tensor shape") case 1: - return 0 + return tensorKindF32 default: - return 1 + return tensorKindF16 } } diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go index 440581af..d43c59a5 100644 --- a/convert/reader_safetensors.go +++ b/convert/reader_safetensors.go @@ -66,6 +66,7 @@ func parseSafetensors(ps ...string) ([]Tensor, error) { return ts, nil } +// safetensorsPad returns the padded size of the safetensors file given a length n and offset s func safetensorsPad(n, s int64) int64 { return 8 + n + s } @@ -125,9 +126,9 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) { } switch st.Kind() { - case 0: + case tensorKindF32: return 0, binary.Write(w, binary.LittleEndian, f32s) - case 1: + case tensorKindF16: f16s := make([]uint16, len(f32s)) for i := range f32s { f16s[i] = float16.Fromfloat32(f32s[i]).Bits() diff --git a/convert/tokenizer.go b/convert/tokenizer.go index baee04aa..43d8c14e 100644 --- a/convert/tokenizer.go +++ b/convert/tokenizer.go @@ -32,7 +32,7 @@ type Tokenizer 
struct { Template string } -func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) { +func parseTokenizer(d string, specialTokenTypes []string) (*Tokenizer, error) { v, err := parseVocabulary(d) if err != nil { return nil, err @@ -66,6 +66,8 @@ func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) { switch pt.Type { case "Split": if pt.Pattern.Regex != "" { + // create a checksum of all Split pretokenizers which should be sufficient + // to identify the pretokenizer sha256sum.Write([]byte(pt.Pattern.Regex)) } } @@ -102,7 +104,7 @@ func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) { } } - for _, st := range specialTypes { + for _, st := range specialTokenTypes { sv := SpecialVocabulary{Type: st} if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok { if err := json.Unmarshal(bts, &sv.AddToken); err != nil { @@ -224,14 +226,13 @@ func parseVocabulary(d string) (*Vocabulary, error) { } for pattern, parseFn := range patterns { - matches, err := filepath.Glob(filepath.Join(d, pattern)) - if err != nil { + if _, err := os.Stat(filepath.Join(d, pattern)); errors.Is(err, os.ErrNotExist) { + continue + } else if err != nil { return nil, err } - if len(matches) > 0 { - return parseFn(d) - } + return parseFn(d) } return nil, errors.New("unknown tensor format") diff --git a/llm/gguf.go b/llm/gguf.go index e61babf2..98158313 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -489,6 +489,7 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) { return a, nil } +// writeGGUFArray writes a slice s of type E to the write with a gguf type of t func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error { if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil { return err @@ -502,16 +503,10 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error { return err } - for _, e := range s { - if err := binary.Write(w, binary.LittleEndian, e); err != nil { - return err - } - } - - return 
nil + return binary.Write(w, binary.LittleEndian, s) } -func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error { +func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { return err } @@ -537,7 +532,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error { } } - slices.SortFunc(ts, func(a, b *Tensor) int { + slices.SortFunc(ts, func(a, b Tensor) int { var i, j int if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 { return cmp.Compare(a.Name, b.Name) @@ -622,7 +617,7 @@ func ggufWriteKV(ws io.WriteSeeker, k string, v any) error { return err } -func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error { +func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error { slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset) if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil { return err @@ -649,7 +644,7 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error { return binary.Write(ws, binary.LittleEndian, t.Offset) } -func ggufWriteTensor(ws io.WriteSeeker, t *Tensor, alignment int64) error { +func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error { offset, err := ws.Seek(0, io.SeekCurrent) if err != nil { return err diff --git a/llm/memory_test.go b/llm/memory_test.go index 18c797ee..3220c8df 100644 --- a/llm/memory_test.go +++ b/llm/memory_test.go @@ -21,7 +21,7 @@ func TestEstimateGPULayers(t *testing.T) { defer f.Close() inputLayerCount := 5 - tensors := []*Tensor{ + tensors := []Tensor{ {Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: 
bytes.NewReader(make([]byte, 32))}, diff --git a/server/routes_create_test.go b/server/routes_create_test.go index 4d616d8d..9b7009df 100644 --- a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -19,7 +19,7 @@ import ( var stream bool = false -func createBinFile(t *testing.T, kv map[string]any, ti []*llm.Tensor) string { +func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { t.Helper() f, err := os.CreateTemp(t.TempDir(), "") diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go index 02f95dd2..5c0caff1 100644 --- a/server/routes_generate_test.go +++ b/server/routes_generate_test.go @@ -101,7 +101,7 @@ func TestGenerateChat(t *testing.T) { "tokenizer.ggml.tokens": []string{""}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, []*llm.Tensor{ + }, []llm.Tensor{ {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, @@ -149,7 +149,7 @@ func TestGenerateChat(t *testing.T) { Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ "general.architecture": "bert", "bert.pooling_type": uint32(0), - }, []*llm.Tensor{})), + }, []llm.Tensor{})), Stream: &stream, }) @@ -399,7 +399,7 @@ func TestGenerate(t *testing.T) { "tokenizer.ggml.tokens": []string{""}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, []*llm.Tensor{ + }, []llm.Tensor{ {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, @@ -447,7 +447,7 @@ func TestGenerate(t *testing.T) { Modelfile: fmt.Sprintf("FROM %s", 
createBinFile(t, llm.KV{ "general.architecture": "bert", "bert.pooling_type": uint32(0), - }, []*llm.Tensor{})), + }, []llm.Tensor{})), Stream: &stream, }) diff --git a/server/sched_test.go b/server/sched_test.go index f3c55514..80395714 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -124,7 +124,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est "tokenizer.ggml.tokens": []string{" "}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, []*llm.Tensor{ + }, []llm.Tensor{ {Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, })) From 781fc2d5769bd1df7895dc2a18ab44830f6684fc Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 31 Jul 2024 10:58:22 -0700 Subject: [PATCH 4/6] Update convert/reader_safetensors.go Co-authored-by: Jeffrey Morgan --- convert/reader_safetensors.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go index d43c59a5..c5fe663c 100644 --- a/convert/reader_safetensors.go +++ b/convert/reader_safetensors.go @@ -67,8 +67,8 @@ func parseSafetensors(ps ...string) ([]Tensor, error) { } // safetensorsPad returns the padded size of the safetensors file given a length n and offset s -func safetensorsPad(n, s int64) int64 { - return 8 + n + s +func safetensorsPad(n, offset int64) int64 { + return 8 + n + offset } type safetensor struct { From eafc607abb3422a7d8e488aeb7a129a67a1f75c6 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Sat, 29 Jun 2024 16:53:59 -0700 Subject: [PATCH 5/6] convert: only extract large files --- convert/convert.go | 11 ++-- convert/convert_test.go | 7 ++- convert/fs.go | 58 +++++++++++++++++++ convert/reader.go | 10 ++-- convert/reader_safetensors.go | 
20 +++++-- convert/reader_torch.go | 3 +- convert/tokenizer.go | 22 ++++---- convert/tokenizer_spm.go | 8 +-- server/model.go | 79 ++++++-------------------- server/model_test.go | 102 ---------------------------------- 10 files changed, 120 insertions(+), 200 deletions(-) create mode 100644 convert/fs.go diff --git a/convert/convert.go b/convert/convert.go index 30c5a53f..b9461e4f 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -5,9 +5,8 @@ import ( "errors" "fmt" "io" + "io/fs" "log/slog" - "os" - "path/filepath" "github.com/ollama/ollama/llm" ) @@ -67,8 +66,8 @@ type Converter interface { // and files it finds in the input path. // Supported input model formats include safetensors. // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. -func Convert(path string, ws io.WriteSeeker) error { - bts, err := os.ReadFile(filepath.Join(path, "config.json")) +func Convert(fsys fs.FS, ws io.WriteSeeker) error { + bts, err := fs.ReadFile(fsys, "config.json") if err != nil { return err } @@ -98,7 +97,7 @@ func Convert(path string, ws io.WriteSeeker) error { return err } - t, err := parseTokenizer(path, conv.specialTokenTypes()) + t, err := parseTokenizer(fsys, conv.specialTokenTypes()) if err != nil { return err } @@ -114,7 +113,7 @@ func Convert(path string, ws io.WriteSeeker) error { slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens)) } - ts, err := parseTensors(path) + ts, err := parseTensors(fsys) if err != nil { return err } diff --git a/convert/convert_test.go b/convert/convert_test.go index 0fbd436f..67a2fcfe 100644 --- a/convert/convert_test.go +++ b/convert/convert_test.go @@ -6,6 +6,7 @@ import ( "flag" "fmt" "io" + "io/fs" "log/slog" "math" "os" @@ -17,7 +18,7 @@ import ( "golang.org/x/exp/maps" ) -func convertFull(t *testing.T, d string) (*os.File, llm.KV, llm.Tensors) { +func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) { t.Helper() f, err := os.CreateTemp(t.TempDir(), "f16") @@ 
-26,7 +27,7 @@ func convertFull(t *testing.T, d string) (*os.File, llm.KV, llm.Tensors) { } defer f.Close() - if err := Convert(d, f); err != nil { + if err := Convert(fsys, f); err != nil { t.Fatal(err) } @@ -76,7 +77,7 @@ func TestConvertFull(t *testing.T) { t.Skipf("%s not found", p) } - f, kv, tensors := convertFull(t, p) + f, kv, tensors := convertFull(t, os.DirFS(p)) actual := make(map[string]string) for k, v := range kv { if s, ok := v.(json.Marshaler); !ok { diff --git a/convert/fs.go b/convert/fs.go new file mode 100644 index 00000000..bf6da6c2 --- /dev/null +++ b/convert/fs.go @@ -0,0 +1,58 @@ +package convert + +import ( + "archive/zip" + "errors" + "io" + "io/fs" + "os" + "path/filepath" +) + +type ZipReader struct { + r *zip.Reader + p string + + // limit is the maximum size of a file that can be read directly + // from the zip archive. Files larger than this size will be extracted + limit int64 +} + +func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS { + return &ZipReader{r, p, limit} +} + +func (z *ZipReader) Open(name string) (fs.File, error) { + r, err := z.r.Open(name) + if err != nil { + return nil, err + } + defer r.Close() + + if fi, err := r.Stat(); err != nil { + return nil, err + } else if fi.Size() < z.limit { + return r, nil + } + + if !filepath.IsLocal(name) { + return nil, zip.ErrInsecurePath + } + + n := filepath.Join(z.p, name) + if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) { + w, err := os.Create(n) + if err != nil { + return nil, err + } + defer w.Close() + + if _, err := io.Copy(w, r); err != nil { + return nil, err + } + } else if err != nil { + return nil, err + } + + return os.Open(n) +} diff --git a/convert/reader.go b/convert/reader.go index 11ccaa81..56a8ae89 100644 --- a/convert/reader.go +++ b/convert/reader.go @@ -3,7 +3,7 @@ package convert import ( "errors" "io" - "path/filepath" + "io/fs" "strings" ) @@ -55,8 +55,8 @@ func (t *tensorBase) SetRepacker(fn repacker) { type repacker func(string, 
[]float32, []uint64) ([]float32, error) -func parseTensors(d string) ([]Tensor, error) { - patterns := map[string]func(...string) ([]Tensor, error){ +func parseTensors(fsys fs.FS) ([]Tensor, error) { + patterns := map[string]func(fs.FS, ...string) ([]Tensor, error){ "model-*-of-*.safetensors": parseSafetensors, "model.safetensors": parseSafetensors, "pytorch_model-*-of-*.bin": parseTorch, @@ -65,13 +65,13 @@ func parseTensors(d string) ([]Tensor, error) { } for pattern, parseFn := range patterns { - matches, err := filepath.Glob(filepath.Join(d, pattern)) + matches, err := fs.Glob(fsys, pattern) if err != nil { return nil, err } if len(matches) > 0 { - return parseFn(matches...) + return parseFn(fsys, matches...) } } diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go index c5fe663c..1c169504 100644 --- a/convert/reader_safetensors.go +++ b/convert/reader_safetensors.go @@ -6,7 +6,7 @@ import ( "encoding/json" "fmt" "io" - "os" + "io/fs" "slices" "github.com/d4l3k/go-bfloat16" @@ -20,10 +20,10 @@ type safetensorMetadata struct { Offsets []int64 `json:"data_offsets"` } -func parseSafetensors(ps ...string) ([]Tensor, error) { +func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) { var ts []Tensor for _, p := range ps { - f, err := os.Open(p) + f, err := fsys.Open(p) if err != nil { return nil, err } @@ -50,6 +50,7 @@ func parseSafetensors(ps ...string) ([]Tensor, error) { for _, key := range keys { if value := headers[key]; value.Type != "" { ts = append(ts, safetensor{ + fs: fsys, path: p, dtype: value.Type, offset: safetensorsPad(n, value.Offsets[0]), @@ -72,6 +73,7 @@ func safetensorsPad(n, offset int64) int64 { } type safetensor struct { + fs fs.FS path string dtype string offset int64 @@ -80,14 +82,20 @@ type safetensor struct { } func (st safetensor) WriteTo(w io.Writer) (int64, error) { - f, err := os.Open(st.path) + f, err := st.fs.Open(st.path) if err != nil { return 0, err } defer f.Close() - if _, err = f.Seek(st.offset, 
io.SeekStart); err != nil { - return 0, err + if seeker, ok := f.(io.Seeker); ok { + if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil { + return 0, err + } + } else { + if _, err := io.CopyN(io.Discard, f, st.offset); err != nil { + return 0, err + } } var f32s []float32 diff --git a/convert/reader_torch.go b/convert/reader_torch.go index 1428706e..531996bf 100644 --- a/convert/reader_torch.go +++ b/convert/reader_torch.go @@ -2,12 +2,13 @@ package convert import ( "io" + "io/fs" "github.com/nlpodyssey/gopickle/pytorch" "github.com/nlpodyssey/gopickle/types" ) -func parseTorch(ps ...string) ([]Tensor, error) { +func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) { var ts []Tensor for _, p := range ps { pt, err := pytorch.Load(p) diff --git a/convert/tokenizer.go b/convert/tokenizer.go index 43d8c14e..cca40eb0 100644 --- a/convert/tokenizer.go +++ b/convert/tokenizer.go @@ -7,9 +7,9 @@ import ( "encoding/json" "errors" "fmt" + "io/fs" "log/slog" "os" - "path/filepath" "slices" ) @@ -32,8 +32,8 @@ type Tokenizer struct { Template string } -func parseTokenizer(d string, specialTokenTypes []string) (*Tokenizer, error) { - v, err := parseVocabulary(d) +func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) { + v, err := parseVocabulary(fsys) if err != nil { return nil, err } @@ -44,7 +44,7 @@ func parseTokenizer(d string, specialTokenTypes []string) (*Tokenizer, error) { } addedTokens := make(map[string]token) - if f, err := os.Open(filepath.Join(d, "tokenizer.json")); errors.Is(err, os.ErrNotExist) { + if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) { } else if err != nil { return nil, err } else { @@ -87,7 +87,7 @@ func parseTokenizer(d string, specialTokenTypes []string) (*Tokenizer, error) { } } - if f, err := os.Open(filepath.Join(d, "tokenizer_config.json")); errors.Is(err, os.ErrNotExist) { + if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) { } else if err != nil 
{ return nil, err } else { @@ -172,8 +172,8 @@ type Vocabulary struct { Types []int32 } -func parseVocabularyFromTokenizer(p string) (*Vocabulary, error) { - f, err := os.Open(filepath.Join(p, "tokenizer.json")) +func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) { + f, err := fsys.Open("tokenizer.json") if err != nil { return nil, err } @@ -219,20 +219,20 @@ func parseVocabularyFromTokenizer(p string) (*Vocabulary, error) { return &v, nil } -func parseVocabulary(d string) (*Vocabulary, error) { - patterns := map[string]func(string) (*Vocabulary, error){ +func parseVocabulary(fsys fs.FS) (*Vocabulary, error) { + patterns := map[string]func(fs.FS) (*Vocabulary, error){ "tokenizer.model": parseSentencePiece, "tokenizer.json": parseVocabularyFromTokenizer, } for pattern, parseFn := range patterns { - if _, err := os.Stat(filepath.Join(d, pattern)); errors.Is(err, os.ErrNotExist) { + if _, err := fs.Stat(fsys, pattern); errors.Is(err, os.ErrNotExist) { continue } else if err != nil { return nil, err } - return parseFn(d) + return parseFn(fsys) } return nil, errors.New("unknown tensor format") diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go index 75d9fe26..babf702c 100644 --- a/convert/tokenizer_spm.go +++ b/convert/tokenizer_spm.go @@ -5,8 +5,8 @@ import ( "encoding/json" "errors" "fmt" + "io/fs" "os" - "path/filepath" "slices" "google.golang.org/protobuf/proto" @@ -14,8 +14,8 @@ import ( "github.com/ollama/ollama/convert/sentencepiece" ) -func parseSentencePiece(d string) (*Vocabulary, error) { - bts, err := os.ReadFile(filepath.Join(d, "tokenizer.model")) +func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) { + bts, err := fs.ReadFile(fsys, "tokenizer.model") if err != nil { return nil, err } @@ -41,7 +41,7 @@ func parseSentencePiece(d string) (*Vocabulary, error) { } } - f, err := os.Open(filepath.Join(d, "added_tokens.json")) + f, err := fsys.Open("added_tokens.json") if errors.Is(err, os.ErrNotExist) { return &v, nil } else if 
err != nil { diff --git a/server/model.go b/server/model.go index 81272a34..f2946a0b 100644 --- a/server/model.go +++ b/server/model.go @@ -81,88 +81,43 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe return layers, nil } -func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error { - stat, err := file.Stat() - if err != nil { - return err - } - - r, err := zip.NewReader(file, stat.Size()) - if err != nil { - return err - } - - fn(api.ProgressResponse{Status: "unpacking model metadata"}) - for _, f := range r.File { - if !filepath.IsLocal(f.Name) { - return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name) - } - - n := filepath.Join(p, f.Name) - if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil { - return err - } - - // TODO(mxyng): this should not write out all files to disk - outfile, err := os.Create(n) - if err != nil { - return err - } - defer outfile.Close() - - infile, err := f.Open() - if err != nil { - return err - } - defer infile.Close() - - if _, err = io.Copy(outfile, infile); err != nil { - return err - } - - if err := outfile.Close(); err != nil { - return err - } - - if err := infile.Close(); err != nil { - return err - } - } - - return nil -} - -func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) { - tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "") +func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) { + fi, err := f.Stat() if err != nil { return nil, err } - defer os.RemoveAll(tempDir) - if err := extractFromZipFile(tempDir, file, fn); err != nil { + r, err := zip.NewReader(f, fi.Size()) + if err != nil { return nil, err } + p, err := os.MkdirTemp(filepath.Dir(f.Name()), "") + if err != nil { + return nil, err + } + defer os.RemoveAll(p) + fn(api.ProgressResponse{Status: "converting model"}) - // 
TODO(mxyng): this should write directly into a layer // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model") - temp, err := os.CreateTemp(tempDir, "fp16") + t, err := os.CreateTemp(p, "fp16") if err != nil { return nil, err } - defer temp.Close() - defer os.Remove(temp.Name()) + defer t.Close() + defer os.Remove(t.Name()) - if err := convert.Convert(tempDir, temp); err != nil { + fn(api.ProgressResponse{Status: "converting model"}) + if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil { return nil, err } - if _, err := temp.Seek(0, io.SeekStart); err != nil { + if _, err := t.Seek(0, io.SeekStart); err != nil { return nil, err } - layer, err := NewLayer(temp, "application/vnd.ollama.image.model") + layer, err := NewLayer(t, "application/vnd.ollama.image.model") if err != nil { return nil, err } diff --git a/server/model_test.go b/server/model_test.go index 5829adfc..0a2225d5 100644 --- a/server/model_test.go +++ b/server/model_test.go @@ -1,16 +1,11 @@ package server import ( - "archive/zip" "bytes" "encoding/json" - "errors" "fmt" - "io" "os" "path/filepath" - "slices" - "strings" "testing" "github.com/google/go-cmp/cmp" @@ -18,103 +13,6 @@ import ( "github.com/ollama/ollama/template" ) -func createZipFile(t *testing.T, name string) *os.File { - t.Helper() - - f, err := os.CreateTemp(t.TempDir(), "") - if err != nil { - t.Fatal(err) - } - - zf := zip.NewWriter(f) - defer zf.Close() - - zh, err := zf.CreateHeader(&zip.FileHeader{Name: name}) - if err != nil { - t.Fatal(err) - } - - if _, err := io.Copy(zh, bytes.NewReader([]byte(""))); err != nil { - t.Fatal(err) - } - - return f -} - -func TestExtractFromZipFile(t *testing.T) { - cases := []struct { - name string - expect []string - err error - }{ - { - name: "good", - expect: []string{"good"}, - }, - { - name: strings.Join([]string{"path", "..", "to", "good"}, string(os.PathSeparator)), - expect: []string{filepath.Join("to", "good")}, - }, - { - name: 
strings.Join([]string{"path", "..", "to", "..", "good"}, string(os.PathSeparator)), - expect: []string{"good"}, - }, - { - name: strings.Join([]string{"path", "to", "..", "..", "good"}, string(os.PathSeparator)), - expect: []string{"good"}, - }, - { - name: strings.Join([]string{"..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "bad"}, string(os.PathSeparator)), - err: zip.ErrInsecurePath, - }, - { - name: strings.Join([]string{"path", "..", "..", "to", "bad"}, string(os.PathSeparator)), - err: zip.ErrInsecurePath, - }, - } - - for _, tt := range cases { - t.Run(tt.name, func(t *testing.T) { - f := createZipFile(t, tt.name) - defer f.Close() - - tempDir := t.TempDir() - if err := extractFromZipFile(tempDir, f, func(api.ProgressResponse) {}); !errors.Is(err, tt.err) { - t.Fatal(err) - } - - var matches []string - if err := filepath.Walk(tempDir, func(p string, fi os.FileInfo, err error) error { - if err != nil { - return err - } - - if !fi.IsDir() { - matches = append(matches, p) - } - - return nil - }); err != nil { - t.Fatal(err) - } - - var actual []string - for _, match := range matches { - rel, err := filepath.Rel(tempDir, match) - if err != nil { - t.Error(err) - } - - actual = append(actual, rel) - } - - if !slices.Equal(actual, tt.expect) { - t.Fatalf("expected %d files, got %d", len(tt.expect), len(matches)) - } - }) - } -} - func readFile(t *testing.T, base, name string) *bytes.Buffer { t.Helper() From d8e2664c33e81af0549aa9e75c57e08317d0322d Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 31 Jul 2024 15:39:11 -0700 Subject: [PATCH 6/6] convert: fix parse functions --- convert/reader.go | 21 ++++++++++++--------- convert/tokenizer.go | 15 +++++++++------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/convert/reader.go b/convert/reader.go index 56a8ae89..ce95208e 100644 --- a/convert/reader.go +++ b/convert/reader.go @@ -56,22 +56,25 @@ func (t *tensorBase) SetRepacker(fn repacker) { type 
repacker func(string, []float32, []uint64) ([]float32, error) func parseTensors(fsys fs.FS) ([]Tensor, error) { - patterns := map[string]func(fs.FS, ...string) ([]Tensor, error){ - "model-*-of-*.safetensors": parseSafetensors, - "model.safetensors": parseSafetensors, - "pytorch_model-*-of-*.bin": parseTorch, - "pytorch_model.bin": parseTorch, - "consolidated.*.pth": parseTorch, + patterns := []struct { + Pattern string + Func func(fs.FS, ...string) ([]Tensor, error) + }{ + {"model-*-of-*.safetensors", parseSafetensors}, + {"model.safetensors", parseSafetensors}, + {"pytorch_model-*-of-*.bin", parseTorch}, + {"pytorch_model.bin", parseTorch}, + {"consolidated.*.pth", parseTorch}, } - for pattern, parseFn := range patterns { - matches, err := fs.Glob(fsys, pattern) + for _, pattern := range patterns { + matches, err := fs.Glob(fsys, pattern.Pattern) if err != nil { return nil, err } if len(matches) > 0 { - return parseFn(fsys, matches...) + return pattern.Func(fsys, matches...) } } diff --git a/convert/tokenizer.go b/convert/tokenizer.go index cca40eb0..0d42a6d8 100644 --- a/convert/tokenizer.go +++ b/convert/tokenizer.go @@ -220,19 +220,22 @@ func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) { } func parseVocabulary(fsys fs.FS) (*Vocabulary, error) { - patterns := map[string]func(fs.FS) (*Vocabulary, error){ - "tokenizer.model": parseSentencePiece, - "tokenizer.json": parseVocabularyFromTokenizer, + patterns := []struct { + Pattern string + Func func(fs.FS) (*Vocabulary, error) + }{ + {"tokenizer.model", parseSentencePiece}, + {"tokenizer.json", parseVocabularyFromTokenizer}, } - for pattern, parseFn := range patterns { - if _, err := fs.Stat(fsys, pattern); errors.Is(err, os.ErrNotExist) { + for _, pattern := range patterns { + if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) { continue } else if err != nil { return nil, err } - return parseFn(fsys) + return pattern.Func(fsys) } return nil, errors.New("unknown tensor 
format")