feat: Update llama.cpp

2024-05-08 08:42:22 -04:00 · 2024-05-08 08:42:22 -04:00 · 2a39b99575
commit 2a39b99575
parent 9ce5cb376a
2 changed files with 10 additions and 3 deletions
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -296,7 +296,9 @@ LLAMA_VOCAB_TYPE_WPM = 3
 #     LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
 #     LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
 #     LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
-#     LLAMA_VOCAB_PRE_TYPE_OLMO           = 10,
+#     LLAMA_VOCAB_PRE_TYPE_QWEN2          = 10,
+#     LLAMA_VOCAB_PRE_TYPE_OLMO           = 11,
+#     LLAMA_VOCAB_PRE_TYPE_DBRX           = 12,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -308,7 +310,9 @@ LLAMA_VOCAB_PRE_TYPE_STARCODER = 6
 LLAMA_VOCAB_PRE_TYPE_GPT2 = 7
 LLAMA_VOCAB_PRE_TYPE_REFACT = 8
 LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9
-LLAMA_VOCAB_PRE_TYPE_OLMO = 10
+LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10
+LLAMA_VOCAB_PRE_TYPE_OLMO = 11
+LLAMA_VOCAB_PRE_TYPE_DBRX = 12


 # // note: these values should be synchronized with ggml_rope
@@ -377,6 +381,7 @@ LLAMA_TOKEN_TYPE_BYTE = 6
 #     LLAMA_FTYPE_MOSTLY_IQ2_M         = 29, // except 1d tensors
 #     LLAMA_FTYPE_MOSTLY_IQ4_XS        = 30, // except 1d tensors
 #     LLAMA_FTYPE_MOSTLY_IQ1_M         = 31, // except 1d tensors
+#     LLAMA_FTYPE_MOSTLY_BF16          = 32, // except 1d tensors

 #     LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
@@ -409,6 +414,8 @@ LLAMA_FTYPE_MOSTLY_IQ3_M = 27
 LLAMA_FTYPE_MOSTLY_IQ2_S = 28
 LLAMA_FTYPE_MOSTLY_IQ2_M = 29
 LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
+LLAMA_FTYPE_MOSTLY_IQ1_M = 31
+LLAMA_FTYPE_MOSTLY_BF16 = 32
 LLAMA_FTYPE_GUESSED = 1024

 # enum llama_rope_scaling_type {
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit c0e6fbf8c380718102bd25fcb8d2e55f8f9480d1
+Subproject commit 911b3900dded9a1cfe0f0e41b82c7a29baf3a217