diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index d8075e4..461122a 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -296,7 +296,9 @@ LLAMA_VOCAB_TYPE_WPM = 3 # LLAMA_VOCAB_PRE_TYPE_GPT2 = 7, # LLAMA_VOCAB_PRE_TYPE_REFACT = 8, # LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9, -# LLAMA_VOCAB_PRE_TYPE_OLMO = 10, +# LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10, +# LLAMA_VOCAB_PRE_TYPE_OLMO = 11, +# LLAMA_VOCAB_PRE_TYPE_DBRX = 12, # }; LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1 @@ -308,7 +310,9 @@ LLAMA_VOCAB_PRE_TYPE_STARCODER = 6 LLAMA_VOCAB_PRE_TYPE_GPT2 = 7 LLAMA_VOCAB_PRE_TYPE_REFACT = 8 LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9 -LLAMA_VOCAB_PRE_TYPE_OLMO = 10 +LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10 +LLAMA_VOCAB_PRE_TYPE_OLMO = 11 +LLAMA_VOCAB_PRE_TYPE_DBRX = 12 # // note: these values should be synchronized with ggml_rope @@ -377,6 +381,7 @@ LLAMA_TOKEN_TYPE_BYTE = 6 # LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors # LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors # LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_BF16 = 32, // except 1d tensors # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file # }; @@ -409,6 +414,8 @@ LLAMA_FTYPE_MOSTLY_IQ3_M = 27 LLAMA_FTYPE_MOSTLY_IQ2_S = 28 LLAMA_FTYPE_MOSTLY_IQ2_M = 29 LLAMA_FTYPE_MOSTLY_IQ4_XS = 30 +LLAMA_FTYPE_MOSTLY_IQ1_M = 31 +LLAMA_FTYPE_MOSTLY_BF16 = 32 LLAMA_FTYPE_GUESSED = 1024 # enum llama_rope_scaling_type { diff --git a/vendor/llama.cpp b/vendor/llama.cpp index c0e6fbf..911b390 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit c0e6fbf8c380718102bd25fcb8d2e55f8f9480d1 +Subproject commit 911b3900dded9a1cfe0f0e41b82c7a29baf3a217