diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index abd7b4c..92ec299 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -180,6 +180,8 @@ LLAMA_TOKEN_TYPE_BYTE = 6
 # LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
 
@@ -200,6 +202,9 @@ LLAMA_FTYPE_MOSTLY_Q4_K_M = 15
 LLAMA_FTYPE_MOSTLY_Q5_K_S = 16
 LLAMA_FTYPE_MOSTLY_Q5_K_M = 17
 LLAMA_FTYPE_MOSTLY_Q6_K = 18
+LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19
+LLAMA_FTYPE_MOSTLY_IQ2_XS = 20
+LLAMA_FTYPE_MOSTLY_Q2_K_S = 21
 LLAMA_FTYPE_GUESSED = 1024
 
 # enum llama_rope_scaling_type {
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 6efb8eb..1d11838 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 6efb8eb30e7025b168f3fda3ff83b9b386428ad6
+Subproject commit 1d118386fea031f01550f8cd47a5c86296e5333f
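
For reference, a minimal sketch of how the newly exposed ftype constants might be driven through the bindings' existing llama_model_quantize wrapper; the model paths are placeholders, and the exact call pattern is an assumption based on the ctypes signatures in llama_cpp.py, not part of this change:

import ctypes

import llama_cpp

# Start from the library's default quantization parameters.
params = llama_cpp.llama_model_quantize_default_params()

# Select one of the ftypes this diff exposes (IQ2_XS here).
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_IQ2_XS

# Hypothetical input/output paths; the C API expects byte strings.
ret = llama_cpp.llama_model_quantize(
    b"models/ggml-model-f16.gguf",
    b"models/ggml-model-iq2_xs.gguf",
    ctypes.byref(params),
)
assert ret == 0, "quantization failed"

Note that upstream llama.cpp may additionally require an importance matrix to produce the IQ2_* formats; see the vendored commit for details.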