From 9558ce7878bebde52025cf985f3975c8c1d0cee6 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Mon, 26 Feb 2024 11:40:58 -0500
Subject: [PATCH] feat: Update llama.cpp

---
 llama_cpp/llama_cpp.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 5a934dc..fe20dba 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -256,12 +256,14 @@ LLAMA_TOKEN_TYPE_BYTE = 6
 # LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
-# LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
 
@@ -285,12 +287,14 @@ LLAMA_FTYPE_MOSTLY_Q6_K = 18
 LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19
 LLAMA_FTYPE_MOSTLY_IQ2_XS = 20
 LLAMA_FTYPE_MOSTLY_Q2_K_S = 21
-LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22
+LLAMA_FTYPE_MOSTLY_IQ3_XS = 22
 LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23
 LLAMA_FTYPE_MOSTLY_IQ1_S = 24
 LLAMA_FTYPE_MOSTLY_IQ4_NL = 25
 LLAMA_FTYPE_MOSTLY_IQ3_S = 26
 LLAMA_FTYPE_MOSTLY_IQ3_M = 27
+LLAMA_FTYPE_MOSTLY_IQ2_S = 28
+LLAMA_FTYPE_MOSTLY_IQ2_M = 29
 LLAMA_FTYPE_GUESSED = 1024
 
 # enum llama_rope_scaling_type {