From 359ae736432cae5cdba50b011839d277a4b1ec8d Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sun, 14 Jan 2024 08:17:22 -0500
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 4 ++++
 vendor/llama.cpp       | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 989b67a..3b261cd 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -526,6 +526,7 @@ It might not exist for progress report where '.' is output repeatedly."""
 #     bool quantize_output_tensor; // quantize output.weight
 #     bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 #     bool pure; // disable k-quant mixtures and quantize all tensors to the same type
+#     void * imatrix; // pointer to importance matrix data
 # } llama_model_quantize_params;
 class llama_model_quantize_params(Structure):
     """Parameters for llama_model_quantize
@@ -537,6 +538,7 @@ class llama_model_quantize_params(Structure):
         quantize_output_tensor (bool): quantize output.weight
         only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         pure (bool): disable k-quant mixtures and quantize all tensors to the same type
+        imatrix (ctypes.c_void_p): pointer to importance matrix data
     """
 
     _fields_ = [
@@ -545,6 +547,8 @@ class llama_model_quantize_params(Structure):
         ("allow_requantize", c_bool),
         ("quantize_output_tensor", c_bool),
         ("only_copy", c_bool),
+        ("pure", c_bool),
+        ("imatrix", c_void_p),
     ]
 
 
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 76484fb..bb0c139 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 76484fbfd355df388f71d6edaa98e1692a74de7e
+Subproject commit bb0c1392479398f9aba86d9ec98db0b95ede6e6d