From 7f52335c50be425d7dce6302ae38ecff87b0ee74 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 25 Apr 2024 21:21:29 -0400 Subject: [PATCH] feat: Update llama.cpp --- llama_cpp/llama_cpp.py | 4 ++++ vendor/llama.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index 7f5d265..3b96adc 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -811,6 +811,7 @@ It might not exist for progress report where '.' is output repeatedly.""" # bool quantize_output_tensor; // quantize output.weight # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored # bool pure; // quantize all tensors to the default type +# bool keep_split; // quantize to the same number of shards # void * imatrix; // pointer to importance matrix data # void * kv_overrides; // pointer to vector containing overrides # } llama_model_quantize_params; @@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure): quantize_output_tensor (bool): quantize output.weight only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored pure (bool): quantize all tensors to the default type + keep_split (bool): quantize to the same number of shards imatrix (ctypes.c_void_p): pointer to importance matrix data kv_overrides (ctypes.c_void_p): pointer to vector containing overrides """ @@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure): quantize_output_tensor: bool only_copy: bool pure: bool + keep_split: bool imatrix: ctypes.c_void_p kv_overrides: ctypes.c_void_p @@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure): ("quantize_output_tensor", ctypes.c_bool), ("only_copy", ctypes.c_bool), ("pure", ctypes.c_bool), + ("keep_split", ctypes.c_bool), ("imatrix", ctypes.c_void_p), ("kv_overrides", ctypes.c_void_p), ] diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 784e11d..46e12c4 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 784e11dea1f5ce9638851b2b0dddb107e2a609c8 +Subproject commit 46e12c4692a37bdd31a0432fc5153d7d22bc7f72