diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0060af5..a6cb99b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- (llama.cpp) Fix struct misalignment bug
+
 ## [0.1.64]
 
 ### Added
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index d6be0ea..a516829 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -150,47 +150,43 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 
 
 # struct llama_context_params {
+#     int seed;                              // RNG seed, -1 for random
 #     int n_ctx;                             // text context
 #     int n_batch;                           // prompt processing batch size
 #     int n_gpu_layers;                      // number of layers to store in VRAM
 #     int main_gpu;                          // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-#     bool low_vram;                         // if true, reduce VRAM usage at the cost of performance
-#     int seed;                              // RNG seed, -1 for random
+#     // called with a progress value between 0 and 1, pass NULL to disable
+#     llama_progress_callback progress_callback;
+#     // context pointer passed to the progress callback
+#     void * progress_callback_user_data;
+#     // Keep the booleans together to avoid misalignment during copy-by-value.
+#     bool low_vram;                         // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv;                           // use fp16 for KV cache
 #     bool logits_all;                       // the llama_eval() call computes all logits, not just the last one
 #     bool vocab_only;                       // only load the vocabulary, no weights
 #     bool use_mmap;                         // use mmap if possible
 #     bool use_mlock;                        // force system to keep model in RAM
 #     bool embedding;                        // embedding mode only
-
-
-#     // called with a progress value between 0 and 1, pass NULL to disable
-#     llama_progress_callback progress_callback;
-#     // context pointer passed to the progress callback
-#     void * progress_callback_user_data;
 # };
 
 
 class llama_context_params(Structure):
     _fields_ = [
+        ("seed", c_int),
         ("n_ctx", c_int),
         ("n_batch", c_int),
         ("n_gpu_layers", c_int),
         ("main_gpu", c_int),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
         ("low_vram", c_bool),
-        ("seed", c_int),
         ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
         ("vocab_only", c_bool),
         ("use_mmap", c_bool),
         ("use_mlock", c_bool),
         ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
     ]
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 8596af4..2322ec2 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 8596af427722775f0df4a7c90b9af067ba90d4ef
+Subproject commit 2322ec223a21625dfe9bd73ee677444a98a24ac9