Update llama.cpp

Andrei Betlen 2023-06-20 11:25:10 -04:00
parent 92b0013427
commit e37798777e
3 changed files with 16 additions and 16 deletions

CHANGELOG.md

@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [Unreleased]
+### Added
+- (llama.cpp) Fix struct misalignment bug
 ## [0.1.64]
 ### Added
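
The "struct misalignment" fix noted above is the field reordering shown in the next file. As a minimal, self-contained illustration (hypothetical struct and field names, not the actual llama.cpp layout): when a ctypes Structure declares its _fields_ in a different order than the C header, every value after the first divergence is decoded from the wrong byte offset whenever the struct is copied by value.

import ctypes

class CSideLayout(ctypes.Structure):
    # field order as the (hypothetical) C library declares it
    _fields_ = [("seed", ctypes.c_int), ("low_vram", ctypes.c_bool)]

class StaleBinding(ctypes.Structure):
    # same fields, but in a stale order on the Python side
    _fields_ = [("low_vram", ctypes.c_bool), ("seed", ctypes.c_int)]

native = CSideLayout(seed=1234, low_vram=True)
# Reinterpret the C-side bytes through the stale layout, as a by-value copy would.
mismatched = StaleBinding.from_buffer_copy(bytes(native))
print(CSideLayout.seed.offset, StaleBinding.seed.offset)  # 0 vs 4
print(mismatched.seed)  # not 1234: the seed is read from the wrong offset

The diff below keeps the Python _fields_ in exactly the order of the updated llama.cpp header so the two layouts stay byte-for-byte compatible.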

llama_cpp/llama_cpp.py

@@ -150,47 +150,43 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 # struct llama_context_params {
+# int seed; // RNG seed, -1 for random
 # int n_ctx; // text context
 # int n_batch; // prompt processing batch size
 # int n_gpu_layers; // number of layers to store in VRAM
 # int main_gpu; // the GPU that is used for scratch and small tensors
 # float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-# bool low_vram; // if true, reduce VRAM usage at the cost of performance
-# int seed; // RNG seed, -1 for random
+# // called with a progress value between 0 and 1, pass NULL to disable
+# llama_progress_callback progress_callback;
+# // context pointer passed to the progress callback
+# void * progress_callback_user_data;
+# // Keep the booleans together to avoid misalignment during copy-by-value.
+# bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
 # bool logits_all; // the llama_eval() call computes all logits, not just the last one
 # bool vocab_only; // only load the vocabulary, no weights
 # bool use_mmap; // use mmap if possible
 # bool use_mlock; // force system to keep model in RAM
 # bool embedding; // embedding mode only
-# // called with a progress value between 0 and 1, pass NULL to disable
-# llama_progress_callback progress_callback;
-# // context pointer passed to the progress callback
-# void * progress_callback_user_data;
 # };
 class llama_context_params(Structure):
     _fields_ = [
+        ("seed", c_int),
         ("n_ctx", c_int),
         ("n_batch", c_int),
         ("n_gpu_layers", c_int),
         ("main_gpu", c_int),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
         ("low_vram", c_bool),
-        ("seed", c_int),
         ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
         ("vocab_only", c_bool),
         ("use_mmap", c_bool),
         ("use_mlock", c_bool),
         ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
     ]
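
Not part of the commit — a quick sanity check, assuming the installed bindings import as llama_cpp (the module containing the class above): ctypes exposes each field's byte offset and size, so the Python-side layout can be compared field by field against the C header.

import ctypes

import llama_cpp  # sketch only; assumes the package shown in this diff is installed

params_cls = llama_cpp.llama_context_params
for name, _ctype in params_cls._fields_:
    descriptor = getattr(params_cls, name)  # ctypes field descriptor
    print(f"{name:30s} offset={descriptor.offset:3d} size={descriptor.size}")
print("sizeof(llama_context_params) =", ctypes.sizeof(params_cls))

Offsets that disagree with the llama.cpp header indicate the same kind of misalignment this commit fixes.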

vendor/llama.cpp (vendored)

@@ -1 +1 @@
-Subproject commit 8596af427722775f0df4a7c90b9af067ba90d4ef
+Subproject commit 2322ec223a21625dfe9bd73ee677444a98a24ac9