Update llama.cpp
parent 15e0e0a937
commit f72b6e9b73
2 changed files with 8 additions and 2 deletions
@@ -165,12 +165,16 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)

 # int32_t n_gpu_layers; // number of layers to store in VRAM
 # int32_t main_gpu; // the GPU that is used for scratch and small tensors
 # float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs

+# // ref: https://github.com/ggerganov/llama.cpp/pull/2054
+# float rope_freq_base; // RoPE base frequency
+# float rope_freq_scale; // RoPE frequency scaling factor
+
 # // called with a progress value between 0 and 1, pass NULL to disable
 # llama_progress_callback progress_callback;
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;

 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
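The two new commented fields mirror the C struct change from llama.cpp PR 2054: rope_freq_base is the base used to derive the rotary-embedding frequencies (10000.0 by default), and rope_freq_scale linearly scales token positions before rotation, which is how "linear RoPE scaling" setups stretch a model past its trained context length. The sketch below is illustrative only, not code from this repository; the function name and the exact formulation are assumptions based on the usual RoPE definition.

def rope_angles(pos, head_dim, freq_base=10000.0, freq_scale=1.0):
    # Rotation angle for each frequency pair at a given token position.
    # freq_scale < 1.0 compresses positions (linear scaling), so the model
    # can address contexts longer than the one it was trained on.
    scaled_pos = pos * freq_scale
    return [scaled_pos * freq_base ** (-2.0 * i / head_dim)
            for i in range(head_dim // 2)]

# With freq_scale=0.5, position 4096 is rotated like position 2048:
assert rope_angles(4096, 128, freq_scale=0.5) == rope_angles(2048, 128)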
@@ -188,6 +192,8 @@ class llama_context_params(Structure):
     ("n_gpu_layers", c_int32),
     ("main_gpu", c_int32),
     ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+    ("rope_freq_base", c_float),
+    ("rope_freq_scale", c_float),
     ("progress_callback", llama_progress_callback),
     ("progress_callback_user_data", c_void_p),
     ("low_vram", c_bool),
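For reference, a short usage sketch of the new fields from Python. It assumes the module's existing llama_context_default_params() helper and the llama_progress_callback CFUNCTYPE shown above; the import path and the surrounding loading code are assumptions, not part of this diff.

from llama_cpp import llama_cpp  # assumed import path for these bindings

params = llama_cpp.llama_context_default_params()
params.rope_freq_base = 10000.0   # default RoPE base frequency
params.rope_freq_scale = 0.5      # linear scaling: roughly doubles the usable context

def on_progress(progress, user_data):
    # llama.cpp reports a value between 0 and 1 while the model loads.
    print(f"loading: {progress * 100:.0f}%")

# Keep a reference to the ctypes callback object so it is not garbage collected.
progress_cb = llama_cpp.llama_progress_callback(on_progress)
params.progress_callback = progress_cb
params.progress_callback_user_data = None  # NULL; no user context needed here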
vendor/llama.cpp (vendored)
@@ -1 +1 @@
-Subproject commit a6803cab946c817fb7aaf2a40b317f5d3e373bd1
+Subproject commit 6e7cca404748dd4b1a3affd0d1296e37f4ac0a6f
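The submodule bump moves vendor/llama.cpp from a6803ca to 6e7cca4, presumably picking up the upstream change referenced above (ggerganov/llama.cpp#2054) so that the new rope_freq_base and rope_freq_scale fields line up with the C-side llama_context_params layout in the updated revision.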