Update llama.cpp

This commit is contained in:
Andrei Betlen 2023-12-22 14:10:34 -05:00
parent 15ee2106f6
commit 6473796343
2 changed files with 7 additions and 5 deletions

View file

@@ -252,8 +252,8 @@ class llama_token_data_array(Structure):
llama_token_data_array_p = POINTER(llama_token_data_array) llama_token_data_array_p = POINTER(llama_token_data_array)
# typedef void (*llama_progress_callback)(float progress, void *ctx); # typedef bool (*llama_progress_callback)(float progress, void *ctx);
llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p) llama_progress_callback = ctypes.CFUNCTYPE(c_bool, c_float, c_void_p)
# // Input data for llama_decode # // Input data for llama_decode
@@ -347,7 +347,9 @@ class llama_model_kv_override(Structure):
# int32_t main_gpu; // the GPU that is used for scratch and small tensors # int32_t main_gpu; // the GPU that is used for scratch and small tensors
# const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES) # const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
# // called with a progress value between 0 and 1, pass NULL to disable # // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
# // If the provided progress_callback returns true, model loading continues.
# // If it returns false, model loading is immediately aborted.
# llama_progress_callback progress_callback; # llama_progress_callback progress_callback;
# // context pointer passed to the progress callback # // context pointer passed to the progress callback
# void * progress_callback_user_data; # void * progress_callback_user_data;
@@ -367,7 +369,7 @@ class llama_model_params(Structure):
n_gpu_layers (int): number of layers to store in VRAM n_gpu_layers (int): number of layers to store in VRAM
main_gpu (int): the GPU that is used for scratch and small tensors main_gpu (int): the GPU that is used for scratch and small tensors
tensor_split (ctypes.Array[ctypes.c_float]): how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES) tensor_split (ctypes.Array[ctypes.c_float]): how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
progress_callback (llama_progress_callback): called with a progress value between 0 and 1, pass NULL to disable progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
vocab_only (bool): only load the vocabulary, no weights vocab_only (bool): only load the vocabulary, no weights

2
vendor/llama.cpp vendored

@@ -1 +1 @@
Subproject commit afefa319f1f59b002dfa0d1ef407a2c74bd9770b Subproject commit 7082d24cec35e9ce9147535a2224dfc67ee0a78c