Update llama.cpp

This commit is contained in:
Andrei Betlen 2024-05-14 09:30:04 -04:00
parent 43ba1526c8
commit 50f5c74ecf
2 changed files with 7 additions and 1 deletions

View file

@ -648,6 +648,9 @@ class llama_model_kv_override(ctypes.Structure):
# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices() # // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
# const float * tensor_split; # const float * tensor_split;
# // comma separated list of RPC servers to use for offloading
# const char * rpc_servers;
# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable. # // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
# // If the provided progress_callback returns true, model loading continues. # // If the provided progress_callback returns true, model loading continues.
# // If it returns false, model loading is immediately aborted. # // If it returns false, model loading is immediately aborted.
@ -674,6 +677,7 @@ class llama_model_params(ctypes.Structure):
split_mode (int): how to split the model across multiple GPUs split_mode (int): how to split the model across multiple GPUs
main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model; LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results; LLAMA_SPLIT_LAYER: ignored main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model; LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results; LLAMA_SPLIT_LAYER: ignored
tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices() tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
rpc_servers (ctypes.c_char_p): comma separated list of RPC servers to use for offloading
progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted. progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
@ -687,6 +691,7 @@ class llama_model_params(ctypes.Structure):
split_mode: int split_mode: int
main_gpu: int main_gpu: int
tensor_split: CtypesArray[ctypes.c_float] tensor_split: CtypesArray[ctypes.c_float]
rpc_servers: ctypes.c_char_p
progress_callback: Callable[[float, ctypes.c_void_p], bool] progress_callback: Callable[[float, ctypes.c_void_p], bool]
progress_callback_user_data: ctypes.c_void_p progress_callback_user_data: ctypes.c_void_p
kv_overrides: CtypesArray[llama_model_kv_override] kv_overrides: CtypesArray[llama_model_kv_override]
@ -700,6 +705,7 @@ class llama_model_params(ctypes.Structure):
("split_mode", ctypes.c_int), ("split_mode", ctypes.c_int),
("main_gpu", ctypes.c_int32), ("main_gpu", ctypes.c_int32),
("tensor_split", ctypes.POINTER(ctypes.c_float)), ("tensor_split", ctypes.POINTER(ctypes.c_float)),
("rpc_servers", ctypes.c_char_p),
("progress_callback", llama_progress_callback), ("progress_callback", llama_progress_callback),
("progress_callback_user_data", ctypes.c_void_p), ("progress_callback_user_data", ctypes.c_void_p),
("kv_overrides", ctypes.POINTER(llama_model_kv_override)), ("kv_overrides", ctypes.POINTER(llama_model_kv_override)),

2
vendor/llama.cpp vendored

@ -1 +1 @@
Subproject commit 1c570d8beeebad95872dc738ea542a4a0022f78a Subproject commit 1265c670fd8e41e1947352c96c5179adda97fb2c