Update llama.cpp
This commit is contained in:
parent
43ba1526c8
commit
50f5c74ecf
2 changed files with 7 additions and 1 deletions
|
@ -648,6 +648,9 @@ class llama_model_kv_override(ctypes.Structure):
|
||||||
# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
|
# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
|
||||||
# const float * tensor_split;
|
# const float * tensor_split;
|
||||||
|
|
||||||
|
# // comma separated list of RPC servers to use for offloading
|
||||||
|
# const char * rpc_servers;
|
||||||
|
|
||||||
# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
|
# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
|
||||||
# // If the provided progress_callback returns true, model loading continues.
|
# // If the provided progress_callback returns true, model loading continues.
|
||||||
# // If it returns false, model loading is immediately aborted.
|
# // If it returns false, model loading is immediately aborted.
|
||||||
|
@ -674,6 +677,7 @@ class llama_model_params(ctypes.Structure):
|
||||||
split_mode (int): how to split the model across multiple GPUs
|
split_mode (int): how to split the model across multiple GPUs
|
||||||
main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model; LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results; LLAMA_SPLIT_LAYER: ignored
|
main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model; LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results; LLAMA_SPLIT_LAYER: ignored
|
||||||
tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
|
tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
|
||||||
|
rpc_servers (ctypes.c_char_p): comma separated list of RPC servers to use for offloading
|
||||||
progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
|
progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
|
||||||
progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
|
progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
|
||||||
kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model metadata
|
kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model metadata
|
||||||
|
@ -687,6 +691,7 @@ class llama_model_params(ctypes.Structure):
|
||||||
split_mode: int
|
split_mode: int
|
||||||
main_gpu: int
|
main_gpu: int
|
||||||
tensor_split: CtypesArray[ctypes.c_float]
|
tensor_split: CtypesArray[ctypes.c_float]
|
||||||
|
rpc_servers: ctypes.c_char_p
|
||||||
progress_callback: Callable[[float, ctypes.c_void_p], bool]
|
progress_callback: Callable[[float, ctypes.c_void_p], bool]
|
||||||
progress_callback_user_data: ctypes.c_void_p
|
progress_callback_user_data: ctypes.c_void_p
|
||||||
kv_overrides: CtypesArray[llama_model_kv_override]
|
kv_overrides: CtypesArray[llama_model_kv_override]
|
||||||
|
@ -700,6 +705,7 @@ class llama_model_params(ctypes.Structure):
|
||||||
("split_mode", ctypes.c_int),
|
("split_mode", ctypes.c_int),
|
||||||
("main_gpu", ctypes.c_int32),
|
("main_gpu", ctypes.c_int32),
|
||||||
("tensor_split", ctypes.POINTER(ctypes.c_float)),
|
("tensor_split", ctypes.POINTER(ctypes.c_float)),
|
||||||
|
("rpc_servers", ctypes.c_char_p),
|
||||||
("progress_callback", llama_progress_callback),
|
("progress_callback", llama_progress_callback),
|
||||||
("progress_callback_user_data", ctypes.c_void_p),
|
("progress_callback_user_data", ctypes.c_void_p),
|
||||||
("kv_overrides", ctypes.POINTER(llama_model_kv_override)),
|
("kv_overrides", ctypes.POINTER(llama_model_kv_override)),
|
||||||
|
|
2
vendor/llama.cpp
vendored
2
vendor/llama.cpp
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit 1c570d8beeebad95872dc738ea542a4a0022f78a
|
Subproject commit 1265c670fd8e41e1947352c96c5179adda97fb2c
|
Loading…
Add table
Reference in a new issue