From 64737963433708ca038c26be9675f6b4aba294de Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 22 Dec 2023 14:10:34 -0500
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 10 ++++++----
 vendor/llama.cpp       |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 56243b1..3732b58 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -252,8 +252,8 @@ class llama_token_data_array(Structure):
 
 llama_token_data_array_p = POINTER(llama_token_data_array)
 
-# typedef void (*llama_progress_callback)(float progress, void *ctx);
-llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
+# typedef bool (*llama_progress_callback)(float progress, void *ctx);
+llama_progress_callback = ctypes.CFUNCTYPE(c_bool, c_float, c_void_p)
 
 
 # // Input data for llama_decode
@@ -347,7 +347,9 @@ class llama_model_kv_override(Structure):
 #     int32_t main_gpu;    // the GPU that is used for scratch and small tensors
 #     const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
-#     // called with a progress value between 0 and 1, pass NULL to disable
+#     // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+#     // If the provided progress_callback returns true, model loading continues.
+#     // If it returns false, model loading is immediately aborted.
 #     llama_progress_callback progress_callback;
 #     // context pointer passed to the progress callback
 #     void * progress_callback_user_data;
@@ -367,7 +369,7 @@ class llama_model_params(Structure):
         n_gpu_layers (int): number of layers to store in VRAM
         main_gpu (int): the GPU that is used for scratch and small tensors
         tensor_split (ctypes.Array[ctypes.c_float]): how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
-        progress_callback (llama_progress_callback): called with a progress value between 0 and 1, pass NULL to disable
+        progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
         progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
         kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
         vocab_only (bool): only load the vocabulary, no weights
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index afefa31..7082d24 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit afefa319f1f59b002dfa0d1ef407a2c74bd9770b
+Subproject commit 7082d24cec35e9ce9147535a2224dfc67ee0a78c
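
Note: the signature change above means a Python progress callback must now
return a bool instead of None. Below is a minimal sketch of the updated
usage, assuming the low-level llama_cpp bindings from this repo; the
callback name and model path are placeholders, not part of the patch.

import ctypes

import llama_cpp

# Wrap a Python function in the new bool-returning CFUNCTYPE prototype.
# Keeping a module-level reference prevents the ctypes callback from being
# garbage collected while llama.cpp may still invoke it during loading.
@llama_cpp.llama_progress_callback
def on_progress(progress, ctx):
    print(f"loading: {progress * 100:.1f}%")
    return True  # returning False aborts model loading immediately

llama_cpp.llama_backend_init(False)  # initialize the backend once per process
params = llama_cpp.llama_model_default_params()
params.progress_callback = on_progress
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", params)

A callback written against the old void-returning prototype typically
returns None, which the new c_bool result type will likely coerce to False
and so abort the load; updating such callbacks to return True explicitly is
the safe migration.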