From b83728ad1e9f10d4a642a7f011772be6389680b0 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 21 Jul 2023 12:33:27 -0400 Subject: [PATCH 1/2] Update llama.cpp --- llama_cpp/llama_cpp.py | 12 ++++-------- vendor/llama.cpp | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index c2c4ed1..eea26ac 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -164,7 +164,7 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p) # int32_t n_batch; // prompt processing batch size # int32_t n_gpu_layers; // number of layers to store in VRAM # int32_t main_gpu; // the GPU that is used for scratch and small tensors -# float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs +# const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES) # // ref: https://github.com/ggerganov/llama.cpp/pull/2054 # float rope_freq_base; // RoPE base frequency @@ -192,7 +192,7 @@ class llama_context_params(Structure): ("n_batch", c_int32), ("n_gpu_layers", c_int32), ("main_gpu", c_int32), - ("tensor_split", c_float * LLAMA_MAX_DEVICES.value), + ("tensor_split", POINTER(c_float)), ("rope_freq_base", c_float), ("rope_freq_scale", c_float), ("progress_callback", llama_progress_callback), @@ -933,22 +933,19 @@ _lib.llama_sample_frequency_and_presence_penalties.restype = None # /// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted. # /// @params guidance_ctx A separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context. # /// @params scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance. -# /// @params smooth_factor Smooth factor between guidance logits and original logits. 1.0f means only use guidance logits. 0.0f means only original logits. # LLAMA_API void llama_sample_classifier_free_guidance( # struct llama_context * ctx, # llama_token_data_array * candidates, # struct llama_context * guidance_ctx, -# float scale, -# float smooth_factor); +# float scale); def llama_sample_classifier_free_guidance( ctx: llama_context_p, candidates, # type: _Pointer[llama_token_data_array] guidance_ctx: llama_context_p, scale: c_float, - smooth_factor: c_float, ): return _lib.llama_sample_classifier_free_guidance( - ctx, candidates, guidance_ctx, scale, smooth_factor + ctx, candidates, guidance_ctx, scale ) @@ -957,7 +954,6 @@ _lib.llama_sample_classifier_free_guidance.argtypes = [ llama_token_data_array_p, llama_context_p, c_float, - c_float, ] _lib.llama_sample_classifier_free_guidance.restype = None diff --git a/vendor/llama.cpp b/vendor/llama.cpp index e782c9e..d924522 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit e782c9e735f93ab4767ffc37462c523b73a17ddc +Subproject commit d924522a46c5ef097af4a88087d91673e8e87e4d From 231123ee1e718bcccb022ade87f802e5b0466228 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 21 Jul 2023 12:41:59 -0400 Subject: [PATCH 2/2] Update llama.cpp --- CHANGELOG.md | 4 ++++ pyproject.toml | 2 +- setup.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8196491..360b8e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.75] + +- Update llama.cpp + ## [0.1.74] ### Added diff --git a/pyproject.toml b/pyproject.toml index 64bb35c..02273b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "llama_cpp_python" -version = "0.1.74" +version = "0.1.75" description = "Python bindings for the llama.cpp library" authors = ["Andrei Betlen "] license = "MIT" diff --git a/setup.py b/setup.py index 9182e65..48836df 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( description="A Python wrapper for llama.cpp", long_description=long_description, long_description_content_type="text/markdown", - version="0.1.74", + version="0.1.75", author="Andrei Betlen", author_email="abetlen@gmail.com", license="MIT",