From b83728ad1e9f10d4a642a7f011772be6389680b0 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Fri, 21 Jul 2023 12:33:27 -0400
Subject: [PATCH 1/2] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 12 ++++--------
 vendor/llama.cpp       |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index c2c4ed1..eea26ac 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -164,7 +164,7 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 #     int32_t  n_batch;                      // prompt processing batch size
 #     int32_t  n_gpu_layers;                 // number of layers to store in VRAM
 #     int32_t  main_gpu;                     // the GPU that is used for scratch and small tensors
-#     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+#     const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
 #     // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 #     float    rope_freq_base;  // RoPE base frequency
@@ -192,7 +192,7 @@ class llama_context_params(Structure):
         ("n_batch", c_int32),
         ("n_gpu_layers", c_int32),
         ("main_gpu", c_int32),
-        ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("tensor_split", POINTER(c_float)),
         ("rope_freq_base", c_float),
         ("rope_freq_scale", c_float),
         ("progress_callback", llama_progress_callback),
@@ -933,22 +933,19 @@ _lib.llama_sample_frequency_and_presence_penalties.restype = None
 # /// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.
 # /// @params guidance_ctx A separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
 # /// @params scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
-# /// @params smooth_factor Smooth factor between guidance logits and original logits. 1.0f means only use guidance logits. 0.0f means only original logits.
 # LLAMA_API void llama_sample_classifier_free_guidance(
 #             struct llama_context * ctx,
 #         llama_token_data_array * candidates,
 #             struct llama_context * guidance_ctx,
-#                             float   scale,
-#                             float   smooth_factor);
+#                             float   scale);
 def llama_sample_classifier_free_guidance(
     ctx: llama_context_p,
     candidates,  # type: _Pointer[llama_token_data_array]
     guidance_ctx: llama_context_p,
     scale: c_float,
-    smooth_factor: c_float,
 ):
     return _lib.llama_sample_classifier_free_guidance(
-        ctx, candidates, guidance_ctx, scale, smooth_factor
+        ctx, candidates, guidance_ctx, scale
     )
 
 
@@ -957,7 +954,6 @@ _lib.llama_sample_classifier_free_guidance.argtypes = [
     llama_token_data_array_p,
     llama_context_p,
     c_float,
-    c_float,
 ]
 _lib.llama_sample_classifier_free_guidance.restype = None
 
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index e782c9e..d924522 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit e782c9e735f93ab4767ffc37462c523b73a17ddc
+Subproject commit d924522a46c5ef097af4a88087d91673e8e87e4d

From 231123ee1e718bcccb022ade87f802e5b0466228 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Fri, 21 Jul 2023 12:41:59 -0400
Subject: [PATCH 2/2] Bump version to 0.1.75

---
 CHANGELOG.md   | 4 ++++
 pyproject.toml | 2 +-
 setup.py       | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8196491..360b8e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.75]
+
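+- Update llama.cpp (breaking: `llama_context_params.tensor_split` is now a `float` pointer; `smooth_factor` removed from `llama_sample_classifier_free_guidance`)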
+
 ## [0.1.74]
 
 ### Added
diff --git a/pyproject.toml b/pyproject.toml
index 64bb35c..02273b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llama_cpp_python"
-version = "0.1.74"
+version = "0.1.75"
 description = "Python bindings for the llama.cpp library"
 authors = ["Andrei Betlen <abetlen@gmail.com>"]
 license = "MIT"
diff --git a/setup.py b/setup.py
index 9182e65..48836df 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
     description="A Python wrapper for llama.cpp",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    version="0.1.74",
+    version="0.1.75",
     author="Andrei Betlen",
     author_email="abetlen@gmail.com",
     license="MIT",