From c970d41a85381fd55235136f123422df0bf0c7e7 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Wed, 24 Jan 2024 10:38:30 -0500 Subject: [PATCH 1/8] fix: llama_log_set should be able to accept null pointer --- llama_cpp/llama_cpp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index 5de837f..d31a5da 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -2528,7 +2528,7 @@ _lib.llama_print_system_info.restype = c_char_p # // If this is not called, or NULL is supplied, everything is output on stderr. # LLAMA_API void llama_log_set(ggml_log_callback log_callback, void * user_data); def llama_log_set( - log_callback: "ctypes._FuncPointer", user_data: c_void_p # type: ignore + log_callback: Union["ctypes._FuncPointer", c_void_p], user_data: c_void_p # type: ignore ): """Set callback for all future logging events. @@ -2536,7 +2536,7 @@ def llama_log_set( return _lib.llama_log_set(log_callback, user_data) -_lib.llama_log_set.argtypes = [llama_log_callback, c_void_p] +_lib.llama_log_set.argtypes = [ctypes.c_void_p, c_void_p] _lib.llama_log_set.restype = None From c343baaba83595fb7c6aa90e6aa8c3a6507d9430 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Wed, 24 Jan 2024 10:40:50 -0500 Subject: [PATCH 2/8] Update llama.cpp --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 26d6076..c9b316c 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 26d607608d794efa56df3bdb6043a2f94c1d632c +Subproject commit c9b316c78fba31e65879a2ec91cbafd341b88cce From 5b258bf840992f20ab6748ada2c2d122247169dd Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Wed, 24 Jan 2024 10:51:15 -0500 Subject: [PATCH 3/8] docs: Update README with more param common examples --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f97ea0f..f602b17 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python ### Windows Notes If you run into issues where it complains it can't find `'nmake'` `'?'` or CMAKE_C_COMPILER, you can extract w64devkit as [mentioned in llama.cpp repo](https://github.com/ggerganov/llama.cpp#openblas) and add those manually to CMAKE_ARGS before running `pip` install: + ```ps $env:CMAKE_GENERATOR = "MinGW Makefiles" $env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe" @@ -118,17 +119,19 @@ Detailed MacOS Metal GPU install documentation is available at [docs/install/mac #### M1 Mac Performance Issue Note: If you are using Apple Silicon (M1) Mac, make sure you have installed a version of Python that supports arm64 architecture. For example: -``` + +```bash wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh bash Miniforge3-MacOSX-arm64.sh ``` + Otherwise, while installing it will build the llama.cpp x86 version which will be 10x slower on Apple Silicon (M1) Mac. 
#### M Series Mac Error: `(mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64'))` Try installing with -``` +```bash CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on" pip install --upgrade --verbose --force-reinstall --no-cache-dir llama-cpp-python ``` @@ -152,7 +155,12 @@ Below is a short example demonstrating how to use the high-level API to for basi ```python >>> from llama_cpp import Llama ->>> llm = Llama(model_path="./models/7B/llama-model.gguf") +>>> llm = Llama( + model_path="./models/7B/llama-model.gguf", + # n_gpu_layers=-1, # Uncomment to use GPU acceleration + # seed=1337, # Uncomment to set a specific seed + # n_ctx=2048, # Uncomment to increase the context window +) >>> output = llm( "Q: Name the planets in the solar system? A: ", # Prompt max_tokens=32, # Generate up to 32 tokens @@ -191,7 +199,10 @@ Note that `chat_format` option must be set for the particular model you are usin ```python >>> from llama_cpp import Llama ->>> llm = Llama(model_path="path/to/llama-2/llama-model.gguf", chat_format="llama-2") +>>> llm = Llama( + model_path="path/to/llama-2/llama-model.gguf", + chat_format="llama-2" +) >>> llm.create_chat_completion( messages = [ {"role": "system", "content": "You are an assistant who perfectly describes images."}, From d6fb16e05524eb99a28436e73d7f45b1ca6f8b6d Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 25 Jan 2024 10:51:48 -0500 Subject: [PATCH 4/8] docs: Update README --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f602b17..7813c96 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ Below is a short example demonstrating how to use the high-level API to for basi ) >>> output = llm( "Q: Name the planets in the solar system? A: ", # Prompt - max_tokens=32, # Generate up to 32 tokens + max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window stop=["Q:", "\n"], # Stop generating just before the model would generate a new question echo=True # Echo the prompt back in the output ) # Generate a completion, can also call create_completion @@ -425,6 +425,9 @@ pip install -e .[all] make clean ``` +You can also test out specific commits of `lama.cpp` by checking out the desired commit in the `vendor/llama.cpp` submodule and then running `make clean` and `pip install -e .` again. Any changes in the `llama.h` API will require +changes to the `llama_cpp/llama_cpp.py` file to match the new API (additional changes may be required elsewhere). + ## FAQ ### Are there pre-built binaries / binary wheels available? 
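The comment updated in PATCH 4 above ("set to None to generate up to the end of the context window") is easy to misread, so here is a minimal sketch of that behaviour using the same high-level API and the placeholder model path from the README example. It is an illustration of the documented parameter, not part of the patch itself.

```python
from llama_cpp import Llama

# Placeholder model path, mirroring the README example above.
llm = Llama(
    model_path="./models/7B/llama-model.gguf",
    n_ctx=2048,  # context window; with max_tokens=None this is the hard upper bound
)

output = llm(
    "Q: Name the planets in the solar system? A: ",  # Prompt
    max_tokens=None,    # no fixed cap: stop at EOS, a stop sequence, or the context limit
    stop=["Q:", "\n"],  # stop sequences still end generation early
    echo=True,          # echo the prompt back in the output
)

# The high-level API returns an OpenAI-style completion dict.
print(output["choices"][0]["text"])
```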
From dc5a436224c15c2a985b4e5d43b37d0df2d63b6e Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 25 Jan 2024 11:19:34 -0500 Subject: [PATCH 5/8] Update llama.cpp --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index c9b316c..ddc5a50 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit c9b316c78fba31e65879a2ec91cbafd341b88cce +Subproject commit ddc5a5033f948dc7ab0a3a6ec2d914d13c274077 From 2588f34a22076525ea62e727fb292d07d20f463a Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 25 Jan 2024 11:22:42 -0500 Subject: [PATCH 6/8] Update llama.cpp --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index ddc5a50..faa3526 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit ddc5a5033f948dc7ab0a3a6ec2d914d13c274077 +Subproject commit faa3526a1eba458120987ed8269e5616385a76f4 From cde7514c3d28e6d52f272614e9957208c344dde5 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 25 Jan 2024 11:23:18 -0500 Subject: [PATCH 7/8] feat(server): include llama-cpp-python version in openapi spec --- llama_cpp/server/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index fed0a6d..368022c 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -118,7 +118,7 @@ def create_app( app = FastAPI( middleware=middleware, title="🦙 llama.cpp Python API", - version="0.0.1", + version=llama_cpp.__version__, ) app.add_middleware( CORSMiddleware, From f5cc6b30538c34192227d3c46f08f213dce6443c Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 25 Jan 2024 11:28:16 -0500 Subject: [PATCH 8/8] Bump version --- CHANGELOG.md | 8 ++++++++ llama_cpp/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fff919..8a94ef5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.33] + +- feat: Update llama.cpp to ggerganov/llama.cpp@faa3526a1eba458120987ed8269e5616385a76f4 +- feat(server): include llama-cpp-python version in openapi spec by @abetlen in cde7514c3d28e6d52f272614e9957208c344dde5 +- fix: use both eos and bos tokens as stop sequences for hf-tokenizer-config chat format. by @abetlen in 5b982d0f8c6f35242c8862ffdce00e17cea0b44f +- fix: GGUF metadata KV overrides, re #1011 by @phiharri in #1116 +- fix: llama_log_set should be able to accept null pointer by @abetlen in c970d41a85381fd55235136f123422df0bf0c7e7 + ## [0.2.32] - feat: Update llama.cpp to ggerganov/llama.cpp@504dc37be8446fb09b1ede70300250ad41be32a2 diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py index dda8335..55f695e 100644 --- a/llama_cpp/__init__.py +++ b/llama_cpp/__init__.py @@ -1,4 +1,4 @@ from .llama_cpp import * from .llama import * -__version__ = "0.2.32" \ No newline at end of file +__version__ = "0.2.33" \ No newline at end of file
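As a closing usage note for the `llama_log_set` change in PATCH 1 (the last fix listed in the 0.2.33 changelog above), the sketch below shows the two call patterns the relaxed `argtypes` are meant to accept: a real callback wrapped in `llama_log_callback`, and a plain null pointer. The callback signature `(level, text, user_data)` is assumed from the usual ggml log callback and is not shown in the patch, so treat this as an illustration rather than the library's documented API.

```python
import ctypes

# Use the low-level ctypes bindings module patched above (llama_cpp/llama_cpp.py).
from llama_cpp import llama_cpp as llama_bindings


# Wrap a Python function in the CFUNCTYPE so native code can call it.
# Keep a reference to the wrapper for as long as it is installed, otherwise
# the garbage collector may free it while llama.cpp still logs through it.
@llama_bindings.llama_log_callback
def _silent_log(level, text, user_data):
    pass  # swallow the message; `text` arrives as bytes


# Install the no-op callback; user_data is unused here, so pass a null pointer.
llama_bindings.llama_log_set(_silent_log, ctypes.c_void_p(0))

# After the patch, a null callback is accepted as well, which restores the
# default behaviour of writing all log output to stderr.
llama_bindings.llama_log_set(None, ctypes.c_void_p(0))
```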