From ac55d0a175115d1e719672ce1cb1bec776c738b1 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 May 2024 02:38:10 -0400
Subject: [PATCH 1/5] fix: Clear kv cache to avoid kv bug when image is evaluated first

---
 llama_cpp/llama_chat_format.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index d795b30..e2d7e27 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -2637,6 +2637,7 @@ class Llava15ChatHandler:
 
         # Evaluate prompt
         llama.reset()
+        llama._ctx.kv_cache_clear()
         for type_, value in split_text:
             if type_ == "text":
                 tokens = llama.tokenize(value.encode("utf8"), add_bos=False, special=True)

From eafb6ec5e8b4540e0b58e44b4deaaf592eb5014a Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 May 2024 08:39:55 -0400
Subject: [PATCH 2/5] feat: Update llama.cpp

---
 vendor/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 911b390..25c6e82 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 911b3900dded9a1cfe0f0e41b82c7a29baf3a217
+Subproject commit 25c6e82e7a1ad25a42b0894e87d9b5c557409516

From 73165021bbe888d58ce1adc66a2bcd6a0ecf10dd Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 May 2024 09:44:18 -0400
Subject: [PATCH 3/5] chore: Bump version

---
 CHANGELOG.md          | 5 +++++
 llama_cpp/__init__.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 578d08d..9f36b44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.73]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@25c6e82e7a1ad25a42b0894e87d9b5c557409516
+- fix: Clear kv cache at beginning of image chat formats to avoid bug when image is evaluated first by @abetlen in ac55d0a175115d1e719672ce1cb1bec776c738b1
+
 ## [0.2.72]
 
 - fix(security): Remote Code Execution by Server-Side Template Injection in Model Metadata by @retr0reg in b454f40a9a1787b2b5659cd2cb00819d983185df
diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
index 0165379..d6e8d0b 100644
--- a/llama_cpp/__init__.py
+++ b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.72"
\ No newline at end of file
+__version__ = "0.2.73"
\ No newline at end of file

From 7f59856fa6f3e23f07e12fc15aeb9359dc6c3bb4 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 May 2024 10:18:47 -0400
Subject: [PATCH 4/5] fix: Enable CUDA backend for llava. Closes #1324

---
 CMakeLists.txt | 3 ++-
 Makefile       | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 70f9b99..dcaddb6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,8 +51,9 @@ if (LLAMA_BUILD)
     )
 
     if (LLAVA_BUILD)
-        if (LLAMA_CUBLAS)
+        if (LLAMA_CUBLAS OR LLAMA_CUDA)
             add_compile_definitions(GGML_USE_CUBLAS)
+            add_compile_definitions(GGML_USE_CUDA)
         endif()
 
         if (LLAMA_METAL)
diff --git a/Makefile b/Makefile
index 4ae0110..83085d2 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ build.debug:
 	CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
 
 build.opencl:
 	CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .

From 1547202b77c0932e36b6b3b8319c54d62ac2d127 Mon Sep 17 00:00:00 2001
From: Peng Yu
Date: Fri, 10 May 2024 10:35:51 -0400
Subject: [PATCH 5/5] docs: Fix typo in README.md (#1444)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5f3ff68..d5ced86 100644
--- a/README.md
+++ b/README.md
@@ -550,7 +550,7 @@ llm = Llama.from_pretrained(
     n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
 )
 
-respoonse = llm.create_chat_completion(
+response = llm.create_chat_completion(
     messages = [
         {
             "role": "user",
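
Taken together, the patches above clear the kv cache before multimodal prompts are evaluated, track the upstream llama.cpp submodule, cut the 0.2.73 release, enable the CUDA backend for llava builds, and fix a README typo. For orientation only, here is a minimal Python sketch of the code path the first patch touches: a Llava15ChatHandler chat completion where the image is evaluated before the text. The model path, clip projector path, and image URL are placeholders, not values taken from these patches.

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder paths: substitute a real LLaVA GGUF model and its CLIP projector.
chat_handler = Llava15ChatHandler(clip_model_path="mmproj-model-f16.gguf")
llm = Llama(
    model_path="llava-v1.5-7b.Q4_K_M.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,  # larger context so the image embedding fits
)

# The image comes first in the message content, the ordering that
# previously hit the kv-cache bug addressed by patch 1.
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
                {"type": "text", "text": "Describe this image in one sentence."},
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])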