Merge branch 'main' of https://github.com/abetlen/llama-cpp-python into main
Commit 3fe8e9a8f3
6 changed files with 11 additions and 4 deletions
CHANGELOG.md

@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.73]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@25c6e82e7a1ad25a42b0894e87d9b5c557409516
+- fix: Clear kv cache at beginning of image chat formats to avoid bug when image is evaluated first by @abetlen in ac55d0a175115d1e719672ce1cb1bec776c738b1
+
 ## [0.2.72]
 
 - fix(security): Remote Code Execution by Server-Side Template Injection in Model Metadata by @retr0reg in b454f40a9a1787b2b5659cd2cb00819d983185df
CMakeLists.txt

@@ -51,8 +51,9 @@ if (LLAMA_BUILD)
     )
 
     if (LLAVA_BUILD)
-        if (LLAMA_CUBLAS)
+        if (LLAMA_CUBLAS OR LLAMA_CUDA)
             add_compile_definitions(GGML_USE_CUBLAS)
+            add_compile_definitions(GGML_USE_CUDA)
         endif()
 
         if (LLAMA_METAL)
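The hunk above mirrors llama.cpp's CUDA flag rename: when either the legacy LLAMA_CUBLAS or the newer LLAMA_CUDA option is set, the llava build now receives both the GGML_USE_CUBLAS and GGML_USE_CUDA definitions. A minimal post-install sanity check could look like the sketch below; it assumes the low-level binding llama_cpp.llama_supports_gpu_offload() (wrapping llama.cpp's C function of the same name) is exposed in this version.

```python
# Hypothetical sanity check, not part of this commit: after installing with
# CMAKE_ARGS="-DLLAMA_CUDA=on" (or the legacy -DLLAMA_CUBLAS=on), confirm that
# the compiled library reports GPU offload support.
# Assumption: llama_cpp exposes llama_supports_gpu_offload() from the C API.
import llama_cpp

if llama_cpp.llama_supports_gpu_offload():
    print("GPU offload available: CUDA definitions were compiled in")
else:
    print("CPU-only build: set -DLLAMA_CUDA=on and reinstall to enable offload")
```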
Makefile

@@ -16,7 +16,7 @@ build.debug:
 	CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
 
 build.opencl:
 	CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .
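The build.cuda target simply exports CMAKE_ARGS and defers to pip. For completeness, a rough Python equivalent of the updated target is sketched below (an illustration, not part of the commit); it assumes it is run from a source checkout of llama-cpp-python.

```python
# Rough Python equivalent of `make build.cuda` after this change: pass
# -DLLAMA_CUDA=on to CMake via the CMAKE_ARGS environment variable and let pip
# drive the editable build. Illustrative sketch only.
import os
import subprocess
import sys

env = dict(os.environ, CMAKE_ARGS="-DLLAMA_CUDA=on")
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "--verbose", "-e", "."],
    env=env,
)
```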
README.md

@@ -550,7 +550,7 @@ llm = Llama.from_pretrained(
     n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
 )
 
-respoonse = llm.create_chat_completion(
+response = llm.create_chat_completion(
     messages = [
         {
             "role": "user",
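The README hunk only corrects the respoonse typo; for context, a sketch of the surrounding multimodal example is given below. The repo id, filenames, and image URL are placeholders rather than values taken from this commit.

```python
# Sketch of the multimodal chat-completion flow around the corrected line.
# repo_id, filename, clip model path, and image URL are placeholders.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

chat_handler = Llava15ChatHandler(clip_model_path="path/to/mmproj.gguf")  # placeholder
llm = Llama.from_pretrained(
    repo_id="some-org/some-llava-gguf",  # placeholder
    filename="*q4_k_m.gguf",             # placeholder
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}},
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])
```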
llama_cpp/__init__.py

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.72"
+__version__ = "0.2.73"
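The only functional change here is the version bump, which is what packaging and downstream code see after upgrading; a trivial check:

```python
# Trivial check of the bumped package version after upgrading.
import llama_cpp

print(llama_cpp.__version__)  # "0.2.73" for this release
```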
llama_cpp/llama_chat_format.py

@@ -2637,6 +2637,7 @@ class Llava15ChatHandler:
 
         # Evaluate prompt
         llama.reset()
+        llama._ctx.kv_cache_clear()
         for type_, value in split_text:
             if type_ == "text":
                 tokens = llama.tokenize(value.encode("utf8"), add_bos=False, special=True)
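Per the changelog entry above, the added kv_cache_clear() avoids a bug that could surface when the image is the first item the handler evaluates, for example when the same Llama instance is reused across requests. A sketch of such an image-first request is shown below; the model paths and image URL are placeholders.

```python
# Sketch of the request pattern the added kv_cache_clear() guards: the image
# precedes any text, so it is the first thing the handler evaluates, and the
# same Llama instance (and KV cache) is reused across calls.
# model_path, clip_model_path, and the URL are placeholders.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

llm = Llama(
    model_path="path/to/model.gguf",  # placeholder
    chat_handler=Llava15ChatHandler(clip_model_path="path/to/mmproj.gguf"),  # placeholder
    n_ctx=2048,
)

image_first_messages = [
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

# With the fix, reset() followed by kv_cache_clear() starts each prompt from a
# clean cache, so repeated calls behave the same as the first one.
for _ in range(2):
    print(llm.create_chat_completion(messages=image_first_messages))
```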