From 4887973c2257437166d73cd2f34eb1fafcfca2e9 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sun, 27 Aug 2023 12:59:20 -0400
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama.py     |  6 +++---
 llama_cpp/llama_cpp.py | 47 +++++++++++++++++++++--------------------------
 tests/test_llama.py    |  6 +++---
 vendor/llama.cpp       |  2 +-
 4 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 49c98fd..22625d8 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -445,17 +445,17 @@ class Llama:
         """
         assert self.model is not None
         output = b""
-        size = 8
+        size = 32
         buffer = (ctypes.c_char * size)()
         for token in tokens:
-            n = llama_cpp.llama_token_to_str_with_model(
+            n = llama_cpp.llama_token_to_piece_with_model(
                 self.model, llama_cpp.llama_token(token), buffer, size
             )
             assert n <= size
             output += bytes(buffer[:n])
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token
-        return output
+        return output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output

     def set_cache(self, cache: Optional[BaseLlamaCache]):
         """Set the cache.
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 1731878..8cb442d 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -973,48 +973,43 @@ _lib.llama_tokenize_with_model.argtypes = [
 _lib.llama_tokenize_with_model.restype = c_int


-# // Token Id -> String. Uses the vocabulary in the provided context
-# // Does not write null terminator to the buffer
-# LLAMA_API int llama_token_to_str(
+# // Token Id -> Piece.
+# // Uses the vocabulary in the provided context.
+# // Does not write null terminator to the buffer.
+# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+# LLAMA_API int llama_token_to_piece( # const struct llama_context * ctx, -# llama_token token, -# char * buf, -# int length); -def llama_token_to_str( +# llama_token token, +# char * buf, +# int length); +def llama_token_to_piece( ctx: llama_context_p, token: llama_token, buf: bytes, length: c_int ) -> int: - return _lib.llama_token_to_str(ctx, token, buf, length) + return _lib.llama_token_to_piece(ctx, token, buf, length) -_lib.llama_tokenize_with_model.argtypes = [ - llama_model_p, - c_char_p, - llama_token_p, - c_int, - c_bool, -] -_lib.llama_tokenize_with_model.restype = c_int +_lib.llama_token_to_piece.argtypes = [llama_context_p, llama_token, c_char_p, c_int] +_lib.llama_token_to_piece.restype = c_int -# LLAMA_API int llama_token_to_str_with_model( -# const struct llama_model * model, -# llama_token token, -# char * buf, -# int length); -def llama_token_to_str_with_model( +# LLAMA_API int llama_token_to_piece_with_model( +# const struct llama_model * model, +# llama_token token, +# char * buf, +# int length); +def llama_token_to_piece_with_model( model: llama_model_p, token: llama_token, buf: bytes, length: c_int ) -> int: - return _lib.llama_token_to_str_with_model(model, token, buf, length) + return _lib.llama_token_to_piece_with_model(model, token, buf, length) -_lib.llama_token_to_str_with_model.argtypes = [ +_lib.llama_token_to_piece_with_model.argtypes = [ llama_model_p, llama_token, c_char_p, c_int, ] -_lib.llama_token_to_str_with_model.restype = c_int - +_lib.llama_token_to_piece_with_model.restype = c_int # // # // Grammar diff --git a/tests/test_llama.py b/tests/test_llama.py index e038a89..c240122 100644 --- a/tests/test_llama.py +++ b/tests/test_llama.py @@ -14,16 +14,16 @@ def test_llama_cpp_tokenization(): tokens = llama.tokenize(text) assert tokens[0] == llama.token_bos() - assert tokens == [1, 10994, 2787] + assert tokens == [1, 15043, 2787] detokenized = llama.detokenize(tokens) assert detokenized == text tokens = llama.tokenize(text, add_bos=False) assert tokens[0] != llama.token_bos() - assert tokens == [10994, 2787] + assert tokens == [15043, 2787] detokenized = llama.detokenize(tokens) - assert detokenized == text + assert detokenized != text @pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos") diff --git a/vendor/llama.cpp b/vendor/llama.cpp index c1ac54b..c10704d 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit c1ac54b77aaba10d029084d152be786102010eb2 +Subproject commit c10704d01e21e3dbe4d6ca1026ebff85349dd239
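
A minimal sketch, not part of the patch, of what the updated detokenize path does end to end. It assumes only names this patch introduces (llama_cpp.llama_token_to_piece_with_model, llama_cpp.llama_token); the standalone detokenize_pieces helper and its model/bos_token parameters are illustrative placeholders, not llama-cpp-python API.

import ctypes
import llama_cpp

def detokenize_pieces(model, tokens, bos_token):
    # Mirrors Llama.detokenize after this patch (illustrative only).
    output = b""
    size = 32  # per-token scratch buffer, grown from 8 to 32 bytes by this patch
    buffer = (ctypes.c_char * size)()
    for token in tokens:
        # Renamed binding: llama_token_to_str_with_model -> llama_token_to_piece_with_model
        n = llama_cpp.llama_token_to_piece_with_model(
            model, llama_cpp.llama_token(token), buffer, size
        )
        assert n <= size  # each piece must fit in the scratch buffer
        output += bytes(buffer[:n])
    # Strip the leading space the tokenizer attaches after a BOS token,
    # per the upstream comment quoted in the llama_cpp.py hunk above.
    return output[1:] if len(tokens) > 0 and tokens[0] == bos_token else output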
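The flipped test expectations follow from the same rule: the bumped vendor/llama.cpp tokenizer produces the new expected ids (15043, 2787) and encodes the leading space into the first piece, while detokenize now strips that space only when the first token is BOS. With a leading BOS the round trip still reproduces the input exactly; with add_bos=False the leading space survives in the output, so the final assertion becomes detokenized != text.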