Update llama.cpp
This commit is contained in:
parent
9ab49bc1d4
commit
4887973c22
4 changed files with 28 additions and 33 deletions
|
@ -445,17 +445,17 @@ class Llama:
|
||||||
"""
|
"""
|
||||||
assert self.model is not None
|
assert self.model is not None
|
||||||
output = b""
|
output = b""
|
||||||
size = 8
|
size = 32
|
||||||
buffer = (ctypes.c_char * size)()
|
buffer = (ctypes.c_char * size)()
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
n = llama_cpp.llama_token_to_str_with_model(
|
n = llama_cpp.llama_token_to_piece_with_model(
|
||||||
self.model, llama_cpp.llama_token(token), buffer, size
|
self.model, llama_cpp.llama_token(token), buffer, size
|
||||||
)
|
)
|
||||||
assert n <= size
|
assert n <= size
|
||||||
output += bytes(buffer[:n])
|
output += bytes(buffer[:n])
|
||||||
# NOTE: Llama1 models automatically added a space at the start of the prompt
|
# NOTE: Llama1 models automatically added a space at the start of the prompt
|
||||||
# this line removes a leading space if the first token is a beginning of sentence token
|
# this line removes a leading space if the first token is a beginning of sentence token
|
||||||
return output
|
return output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
|
||||||
|
|
||||||
def set_cache(self, cache: Optional[BaseLlamaCache]):
|
def set_cache(self, cache: Optional[BaseLlamaCache]):
|
||||||
"""Set the cache.
|
"""Set the cache.
|
||||||
|
|
|
@ -973,48 +973,43 @@ _lib.llama_tokenize_with_model.argtypes = [
|
||||||
_lib.llama_tokenize_with_model.restype = c_int
|
_lib.llama_tokenize_with_model.restype = c_int
|
||||||
|
|
||||||
|
|
||||||
# // Token Id -> String. Uses the vocabulary in the provided context
|
# // Token Id -> Piece.
|
||||||
# // Does not write null terminator to the buffer
|
# // Uses the vocabulary in the provided context.
|
||||||
# LLAMA_API int llama_token_to_str(
|
# // Does not write null terminator to the buffer.
|
||||||
|
# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
||||||
|
# LLAMA_API int llama_token_to_piece(
|
||||||
# const struct llama_context * ctx,
|
# const struct llama_context * ctx,
|
||||||
# llama_token token,
|
# llama_token token,
|
||||||
# char * buf,
|
# char * buf,
|
||||||
# int length);
|
# int length);
|
||||||
def llama_token_to_str(
|
def llama_token_to_piece(
|
||||||
ctx: llama_context_p, token: llama_token, buf: bytes, length: c_int
|
ctx: llama_context_p, token: llama_token, buf: bytes, length: c_int
|
||||||
) -> int:
|
) -> int:
|
||||||
return _lib.llama_token_to_str(ctx, token, buf, length)
|
return _lib.llama_token_to_piece(ctx, token, buf, length)
|
||||||
|
|
||||||
|
|
||||||
_lib.llama_tokenize_with_model.argtypes = [
|
_lib.llama_token_to_piece.argtypes = [llama_context_p, llama_token, c_char_p, c_int]
|
||||||
llama_model_p,
|
_lib.llama_token_to_piece.restype = c_int
|
||||||
c_char_p,
|
|
||||||
llama_token_p,
|
|
||||||
c_int,
|
|
||||||
c_bool,
|
|
||||||
]
|
|
||||||
_lib.llama_tokenize_with_model.restype = c_int
|
|
||||||
|
|
||||||
|
|
||||||
# LLAMA_API int llama_token_to_str_with_model(
|
# LLAMA_API int llama_token_to_piece_with_model(
|
||||||
# const struct llama_model * model,
|
# const struct llama_model * model,
|
||||||
# llama_token token,
|
# llama_token token,
|
||||||
# char * buf,
|
# char * buf,
|
||||||
# int length);
|
# int length);
|
||||||
def llama_token_to_str_with_model(
|
def llama_token_to_piece_with_model(
|
||||||
model: llama_model_p, token: llama_token, buf: bytes, length: c_int
|
model: llama_model_p, token: llama_token, buf: bytes, length: c_int
|
||||||
) -> int:
|
) -> int:
|
||||||
return _lib.llama_token_to_str_with_model(model, token, buf, length)
|
return _lib.llama_token_to_piece_with_model(model, token, buf, length)
|
||||||
|
|
||||||
|
|
||||||
_lib.llama_token_to_str_with_model.argtypes = [
|
_lib.llama_token_to_piece_with_model.argtypes = [
|
||||||
llama_model_p,
|
llama_model_p,
|
||||||
llama_token,
|
llama_token,
|
||||||
c_char_p,
|
c_char_p,
|
||||||
c_int,
|
c_int,
|
||||||
]
|
]
|
||||||
_lib.llama_token_to_str_with_model.restype = c_int
|
_lib.llama_token_to_piece_with_model.restype = c_int
|
||||||
|
|
||||||
|
|
||||||
# //
|
# //
|
||||||
# // Grammar
|
# // Grammar
|
||||||
|
|
|
@ -14,16 +14,16 @@ def test_llama_cpp_tokenization():
|
||||||
|
|
||||||
tokens = llama.tokenize(text)
|
tokens = llama.tokenize(text)
|
||||||
assert tokens[0] == llama.token_bos()
|
assert tokens[0] == llama.token_bos()
|
||||||
assert tokens == [1, 10994, 2787]
|
assert tokens == [1, 15043, 2787]
|
||||||
detokenized = llama.detokenize(tokens)
|
detokenized = llama.detokenize(tokens)
|
||||||
assert detokenized == text
|
assert detokenized == text
|
||||||
|
|
||||||
tokens = llama.tokenize(text, add_bos=False)
|
tokens = llama.tokenize(text, add_bos=False)
|
||||||
assert tokens[0] != llama.token_bos()
|
assert tokens[0] != llama.token_bos()
|
||||||
assert tokens == [10994, 2787]
|
assert tokens == [15043, 2787]
|
||||||
|
|
||||||
detokenized = llama.detokenize(tokens)
|
detokenized = llama.detokenize(tokens)
|
||||||
assert detokenized == text
|
assert detokenized != text
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos")
|
@pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos")
|
||||||
|
|
2
vendor/llama.cpp
vendored
2
vendor/llama.cpp
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit c1ac54b77aaba10d029084d152be786102010eb2
|
Subproject commit c10704d01e21e3dbe4d6ca1026ebff85349dd239
|
Loading…
Reference in a new issue