feat: Update llama.cpp (add `special` parameter to llama_token_to_piece, add llama_token_is_eog binding)
This commit is contained in:
parent
893a27a736
commit
159cc4e5d9
3 changed files with 33 additions and 11 deletions
|
@ -181,20 +181,20 @@ class _LlamaModel:
|
||||||
)
|
)
|
||||||
return list(tokens[:n_tokens])
|
return list(tokens[:n_tokens])
|
||||||
|
|
||||||
def token_to_piece(self, token: int) -> bytes:
|
def token_to_piece(self, token: int, special: bool = False) -> bytes:
|
||||||
assert self.model is not None
|
assert self.model is not None
|
||||||
buf = ctypes.create_string_buffer(32)
|
buf = ctypes.create_string_buffer(32)
|
||||||
llama_cpp.llama_token_to_piece(self.model, token, buf, 32)
|
llama_cpp.llama_token_to_piece(self.model, token, buf, 32, special)
|
||||||
return bytes(buf)
|
return bytes(buf)
|
||||||
|
|
||||||
def detokenize(self, tokens: List[int]) -> bytes:
|
def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
|
||||||
assert self.model is not None
|
assert self.model is not None
|
||||||
output = b""
|
output = b""
|
||||||
size = 32
|
size = 32
|
||||||
buffer = (ctypes.c_char * size)()
|
buffer = (ctypes.c_char * size)()
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
n = llama_cpp.llama_token_to_piece(
|
n = llama_cpp.llama_token_to_piece(
|
||||||
self.model, llama_cpp.llama_token(token), buffer, size
|
self.model, llama_cpp.llama_token(token), buffer, size, special
|
||||||
)
|
)
|
||||||
assert n <= size
|
assert n <= size
|
||||||
output += bytes(buffer[:n])
|
output += bytes(buffer[:n])
|
||||||
|
@ -597,13 +597,13 @@ def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> li
|
||||||
return list(result)
|
return list(result)
|
||||||
|
|
||||||
|
|
||||||
def _token_to_piece(model: _LlamaModel, token: int) -> str:
|
def _token_to_piece(model: _LlamaModel, token: int, special: bool = False) -> str:
|
||||||
assert model.model is not None
|
assert model.model is not None
|
||||||
result = (ctypes.c_char * 8)(0)
|
result = (ctypes.c_char * 8)(0)
|
||||||
n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
|
n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
|
||||||
if n_tokens < 0:
|
if n_tokens < 0:
|
||||||
result = (ctypes.c_char * -n_tokens)(0)
|
result = (ctypes.c_char * -n_tokens)(0)
|
||||||
check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
|
check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
|
||||||
if check != -n_tokens:
|
if check != -n_tokens:
|
||||||
raise RuntimeError(f"Failed to get piece: token={token}")
|
raise RuntimeError(f"Failed to get piece: token={token}")
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -2380,6 +2380,18 @@ def llama_token_get_type(
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
|
# // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
|
||||||
|
# LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);
|
||||||
|
@ctypes_function(
|
||||||
|
"llama_token_is_eog", [llama_model_p_ctypes, llama_token], ctypes.c_bool
|
||||||
|
)
|
||||||
|
def llama_token_is_eog(
|
||||||
|
model: llama_model_p, token: Union[llama_token, int], /
|
||||||
|
) -> bool:
|
||||||
|
"""Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)"""
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
# // Special tokens
|
# // Special tokens
|
||||||
|
|
||||||
|
|
||||||
|
@ -2434,7 +2446,7 @@ def llama_add_eos_token(model: llama_model_p, /) -> int:
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
# // codellama infill tokens
|
# // Codellama infill tokens
|
||||||
# LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
|
# LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
|
||||||
@ctypes_function("llama_token_prefix", [llama_model_p_ctypes], llama_token)
|
@ctypes_function("llama_token_prefix", [llama_model_p_ctypes], llama_token)
|
||||||
def llama_token_prefix(model: llama_model_p) -> int:
|
def llama_token_prefix(model: llama_model_p) -> int:
|
||||||
|
@ -2524,11 +2536,13 @@ def llama_tokenize(
|
||||||
# // Uses the vocabulary in the provided context.
|
# // Uses the vocabulary in the provided context.
|
||||||
# // Does not write null terminator to the buffer.
|
# // Does not write null terminator to the buffer.
|
||||||
# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
||||||
|
# // @param special If true, special tokens are rendered in the output.
|
||||||
# LLAMA_API int32_t llama_token_to_piece(
|
# LLAMA_API int32_t llama_token_to_piece(
|
||||||
# const struct llama_model * model,
|
# const struct llama_model * model,
|
||||||
# llama_token token,
|
# llama_token token,
|
||||||
# char * buf,
|
# char * buf,
|
||||||
# int32_t length);
|
# int32_t length,
|
||||||
|
# bool special);
|
||||||
@ctypes_function(
|
@ctypes_function(
|
||||||
"llama_token_to_piece",
|
"llama_token_to_piece",
|
||||||
[
|
[
|
||||||
|
@ -2536,6 +2550,7 @@ def llama_tokenize(
|
||||||
llama_token,
|
llama_token,
|
||||||
ctypes.c_char_p,
|
ctypes.c_char_p,
|
||||||
ctypes.c_int32,
|
ctypes.c_int32,
|
||||||
|
ctypes.c_bool,
|
||||||
],
|
],
|
||||||
ctypes.c_int32,
|
ctypes.c_int32,
|
||||||
)
|
)
|
||||||
|
@ -2544,13 +2559,20 @@ def llama_token_to_piece(
|
||||||
token: Union[llama_token, int],
|
token: Union[llama_token, int],
|
||||||
buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]],
|
buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]],
|
||||||
length: Union[ctypes.c_int, int],
|
length: Union[ctypes.c_int, int],
|
||||||
|
special: Union[ctypes.c_bool, bool],
|
||||||
/,
|
/,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Token Id -> Piece.
|
"""Token Id -> Piece.
|
||||||
Uses the vocabulary in the provided context.
|
Uses the vocabulary in the provided context.
|
||||||
Does not write null terminator to the buffer.
|
Does not write null terminator to the buffer.
|
||||||
User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
||||||
"""
|
|
||||||
|
Args:
|
||||||
|
model: The model whose vocabulary is used to convert the token to a piece.
|
||||||
|
token: The token to convert.
|
||||||
|
buf: The buffer to write the token to.
|
||||||
|
length: The length of the buffer.
|
||||||
|
special: If true, special tokens are rendered in the output."""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
|
|
2
vendor/llama.cpp
vendored
2
vendor/llama.cpp
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit 3b8f1ec4b18770531d0b1d792f3edf08254e4f0c
|
Subproject commit 5cf5e7d490dfdd2e70bface2d35dfd14aa44b4fb
|
Loading…
Add table
Reference in a new issue