feat: Update llama.cpp
parent 893a27a736
commit 159cc4e5d9
3 changed files with 33 additions and 11 deletions

llama_cpp/_internals.py

@@ -181,20 +181,20 @@ class _LlamaModel:
         )
         return list(tokens[:n_tokens])
 
-    def token_to_piece(self, token: int) -> bytes:
+    def token_to_piece(self, token: int, special: bool = False) -> bytes:
         assert self.model is not None
         buf = ctypes.create_string_buffer(32)
-        llama_cpp.llama_token_to_piece(self.model, token, buf, 32)
+        llama_cpp.llama_token_to_piece(self.model, token, buf, 32, special)
         return bytes(buf)
 
-    def detokenize(self, tokens: List[int]) -> bytes:
+    def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
         assert self.model is not None
         output = b""
         size = 32
         buffer = (ctypes.c_char * size)()
         for token in tokens:
             n = llama_cpp.llama_token_to_piece(
-                self.model, llama_cpp.llama_token(token), buffer, size
+                self.model, llama_cpp.llama_token(token), buffer, size, special
             )
             assert n <= size
             output += bytes(buffer[:n])
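
The new `special` flag is passed straight through to the C API: when true, llama.cpp renders special tokens (EOS, EOT, chat-control tokens such as `<|im_end|>`) as their literal text instead of suppressing them. A minimal sketch of the difference, assuming an already-constructed `_LlamaModel` instance named `model` (the setup and the exact EOS text are hypothetical, not part of this commit):

    # Tokenize some text and append the model's EOS token.
    tokens = model.tokenize(b"Hello", add_bos=False, special=False)
    tokens.append(model.token_eos())

    out_plain = model.detokenize(tokens)                  # EOS text suppressed
    out_special = model.detokenize(tokens, special=True)  # EOS rendered, e.g. "</s>"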

@@ -597,13 +597,13 @@ def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> li
     return list(result)
 
 
-def _token_to_piece(model: _LlamaModel, token: int) -> str:
+def _token_to_piece(model: _LlamaModel, token: int, special: bool = False) -> str:
     assert model.model is not None
     result = (ctypes.c_char * 8)(0)
-    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
     if n_tokens < 0:
         result = (ctypes.c_char * -n_tokens)(0)
-        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
         if check != -n_tokens:
             raise RuntimeError(f"Failed to get piece: token={token}")
     else:
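
The helper above leans on a return-value convention of llama_token_to_piece that is easy to miss: on success it returns the number of bytes written, and when the buffer is too small it returns the negative of the required size, so the caller retries once with exactly that capacity. A self-contained restatement of the retry logic (a sketch, not an exported API; `model` here is a raw llama_model_p):

    import ctypes
    import llama_cpp

    def token_to_piece_str(model, token: int, special: bool = False) -> str:
        buf = (ctypes.c_char * 8)(0)
        n = llama_cpp.llama_token_to_piece(model, token, buf, len(buf), special)
        if n < 0:
            # Negative return: retry with the size the library asked for.
            buf = (ctypes.c_char * -n)(0)
            n = llama_cpp.llama_token_to_piece(model, token, buf, len(buf), special)
        return bytes(buf[:n]).decode("utf-8", errors="replace")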

llama_cpp/llama_cpp.py

@@ -2380,6 +2380,18 @@ def llama_token_get_type(
     ...
 
 
+# // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
+# LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);
+@ctypes_function(
+    "llama_token_is_eog", [llama_model_p_ctypes, llama_token], ctypes.c_bool
+)
+def llama_token_is_eog(
+    model: llama_model_p, token: Union[llama_token, int], /
+) -> bool:
+    """Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)"""
+    ...
+
+
 # // Special tokens
 
 
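llama_token_is_eog generalizes the old `token == llama_token_eos(model)` check: a model may define several end-of-generation tokens (EOS, EOT, etc.), and this predicate covers all of them. A hedged sketch of how a low-level decode loop might use it (`model`, `ctx`, `max_new_tokens`, and the `sample_next_token` helper are assumptions, not part of this commit):

    generated: list[int] = []
    while len(generated) < max_new_tokens:
        token = sample_next_token(ctx)  # hypothetical sampling helper
        if llama_cpp.llama_token_is_eog(model, token):
            break  # stop on EOS, EOT, or any other end-of-generation token
        generated.append(token)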

@@ -2434,7 +2446,7 @@ def llama_add_eos_token(model: llama_model_p, /) -> int:
     ...
 
 
-# // codellama infill tokens
+# // Codellama infill tokens
 # LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
 @ctypes_function("llama_token_prefix", [llama_model_p_ctypes], llama_token)
 def llama_token_prefix(model: llama_model_p) -> int:

@@ -2524,11 +2536,13 @@ def llama_tokenize(
 # // Uses the vocabulary in the provided context.
 # // Does not write null terminator to the buffer.
 # // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+# // @param special If true, special tokens are rendered in the output.
 # LLAMA_API int32_t llama_token_to_piece(
 # const struct llama_model * model,
 # llama_token token,
 # char * buf,
-# int32_t length);
+# int32_t length,
+# bool special);
 @ctypes_function(
     "llama_token_to_piece",
     [

@@ -2536,6 +2550,7 @@ def llama_tokenize(
         llama_token,
         ctypes.c_char_p,
         ctypes.c_int32,
+        ctypes.c_bool,
     ],
     ctypes.c_int32,
 )

@@ -2544,13 +2559,20 @@ def llama_token_to_piece(
     token: Union[llama_token, int],
     buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]],
     length: Union[ctypes.c_int, int],
+    special: Union[ctypes.c_bool, bool],
     /,
 ) -> int:
     """Token Id -> Piece.
     Uses the vocabulary in the provided context.
     Does not write null terminator to the buffer.
     User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
-    """
+
+    Args:
+        model: The model to use for tokenization.
+        token: The token to convert.
+        buf: The buffer to write the token to.
+        length: The length of the buffer.
+        special: If true, special tokens are rendered in the output."""
     ...
 
 
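Because ctypes.c_bool is now part of the binding's argtypes, existing callers that pass only four arguments will fail at call time with an argument-count error until they add the flag. A direct low-level call now looks roughly like this (a sketch: the model path is a placeholder, and the zero-argument llama_backend_init form is assumed from this version of the bindings):

    import ctypes
    import llama_cpp

    llama_cpp.llama_backend_init()
    params = llama_cpp.llama_model_default_params()
    model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", params)

    buf = ctypes.create_string_buffer(32)
    token = llama_cpp.llama_token_eos(model)
    # The trailing argument is the new `special` flag.
    n = llama_cpp.llama_token_to_piece(model, token, buf, len(buf), True)
    piece = buf.raw[:n] if n >= 0 else b""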

vendor/llama.cpp (vendored)

@@ -1 +1 @@
-Subproject commit 3b8f1ec4b18770531d0b1d792f3edf08254e4f0c
+Subproject commit 5cf5e7d490dfdd2e70bface2d35dfd14aa44b4fb