Use _with_model variants for tokenization

This commit is contained in:
Andrei Betlen 2023-08-25 13:43:16 -04:00
parent 80389f71da
commit 48cf43b427

View file

@@ -408,11 +408,11 @@ class Llama:
Returns: Returns:
A list of tokens. A list of tokens.
""" """
assert self.ctx is not None assert self.model is not None
n_ctx = self._n_ctx n_ctx = self._n_ctx
tokens = (llama_cpp.llama_token * n_ctx)() tokens = (llama_cpp.llama_token * n_ctx)()
n_tokens = llama_cpp.llama_tokenize( n_tokens = llama_cpp.llama_tokenize_with_model(
self.ctx, self.model,
text, text,
tokens, tokens,
llama_cpp.c_int(n_ctx), llama_cpp.c_int(n_ctx),
@@ -421,8 +421,8 @@ class Llama:
if n_tokens < 0: if n_tokens < 0:
n_tokens = abs(n_tokens) n_tokens = abs(n_tokens)
tokens = (llama_cpp.llama_token * n_tokens)() tokens = (llama_cpp.llama_token * n_tokens)()
n_tokens = llama_cpp.llama_tokenize( n_tokens = llama_cpp.llama_tokenize_with_model(
self.ctx, self.model,
text, text,
tokens, tokens,
llama_cpp.c_int(n_tokens), llama_cpp.c_int(n_tokens),
@@ -443,15 +443,15 @@ class Llama:
Returns: Returns:
The detokenized string. The detokenized string.
""" """
assert self.ctx is not None assert self.model is not None
output = b"" output = b""
buffer_size = 8 size = 8
buffer = (ctypes.c_char * buffer_size)() buffer = (ctypes.c_char * size)()
for token in tokens: for token in tokens:
n = llama_cpp.llama_token_to_str( n = llama_cpp.llama_token_to_str_with_model(
self.ctx, llama_cpp.llama_token(token), buffer, buffer_size self.model, llama_cpp.llama_token(token), buffer, size
) )
assert n <= buffer_size assert n <= size
output += bytes(buffer[:n]) output += bytes(buffer[:n])
# NOTE: Llama1 models automatically added a space at the start of the prompt # NOTE: Llama1 models automatically added a space at the start of the prompt
# this line removes a leading space if the first token is a beginning of sentence token # this line removes a leading space if the first token is a beginning of sentence token