Also ignore errors on input prompts
This commit is contained in:
parent
3c130f00ca
commit
5f81400fcb
3 changed files with 5 additions and 5 deletions
|
@@ -201,7 +201,7 @@ n_keep = {self.params.n_keep}
# tokenize a prompt
def _tokenize(self, prompt, bos=True):
    """Tokenize *prompt* into an array of llama tokens.

    Args:
        prompt: Text to tokenize; encoded as UTF-8 before being handed
            to the C API.
        bos: Whether to prepend the beginning-of-sequence token.

    Returns:
        A list of ``llama_cpp.llama_token`` of length ``_n`` (the count
        reported by ``llama_tokenize``).
    """
    # One extra slot for the optional BOS token.
    _arr = (llama_cpp.llama_token * (len(prompt) + 1))()
    # errors="ignore" drops characters that cannot be encoded instead of
    # raising UnicodeEncodeError on odd input (the point of this commit).
    _n = llama_cpp.llama_tokenize(self.ctx, prompt.encode("utf8", errors="ignore"), _arr, len(_arr), bos)
    return _arr[:_n]

    def set_color(self, c):
|
|
@@ -358,7 +358,7 @@ class Llama:
         if self.verbose:
             llama_cpp.llama_reset_timings(self.ctx)

-        tokens = self.tokenize(input.encode("utf-8"))
+        tokens = self.tokenize(input.encode("utf-8", errors="ignore"))
         self.reset()
         self.eval(tokens)
         n_tokens = len(tokens)
@@ -416,7 +416,7 @@ class Llama:
         completion_tokens: List[llama_cpp.llama_token] = []
         # Add blank space to start of prompt to match OG llama tokenizer
         prompt_tokens: List[llama_cpp.llama_token] = self.tokenize(
-            b" " + prompt.encode("utf-8")
+            b" " + prompt.encode("utf-8", errors="ignore")
         )
         text: bytes = b""
         returned_characters: int = 0
@@ -431,7 +431,7 @@ class Llama:
         )

         if stop != []:
-            stop_sequences = [s.encode("utf-8") for s in stop]
+            stop_sequences = [s.encode("utf-8", errors="ignore") for s in stop]
         else:
             stop_sequences = []
|
|
@@ -24,7 +24,7 @@ def test_llama_patch(monkeypatch):
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_eval", mock_eval)

     output_text = " jumps over the lazy dog."
-    output_tokens = llama.tokenize(output_text.encode("utf-8"))
+    output_tokens = llama.tokenize(output_text.encode("utf-8", errors="ignore"))
     token_eos = llama.token_eos()
     n = 0
|
Loading…
Reference in a new issue