Remove usage of eval_tokens for cache check
parent fe331ec589
commit 7fc7bc30e7
1 changed file with 2 additions and 2 deletions
@@ -735,10 +735,10 @@ class Llama:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
-                    cache_item.eval_tokens, prompt_tokens
+                    cache_item.input_ids.tolist(), prompt_tokens
                 )
                 eval_prefix_len = Llama.longest_token_prefix(
-                    self.eval_tokens, prompt_tokens
+                    self._input_ids.tolist(), prompt_tokens
                 )
                 if cache_prefix_len > eval_prefix_len:
                     self.load_state(cache_item)
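The diff swaps the eval_tokens sequence for input_ids (a numpy array, hence the .tolist() calls) when measuring prefix overlap between the prompt, the cached state, and the current evaluation state. The implementation of Llama.longest_token_prefix is not shown in this hunk; as a minimal sketch, inferred only from its call sites here and not copied from the repository, it presumably computes the length of the shared leading run of two token sequences:

from typing import Sequence

def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    # Hypothetical sketch: count how many leading tokens the two
    # sequences share before the first mismatch.
    n = 0
    for tok_a, tok_b in zip(a, b):
        if tok_a != tok_b:
            break
        n += 1
    return n

Under that reading, the cached state is loaded only when it matches more of the prompt than the tokens already evaluated in the current context.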