Remove usage of eval_tokens for cache check

Andrei Betlen 2023-05-26 20:12:05 -04:00
parent fe331ec589
commit 7fc7bc30e7


@@ -735,10 +735,10 @@ class Llama:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
-                    cache_item.eval_tokens, prompt_tokens
+                    cache_item.input_ids.tolist(), prompt_tokens
                 )
                 eval_prefix_len = Llama.longest_token_prefix(
-                    self.eval_tokens, prompt_tokens
+                    self._input_ids.tolist(), prompt_tokens
                 )
                 if cache_prefix_len > eval_prefix_len:
                     self.load_state(cache_item)
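
For context, the cache check compares how many leading tokens the cached state and the current evaluation state share with the incoming prompt, and restores the cached state only when it matches a longer prefix. The sketch below is a minimal, standalone approximation of that logic; the `longest_token_prefix` helper and the example token values are assumptions for illustration, not the library's actual implementation.

```python
from typing import Sequence


def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    """Count how many leading token ids two sequences share."""
    n = 0
    for x, y in zip(a, b):
        if x != y:
            break
        n += 1
    return n


# Hypothetical usage mirroring the cache check in the diff: prefer the
# cached state only if it shares a longer prefix with the prompt than
# the tokens already evaluated in the current context.
cached_tokens = [1, 15043, 3186, 29991]      # tokens stored with a cached state
current_tokens = [1, 15043]                  # tokens already evaluated
prompt_tokens = [1, 15043, 3186, 29991, 13]  # incoming prompt

cache_prefix_len = longest_token_prefix(cached_tokens, prompt_tokens)
eval_prefix_len = longest_token_prefix(current_tokens, prompt_tokens)
if cache_prefix_len > eval_prefix_len:
    print("reuse cached state")  # corresponds to self.load_state(cache_item)
```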