Remove usage of eval_tokens for cache check
parent fe331ec589
commit 7fc7bc30e7
1 changed file with 2 additions and 2 deletions
@@ -735,10 +735,10 @@ class Llama:
         try:
             cache_item = self.cache[prompt_tokens]
             cache_prefix_len = Llama.longest_token_prefix(
-                cache_item.eval_tokens, prompt_tokens
+                cache_item.input_ids.tolist(), prompt_tokens
             )
             eval_prefix_len = Llama.longest_token_prefix(
-                self.eval_tokens, prompt_tokens
+                self._input_ids.tolist(), prompt_tokens
             )
             if cache_prefix_len > eval_prefix_len:
                 self.load_state(cache_item)
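For context, a minimal sketch of the prefix comparison this cache check relies on. This is an illustrative stand-alone reimplementation, assuming Llama.longest_token_prefix simply counts how many leading tokens two sequences share; it is not the library's exact code.

    from typing import Sequence

    def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
        """Count how many leading tokens two token sequences have in common."""
        n = 0
        for x, y in zip(a, b):
            if x != y:
                break
            n += 1
        return n

    # The cache check prefers whichever state shares the longer prefix with
    # the new prompt: the cached state (cache_item) or the tokens already
    # evaluated in the current context. After this commit, both prefixes are
    # computed from input_ids rather than the removed eval_tokens deque.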