From 7fc7bc30e712c10d633a7acf912134ae92c0fbe3 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 26 May 2023 20:12:05 -0400
Subject: [PATCH] Remove usage of eval_tokens for cache check

---
 llama_cpp/llama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 4f10227..064b982 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -735,10 +735,10 @@ class Llama:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
-                    cache_item.eval_tokens, prompt_tokens
+                    cache_item.input_ids.tolist(), prompt_tokens
                 )
                 eval_prefix_len = Llama.longest_token_prefix(
-                    self.eval_tokens, prompt_tokens
+                    self._input_ids.tolist(), prompt_tokens
                 )
                 if cache_prefix_len > eval_prefix_len:
                     self.load_state(cache_item)
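
For context, the patched cache check compares two shared-prefix lengths: how much of the new prompt matches the cached entry's tokens versus how much matches the tokens already evaluated in the current context, loading the cached state only when it wins. Below is a minimal, self-contained sketch of that comparison logic; the helper and the token values are illustrative assumptions, not the library's exact implementation.

from typing import List, Sequence


def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    # Count how many leading tokens the two sequences share.
    length = 0
    for token_a, token_b in zip(a, b):
        if token_a != token_b:
            break
        length += 1
    return length


# Hypothetical token ids, mirroring the patched check:
cached_ids: List[int] = [1, 15043, 29892, 3186]      # tokens stored in the cache entry
current_ids: List[int] = [1, 15043, 29892]           # tokens already evaluated in-context
prompt_tokens: List[int] = [1, 15043, 29892, 3186, 2]

cache_prefix_len = longest_token_prefix(cached_ids, prompt_tokens)
eval_prefix_len = longest_token_prefix(current_ids, prompt_tokens)
if cache_prefix_len > eval_prefix_len:
    # Corresponds to self.load_state(cache_item) in the patch:
    # restoring the cached state saves re-evaluating the shared prefix.
    print("would load cached state")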