From 21acd7901fd43e8c3782f49851b418048d74deca Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sat, 10 Jun 2023 12:22:31 -0400
Subject: [PATCH] Re-enable cache

---
 llama_cpp/llama.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 02fe774..4b6ce8c 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -831,9 +831,7 @@ class Llama:
                 "logprobs is not supported for models created with logits_all=False"
             )

-        # Temporarily disable usage of the cache
-        # See: https://github.com/abetlen/llama-cpp-python/issues/348#issuecomment-1583072408
-        if self.cache and False:
+        if self.cache:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
@@ -1071,14 +1069,14 @@ class Llama:
                     }
                 ],
             }
-            if self.cache and False:
+            if self.cache:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
                 self.cache[prompt_tokens + completion_tokens] = self.save_state()
                 print("Llama._create_completion: cache saved", file=sys.stderr)
             return

-        if self.cache and False:
+        if self.cache:
             if self.verbose:
                 print("Llama._create_completion: cache save", file=sys.stderr)
             self.cache[prompt_tokens + completion_tokens] = self.save_state()
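
Note: the code paths re-enabled here only run when a cache object has been attached to the Llama instance, since self.cache defaults to None. A minimal usage sketch, assuming the LlamaCache class and Llama.set_cache() method exported by llama_cpp around this release; the model path and prompts are hypothetical:

    # Sketch of enabling the completion cache that this patch re-enables.
    from llama_cpp import Llama, LlamaCache

    llm = Llama(model_path="./models/7B/ggml-model.bin", verbose=True)  # hypothetical path
    llm.set_cache(LlamaCache())  # makes self.cache truthy inside _create_completion

    # First call evaluates the prompt and stores the model state keyed by
    # prompt_tokens + completion_tokens (see the cache-save branches above).
    out1 = llm("Q: What is the capital of France? A:", max_tokens=16)

    # A later call sharing a token prefix can reuse that saved state via
    # Llama.longest_token_prefix instead of re-evaluating the whole prompt.
    out2 = llm("Q: What is the capital of France? A: Paris. Q: And of Spain? A:", max_tokens=16)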