From b9c53b88a1c2732d4d8a6f81057dcfd61585b843 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 24 Mar 2023 14:58:10 -0400
Subject: [PATCH] Use n_ctx provided from actual context not params

---
 llama_cpp/llama.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 7d67646..414a987 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -60,12 +60,12 @@ class Llama:
         stop = [s.encode("utf-8") for s in stop]
 
         prompt_tokens = llama_cpp.llama_tokenize(
-            self.ctx, prompt.encode("utf-8"), self.tokens, self.params.n_ctx, True
+            self.ctx, prompt.encode("utf-8"), self.tokens, llama_cpp.llama_n_ctx(self.ctx), True
         )
 
-        if prompt_tokens + max_tokens > self.params.n_ctx:
+        if prompt_tokens + max_tokens > llama_cpp.llama_n_ctx(self.ctx):
             raise ValueError(
-                f"Requested tokens exceed context window of {self.params.n_ctx}"
+                f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
            )
 
        # Process prompt in chunks to avoid running out of memory
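
Note (not part of the patch): a minimal sketch of the idea behind the change, assuming only
the low-level binding llama_cpp.llama_n_ctx(), which reports the context size of the loaded
llama context. The helper name check_fits and its signature are hypothetical, for
illustration only; the point is to validate the request against the value reported by the
live context rather than the value that was merely requested via llama_context_params.

    import llama_cpp

    def check_fits(ctx, n_prompt_tokens: int, max_tokens: int) -> None:
        # Hypothetical helper: ask the live context for its window size
        # instead of trusting the n_ctx field that was passed in at init.
        n_ctx = llama_cpp.llama_n_ctx(ctx)
        if n_prompt_tokens + max_tokens > n_ctx:
            raise ValueError(f"Requested tokens exceed context window of {n_ctx}")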