From b9c53b88a1c2732d4d8a6f81057dcfd61585b843 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 24 Mar 2023 14:58:10 -0400
Subject: [PATCH] Use n_ctx provided from actual context not params

---
 llama_cpp/llama.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 7d67646..414a987 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -60,12 +60,12 @@ class Llama:
         stop = [s.encode("utf-8") for s in stop]
 
         prompt_tokens = llama_cpp.llama_tokenize(
-            self.ctx, prompt.encode("utf-8"), self.tokens, self.params.n_ctx, True
+            self.ctx, prompt.encode("utf-8"), self.tokens, llama_cpp.llama_n_ctx(self.ctx), True
         )
 
-        if prompt_tokens + max_tokens > self.params.n_ctx:
+        if prompt_tokens + max_tokens > llama_cpp.llama_n_ctx(self.ctx):
             raise ValueError(
-                f"Requested tokens exceed context window of {self.params.n_ctx}"
+                f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
            )
 
        # Process prompt in chunks to avoid running out of memory
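
Note (not part of the patch): a minimal sketch of the idea behind the change, assuming only
the low-level binding llama_cpp.llama_n_ctx(), which reports the context size of the loaded
llama context. The helper name check_fits and its signature are hypothetical, for
illustration only; the point is to validate the request against the value reported by the
live context rather than the value that was merely requested via llama_context_params.

    import llama_cpp

    def check_fits(ctx, n_prompt_tokens: int, max_tokens: int) -> None:
        # Hypothetical helper: ask the live context for its window size
        # instead of trusting the n_ctx field that was passed in at init.
        n_ctx = llama_cpp.llama_n_ctx(ctx)
        if n_prompt_tokens + max_tokens > n_ctx:
            raise ValueError(f"Requested tokens exceed context window of {n_ctx}")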