Use n_ctx provided from actual context not params
parent 2cc499512c
commit b9c53b88a1
1 changed file with 3 additions and 3 deletions
@@ -60,12 +60,12 @@ class Llama:
         stop = [s.encode("utf-8") for s in stop]
 
         prompt_tokens = llama_cpp.llama_tokenize(
-            self.ctx, prompt.encode("utf-8"), self.tokens, self.params.n_ctx, True
+            self.ctx, prompt.encode("utf-8"), self.tokens, llama_cpp.llama_n_ctx(self.ctx), True
         )
 
-        if prompt_tokens + max_tokens > self.params.n_ctx:
+        if prompt_tokens + max_tokens > llama_cpp.llama_n_ctx(self.ctx):
             raise ValueError(
-                f"Requested tokens exceed context window of {self.params.n_ctx}"
+                f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
             )
 
         # Process prompt in chunks to avoid running out of memory
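For context, a minimal sketch (not part of the commit) of the pattern the change adopts: params.n_ctx is only the requested context size, while llama_cpp.llama_n_ctx(ctx) reports the size the live context actually has, so the check asks the context itself. The sketch assumes the era's low-level llama_cpp bindings (llama_context_default_params, llama_init_from_file, llama_token) and a hypothetical model path.

import llama_cpp

MODEL_PATH = b"./models/ggml-model.bin"  # assumption: point at a real local model file

params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(MODEL_PATH, params)

prompt = "Q: Name the planets in the solar system. A:"
max_tokens = 128

# Ask the live context for its real window instead of trusting params.n_ctx.
n_ctx = llama_cpp.llama_n_ctx(ctx)

# Tokenize the prompt into a fixed-size token buffer.
tokens = (llama_cpp.llama_token * n_ctx)()
n_prompt_tokens = llama_cpp.llama_tokenize(
    ctx, prompt.encode("utf-8"), tokens, n_ctx, True
)

# Same validation the commit changes: compare against the actual context size.
if n_prompt_tokens + max_tokens > n_ctx:
    raise ValueError(f"Requested tokens exceed context window of {n_ctx}")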
|