bugfix: truncate completion max_tokens to fit context length by default
parent 6f70cc4b7d
commit a86bfdf0a5
1 changed file with 6 additions and 10 deletions
@@ -824,18 +824,14 @@ class Llama:
         if self.verbose:
             llama_cpp.llama_reset_timings(self.ctx)
 
+        if len(prompt_tokens) >= llama_cpp.llama_n_ctx(self.ctx):
+            raise ValueError(
+                f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
+            )
+
         if max_tokens <= 0:
             # Unlimited, depending on n_ctx.
-            if len(prompt_tokens) >= int(llama_cpp.llama_n_ctx(self.ctx)):
-                raise ValueError(
-                    f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
-                )
-            else:
-                max_tokens = int(llama_cpp.llama_n_ctx(self.ctx)) - len(prompt_tokens)
-        elif len(prompt_tokens) + max_tokens > int(llama_cpp.llama_n_ctx(self.ctx)):
-            raise ValueError(
-                f"Requested tokens ({len(prompt_tokens)}) exceed context window of {self._n_ctx}"
-            )
+            max_tokens = llama_cpp.llama_n_ctx(self.ctx) - len(prompt_tokens)
 
         # Truncate max_tokens if requested tokens would exceed the context window
         max_tokens = (
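In short, a request whose prompt plus max_tokens would overrun the context window is no longer rejected; max_tokens is clamped to the room left after the prompt, and max_tokens <= 0 still means "use all remaining context". A minimal standalone sketch of that behaviour follows; the helper name and the example numbers are illustrative, not the library's code, and the clamp mirrors what the truncation expression that continues past the end of the hunk is described as doing.

def effective_max_tokens(n_ctx: int, n_prompt: int, max_tokens: int) -> int:
    # Hypothetical standalone helper, not the library's actual function.
    if n_prompt >= n_ctx:
        # Prompt alone already fills the context window: still an error.
        raise ValueError(f"Requested tokens exceed context window of {n_ctx}")
    if max_tokens <= 0:
        # Unlimited, depending on n_ctx: use whatever room is left after the prompt.
        max_tokens = n_ctx - n_prompt
    # Truncate max_tokens if prompt + completion would exceed the context window.
    return min(max_tokens, n_ctx - n_prompt)

# Example: a 512-token context with a 500-token prompt and max_tokens=128
# now yields 12 completion tokens instead of raising ValueError.
assert effective_max_tokens(512, 500, 128) == 12
assert effective_max_tokens(512, 500, -1) == 12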