Truncate max_tokens if it exceeds context length

Andrei Betlen 2023-06-09 10:57:36 -04:00
parent fb2c5f7fd9
commit 556c7edf47


@@ -811,9 +811,16 @@ class Llama:
         if self.verbose:
             llama_cpp.llama_reset_timings(self.ctx)
 
-        if len(prompt_tokens) + max_tokens > self._n_ctx:
+        if len(prompt_tokens) > self._n_ctx:
             raise ValueError(f"Requested tokens exceed context window of {self._n_ctx}")
 
+        # Truncate max_tokens if requested tokens would exceed the context window
+        max_tokens = (
+            max_tokens
+            if max_tokens + len(prompt_tokens) < self._n_ctx
+            else (self._n_ctx - len(prompt_tokens))
+        )
+
         if stop != []:
             stop_sequences = [s.encode("utf-8") for s in stop]
         else:
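
For reference, a minimal, self-contained sketch of the clamping rule this diff applies. The function name truncate_max_tokens and the sample values below are illustrative only and are not part of the library's API.

# Sketch of the truncation rule introduced above: the prompt alone must fit
# in the context window; the requested max_tokens is then clamped to
# whatever room remains after the prompt.
def truncate_max_tokens(prompt_len: int, max_tokens: int, n_ctx: int) -> int:
    if prompt_len > n_ctx:
        raise ValueError(f"Requested tokens exceed context window of {n_ctx}")
    return (
        max_tokens
        if max_tokens + prompt_len < n_ctx
        else (n_ctx - prompt_len)
    )

# Example (illustrative values): a 2048-token context with a 1900-token
# prompt leaves at most 148 tokens for generation, even if 256 were requested.
print(truncate_max_tokens(prompt_len=1900, max_tokens=256, n_ctx=2048))  # -> 148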