From 8f35bddd7eab82b2609c61d76e74a4509b57c26b Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 26 May 2023 20:23:49 -0400
Subject: [PATCH] Fix stop sequence performance bug.

---
 CHANGELOG.md       |  6 +++++-
 llama_cpp/llama.py | 10 ++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6eb04cd..8b5fbec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,4 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Added first version of the changelog
\ No newline at end of file
+- Added first version of the changelog
+
+### Fixed
+
+- Performance bug in stop sequence check slowing down streaming.
\ No newline at end of file

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 012bb86..d7dc625 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -775,20 +775,22 @@ class Llama:
                 break
 
             if stream:
+                remaining_tokens = completion_tokens[returned_tokens:]
+                remaining_text = self.detokenize(remaining_tokens)
+                remaining_length = len(remaining_text)
+
                 # We want to avoid yielding any characters from
                 # the generated text if they are part of a stop
                 # sequence.
                 first_stop_position = 0
                 for s in stop_sequences:
-                    for i in range(len(s), 0, -1):
-                        if all_text.endswith(s[:i]):
+                    for i in range(min(len(s), remaining_length), 0, -1):
+                        if remaining_text.endswith(s[:i]):
                             if i > first_stop_position:
                                 first_stop_position = i
                             break
 
                 token_end_position = 0
-                remaining_tokens = completion_tokens[returned_tokens:]
-                remaining_length = len(self.detokenize(remaining_tokens))
                 for token in remaining_tokens:
                     token_end_position += len(self.detokenize([token]))
                     # Check if stop sequence is in the token
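
For context, a minimal standalone sketch of the performance issue this patch addresses. The function names and the `detokenize` callable below are hypothetical stand-ins for illustration; only the two loop bodies mirror the patch. The old check detokenized the entire completion (`all_text`) on every streamed token, so the per-token cost grew with the length of the generation; the new check detokenizes only the not-yet-yielded tail once per iteration and bounds the prefix comparison by that tail's length.

    # Hypothetical illustration only; not part of the patch.
    # `detokenize` stands in for Llama.detokenize.

    def stop_prefix_len_old(detokenize, completion_tokens, stop_sequences):
        # Old behaviour: re-detokenize the *entire* completion each call.
        # With n streamed tokens this is O(n) work per token, O(n^2) total.
        all_text = detokenize(completion_tokens)
        first_stop_position = 0
        for s in stop_sequences:
            for i in range(len(s), 0, -1):
                if all_text.endswith(s[:i]):
                    if i > first_stop_position:
                        first_stop_position = i
                    break
        return first_stop_position

    def stop_prefix_len_new(detokenize, completion_tokens, returned_tokens,
                            stop_sequences):
        # New behaviour: detokenize only the tokens not yet yielded, and
        # never compare more characters than that tail actually contains.
        remaining_text = detokenize(completion_tokens[returned_tokens:])
        remaining_length = len(remaining_text)
        first_stop_position = 0
        for s in stop_sequences:
            for i in range(min(len(s), remaining_length), 0, -1):
                if remaining_text.endswith(s[:i]):
                    if i > first_stop_position:
                        first_stop_position = i
                    break
        return first_stop_position

Since `returned_tokens` advances as chunks are yielded, the un-yielded tail stays a few tokens long, so the per-token cost of the stop check becomes roughly constant instead of growing with the completion.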