diff --git a/examples/low_level_api/low_level_api_chat_cpp.py b/examples/low_level_api/low_level_api_chat_cpp.py
index 4a7cfc1..c383bf6 100644
--- a/examples/low_level_api/low_level_api_chat_cpp.py
+++ b/examples/low_level_api/low_level_api_chat_cpp.py
@@ -356,10 +356,7 @@ n_keep = {self.params.n_keep}
     def output(self):
         self.remaining_tokens = self.params.n_predict
         for id in self.generate():
-            try:
-                yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
-            except UnicodeDecodeError:
-                pass
+            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
 
     # read user input
     def read_input(self):