diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index d7409f6..b404601 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -203,7 +203,7 @@ class _LlamaModel:
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token
         return (
-            output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
+            output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() and output[0:1] == b' ' else output
         )
 
     # Extra
@@ -812,4 +812,4 @@ class _LlamaSamplingContext:
     def accept(self, ctx_main: _LlamaContext, id: int, apply_grammar: bool):
         if apply_grammar and self.grammar is not None:
             ctx_main.grammar_accept_token(self.grammar, id)
-        self.prev.append(id)
\ No newline at end of file
+        self.prev.append(id)
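
The behavioral change in the first hunk: the leading byte is now only dropped when the detokenized bytes actually begin with a space, not merely because the prompt starts with the BOS token. A minimal sketch of that guard, assuming a hypothetical BOS_TOKEN id and a standalone strip_leading_space helper rather than the library's _LlamaModel.detokenize:

from typing import List

BOS_TOKEN = 1  # hypothetical beginning-of-sentence token id


def strip_leading_space(tokens: List[int], output: bytes) -> bytes:
    # Drop the first byte only when the prompt started with BOS *and* the
    # detokenized bytes really begin with a space; before this patch the
    # byte was dropped on the BOS check alone.
    return (
        output[1:]
        if len(tokens) > 0 and tokens[0] == BOS_TOKEN and output[0:1] == b" "
        else output
    )


# Tokenizer that prepends a space: the space is still stripped.
assert strip_leading_space([BOS_TOKEN, 42], b" Hello") == b"Hello"
# Tokenizer that does not prepend a space: the first byte is no longer lost.
assert strip_leading_space([BOS_TOKEN, 42], b"Hello") == b"Hello"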