diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index e6d150d..b57a41e 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -50,10 +50,13 @@ class Llama:
         repeat_penalty: float = 1.1,
         top_k: int = 40,
     ):
-        text = ""
+        text = b""
         finish_reason = "length"
         completion_tokens = 0
 
+        if stop is not None:
+            stop = [s.encode("utf-8") for s in stop]
+
         prompt_tokens = llama_cpp.llama_tokenize(
             self.ctx, prompt.encode("utf-8"), self.tokens, self.params.n_ctx, True
         )
@@ -81,7 +84,8 @@ class Llama:
             if token == llama_cpp.llama_token_eos():
                 finish_reason = "stop"
                 break
-            text += llama_cpp.llama_token_to_str(self.ctx, token).decode("utf-8")
+            # text += llama_cpp.llama_token_to_str(self.ctx, token).decode("utf-8")
+            text += llama_cpp.llama_token_to_str(self.ctx, token)
             self.tokens[prompt_tokens + i] = token
             completion_tokens += 1
 
@@ -100,6 +104,8 @@ class Llama:
                 self.n_threads,
             )
 
+        text = text.decode("utf-8")
+
         if echo:
             text = prompt + text
 
@@ -111,6 +117,7 @@ class Llama:
             self.ctx,
         )[:logprobs]
 
+
         return {
             "id": f"cmpl-{str(uuid.uuid4())}",  # Likely to change
             "object": "text_completion",
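
Note (reviewer sketch, not part of the patch): the switch from str to bytes
accumulation matters because llama.cpp's tokenizer can split a single
multi-byte UTF-8 character across consecutive tokens, so decoding each token's
bytes in isolation can raise UnicodeDecodeError mid-character. Accumulating raw
bytes and decoding once after generation avoids this, and encoding the stop
sequences keeps that comparison bytes-to-bytes. A minimal standalone
reproduction, where `fragments` stands in for the per-token byte strings
returned by llama_cpp.llama_token_to_str:

    # Hypothetical sketch: "🦙" is one 4-byte UTF-8 character (f0 9f a6 99),
    # split here the way a tokenizer might split it across two tokens.
    fragments = ["🦙".encode("utf-8")[:2], "🦙".encode("utf-8")[2:]]

    # Old behavior: decode each fragment as it arrives -> fails mid-character.
    try:
        text = ""
        for fragment in fragments:
            text += fragment.decode("utf-8")
    except UnicodeDecodeError as exc:
        print("per-token decode fails:", exc)

    # New behavior: accumulate bytes, decode once at the end -> succeeds.
    text = b""
    for fragment in fragments:
        text += fragment
    print(text.decode("utf-8"))  # prints 🦙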