diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index 61ef20dc..52ac653f 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1032,7 +1032,7 @@ struct llama_server_context
             slot.has_next_token = false;
         }
 
-        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+        if (llama_token_is_eog(model, result.tok))
         {
             slot.stopped_eos = true;
             slot.has_next_token = false;
@@ -1144,12 +1144,15 @@ struct llama_server_context
 
         res.result_json = json
         {
-            {"content",    tkn.text_to_send},
             {"stop",       false},
             {"slot_id",    slot.id},
             {"multimodal", multimodal}
         };
 
+        if (!llama_token_is_eog(model, tkn.tok)) {
+            res.result_json["content"] = tkn.text_to_send;
+        }
+
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs_output = {};
diff --git a/llm/llama.cpp b/llm/llama.cpp
index f4ab2a41..f364eb6f 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit f4ab2a41476600a98067a9474ea8f9e6db41bcfa
+Subproject commit f364eb6fb5d46118a76fa045f487318de4c24961