From dc39cc0fa410f8b46954ad507b705052947da6bc Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 19 May 2023 02:04:30 -0400
Subject: [PATCH] Use server sent events function for streaming completion

---
 llama_cpp/server/app.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index e8f62e8..3f95bdd 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -259,8 +259,15 @@ def create_completion(
         )
     )
     if request.stream:
+
+        async def server_sent_events(
+            chunks: Iterator[llama_cpp.CompletionChunk],
+        ):
+            for chunk in chunks:
+                yield dict(data=json.dumps(chunk))
+
         chunks: Iterator[llama_cpp.CompletionChunk] = completion_or_chunks  # type: ignore
-        return EventSourceResponse(dict(data=json.dumps(chunk)) for chunk in chunks)
+        return EventSourceResponse(server_sent_events(chunks))
     completion: llama_cpp.Completion = completion_or_chunks  # type: ignore
     return completion
 
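
Note (not part of the patch itself): the removed generator expression and the added server_sent_events async generator emit the same events, so the change is behavior-preserving; naming the generator and making it async keeps iteration in the async context and gives the streaming path a place to grow later (for example, disconnect handling). Below is a minimal standalone sketch of the same pattern with FastAPI and sse-starlette, using a dummy chunk source; the /stream route and the chunk contents are illustrative stand-ins, not part of llama-cpp-python.

import json
from typing import Dict, Iterator

from fastapi import FastAPI
from sse_starlette.sse import EventSourceResponse

app = FastAPI()


def fake_chunks() -> Iterator[Dict]:
    # Stand-in for the Iterator[llama_cpp.CompletionChunk] the real server
    # gets back from the model when request.stream is True.
    for text in ["Hello", ", ", "world"]:
        yield {"choices": [{"text": text}]}


@app.get("/stream")
async def stream():
    async def server_sent_events(chunks: Iterator[Dict]):
        # Each yielded dict becomes one SSE event; sse-starlette serializes
        # the "data" field into a "data: ..." line on the wire.
        for chunk in chunks:
            yield dict(data=json.dumps(chunk))

    return EventSourceResponse(server_sent_events(fake_chunks()))

Run it with "uvicorn sketch:app" and consume the stream with "curl -N http://localhost:8000/stream" (-N disables curl's output buffering so events print as they arrive).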