feat: add disable_ping_events flag (#1257)

For backward compatibility this flag is false by default. It can be set to true to disable EventSource pings, which are not supported by some OpenAI clients. Fixes https://github.com/abetlen/llama-cpp-python/issues/1256.
This commit is contained in:
parent 4924455dec
commit b73c73c0c6

2 changed files with 16 additions and 0 deletions
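For context, a minimal usage sketch of the new flag when embedding the server in Python. Only Settings, create_app, and disable_ping_events come from the changes below; the import paths, the model path, and the uvicorn runner are assumptions not shown in this diff.

    # Hypothetical sketch: run the OpenAI-compatible server with ping events
    # disabled. Import paths and the model path are assumed, not part of this diff.
    import uvicorn

    from llama_cpp.server.app import create_app
    from llama_cpp.server.settings import Settings

    settings = Settings(
        model="./models/model.gguf",   # assumed: path to a local GGUF model
        disable_ping_events=True,      # the new flag added by this commit
    )
    app = create_app(settings=settings)

    if __name__ == "__main__":
        uvicorn.run(app, host="127.0.0.1", port=8000)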
llama_cpp/server/app.py

@@ -87,6 +87,13 @@ def get_llama_proxy():
             llama_outer_lock.release()
 
 
+_ping_message_factory = None
+
+def set_ping_message_factory(factory):
+    global _ping_message_factory
+    _ping_message_factory = factory
+
+
 def create_app(
     settings: Settings | None = None,
     server_settings: ServerSettings | None = None,
@@ -138,6 +145,9 @@ def create_app(
     assert model_settings is not None
     set_llama_proxy(model_settings=model_settings)
 
+    if server_settings.disable_ping_events:
+        set_ping_message_factory(lambda: bytes())
+
     return app
 
 
@@ -302,6 +312,7 @@ async def create_completion(
                 iterator=iterator(),
             ),
             sep="\n",
+            ping_message_factory=_ping_message_factory,
         )
     else:
         return iterator_or_completion
@@ -470,6 +481,7 @@ async def create_chat_completion(
                 iterator=iterator(),
             ),
             sep="\n",
+            ping_message_factory=_ping_message_factory,
         )
     else:
         return iterator_or_completion
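The ping_message_factory parameter belongs to sse_starlette's EventSourceResponse: when it is None (the default preserved here), sse_starlette emits its built-in periodic ping comment, and the lambda: bytes() factory installed above replaces that ping with an empty payload, effectively suppressing it. Below is a small standalone sketch of the same hook, independent of llama-cpp-python; it assumes only that sse_starlette and starlette are installed, and it is not part of this commit.

    # Hedged sketch of sse_starlette's ping_message_factory hook, outside of
    # llama-cpp-python: this factory replaces the default timestamped ping
    # comment with a fixed keep-alive comment (returning bytes() would silence it).
    import asyncio

    from sse_starlette.sse import EventSourceResponse, ServerSentEvent
    from starlette.applications import Starlette
    from starlette.routing import Route


    async def slow_events():
        # Yield a few events slowly, so ping messages would normally appear in between.
        for i in range(3):
            yield {"data": f"event {i}"}
            await asyncio.sleep(5)


    async def stream(request):
        return EventSourceResponse(
            slow_events(),
            sep="\n",
            ping_message_factory=lambda: ServerSentEvent(comment="keep-alive"),
        )


    app = Starlette(routes=[Route("/stream", stream)])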
llama_cpp/server/settings.py

@@ -195,6 +195,10 @@ class ServerSettings(BaseSettings):
         default=True,
         description="Whether to interrupt requests when a new request is received.",
     )
+    disable_ping_events: bool = Field(
+        default=False,
+        description="Disable EventSource pings (may be needed for some clients).",
+    )
 
 
 class Settings(ServerSettings, ModelSettings):
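Because ServerSettings is a pydantic BaseSettings subclass, the new field should also be settable from the environment (and presumably via the server's generated CLI flags) without code changes. The sketch below assumes pydantic's default case-insensitive field-name-to-environment-variable mapping and the llama_cpp.server.settings import path; neither is confirmed by this diff.

    # Hedged sketch: toggling the new setting via the environment, assuming
    # pydantic's default field-name-to-env-var mapping and no env_prefix.
    import os

    os.environ["DISABLE_PING_EVENTS"] = "true"  # assumed variable name

    from llama_cpp.server.settings import ServerSettings  # assumed import path

    server_settings = ServerSettings()
    print(server_settings.disable_ping_events)  # expected: True under the assumptions above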