From b73c73c0c67f559fd9c7d620ad3d4e24d5c4bc4c Mon Sep 17 00:00:00 2001
From: khimaros
Date: Wed, 17 Apr 2024 14:08:19 +0000
Subject: [PATCH] feat: add `disable_ping_events` flag (#1257)

for backward compatibility, this is false by default.

it can be set to true to disable EventSource pings, which are not
supported by some OpenAI clients.

fixes https://github.com/abetlen/llama-cpp-python/issues/1256
---
 llama_cpp/server/app.py      | 12 ++++++++++++
 llama_cpp/server/settings.py |  4 ++++
 2 files changed, 16 insertions(+)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 8211323..b6ed9b1 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -87,6 +87,13 @@ def get_llama_proxy():
             llama_outer_lock.release()
 
 
+_ping_message_factory = None
+
+def set_ping_message_factory(factory):
+    global _ping_message_factory
+    _ping_message_factory = factory
+
+
 def create_app(
     settings: Settings | None = None,
     server_settings: ServerSettings | None = None,
@@ -138,6 +145,9 @@ def create_app(
     assert model_settings is not None
     set_llama_proxy(model_settings=model_settings)
 
+    if server_settings.disable_ping_events:
+        set_ping_message_factory(lambda: bytes())
+
     return app
 
 
@@ -302,6 +312,7 @@ async def create_completion(
                 iterator=iterator(),
             ),
             sep="\n",
+            ping_message_factory=_ping_message_factory,
         )
     else:
         return iterator_or_completion
@@ -470,6 +481,7 @@ async def create_chat_completion(
                 iterator=iterator(),
             ),
             sep="\n",
+            ping_message_factory=_ping_message_factory,
         )
     else:
         return iterator_or_completion
diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index 811c6ca..934aecd 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -195,6 +195,10 @@ class ServerSettings(BaseSettings):
         default=True,
         description="Whether to interrupt requests when a new request is received.",
     )
+    disable_ping_events: bool = Field(
+        default=False,
+        description="Disable EventSource pings (may be needed for some clients).",
+    )
 
 
 class Settings(ServerSettings, ModelSettings):
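
Usage sketch (not part of the patch): assuming the patch above is applied and a
model file is available, the new setting can be enabled when constructing the app
programmatically. The model path below is a placeholder.

    from llama_cpp.server.app import create_app
    from llama_cpp.server.settings import ModelSettings, ServerSettings

    # Disable EventSource ping events for clients that cannot handle them.
    server_settings = ServerSettings(disable_ping_events=True)

    # Placeholder model path; point this at a real GGUF file.
    model_settings = [ModelSettings(model="./models/example.gguf")]

    app = create_app(server_settings=server_settings, model_settings=model_settings)

Since `ServerSettings` is a pydantic `BaseSettings` class, the flag should also be
settable through the server's generated CLI arguments or environment variables,
though the exact invocation depends on how `llama_cpp.server.__main__` maps the
settings fields.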