diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py index 44ee1f0..80cbe01 100644 --- a/llama_cpp/server/__main__.py +++ b/llama_cpp/server/__main__.py @@ -196,7 +196,7 @@ CreateChatCompletionResponse = create_model_from_typeddict(llama_cpp.ChatComplet "/v1/chat/completions", response_model=CreateChatCompletionResponse, ) -async def create_chat_completion( +def create_chat_completion( request: CreateChatCompletionRequest, ) -> Union[llama_cpp.ChatCompletion, EventSourceResponse]: completion_or_chunks = llama.create_chat_completion(