diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 5c2f608..a2dad56 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -651,8 +651,9 @@ def _logit_bias_tokens_to_input_ids(
 
 @router.post(
     "/v1/completions",
+    summary="Completion"
 )
-@router.post("/v1/engines/copilot-codex/completions")
+@router.post("/v1/engines/copilot-codex/completions", include_in_schema=False)
 async def create_completion(
     request: Request,
     body: CreateCompletionRequest,
@@ -727,6 +728,7 @@ class CreateEmbeddingRequest(BaseModel):
 
 @router.post(
     "/v1/embeddings",
+    summary="Embedding"
 )
 async def create_embedding(
     request: CreateEmbeddingRequest, llama: llama_cpp.Llama = Depends(get_llama)
@@ -816,6 +818,7 @@ class CreateChatCompletionRequest(BaseModel):
 
 @router.post(
     "/v1/chat/completions",
+    summary="Chat"
 )
 async def create_chat_completion(
     request: Request,
@@ -880,7 +883,7 @@ class ModelList(TypedDict):
     data: List[ModelData]
 
 
-@router.get("/v1/models")
+@router.get("/v1/models", summary="Models")
 async def get_models(
     settings: Settings = Depends(get_settings),
 ) -> ModelList: