Implement openai api compatible authentication (#1010)

2023-12-21 19:44:49 +01:00 · 2023-12-21 19:44:49 +01:00 · 33cc623346
commit 33cc623346
parent 788394c096
1 changed file with 36 additions and 2 deletions
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@ -14,11 +14,12 @@ import llama_cpp
 import anyio
 from anyio.streams.memory import MemoryObjectSendStream
 from starlette.concurrency import run_in_threadpool, iterate_in_threadpool
-from fastapi import Depends, FastAPI, APIRouter, Request, Response
+from fastapi import Depends, FastAPI, APIRouter, Request, Response, HTTPException, status
 from fastapi.middleware import Middleware
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from fastapi.routing import APIRoute
+from fastapi.security import HTTPBearer
 from pydantic import BaseModel, Field
 from pydantic_settings import BaseSettings
 from sse_starlette.sse import EventSourceResponse
@ -163,6 +164,10 @@ class Settings(BaseSettings):
        default=True,
        description="Whether to interrupt requests when a new request is received.",
    )
+    api_key: Optional[str] = Field(
+        default=None,
+        description="API key for authentication. If set all requests need to be authenticated."
+    )


 class ErrorResponse(TypedDict):
@ -314,6 +319,9 @@ class RouteErrorHandler(APIRoute):
                elapsed_time_ms = int((time.perf_counter() - start_sec) * 1000)
                response.headers["openai-processing-ms"] = f"{elapsed_time_ms}"
                return response
+            except HTTPException as unauthorized:
+                # api key check failed
+                raise unauthorized
            except Exception as exc:
                json_body = await request.json()
                try:
@ -658,6 +666,27 @@ def _logit_bias_tokens_to_input_ids(
    return to_bias


+# Set up the Bearer authentication scheme used by authenticate().
+# NOTE(review): auto_error=False presumably makes a missing or malformed
+# Authorization header yield None instead of an automatic error response,
+# which is why authenticate() checks the value for truthiness -- confirm
+# against the FastAPI HTTPBearer documentation.
+bearer_scheme = HTTPBearer(auto_error=False)
+
+
+async def authenticate(settings: Settings = Depends(get_settings), authorization: Optional[str] = Depends(bearer_scheme)):
+    """Check the request's bearer token against the configured API key.
+
+    Args:
+        settings: Server settings; ``settings.api_key`` is the expected key.
+        authorization: Value produced by ``bearer_scheme``. Only its
+            ``credentials`` attribute is read here, so despite the
+            ``Optional[str]`` annotation it is presumably FastAPI's parsed
+            bearer-credentials object (or a falsy value when no usable
+            header was sent) -- TODO confirm and tighten the annotation.
+
+    Returns:
+        ``True`` when no API key is configured, otherwise the accepted token.
+
+    Raises:
+        HTTPException: 401 when an API key is configured and the token is
+            missing or does not match it.
+    """
+    # Imported locally so this function does not depend on the file's
+    # top-level import block being changed.
+    import secrets
+
+    # Skip API key check if it's not set in settings
+    if settings.api_key is None:
+        return True
+
+    # Compare in constant time so response timing does not reveal how much of
+    # a guessed key matched. Both sides are encoded first because
+    # compare_digest() rejects str arguments containing non-ASCII characters.
+    if authorization and secrets.compare_digest(
+        authorization.credentials.encode("utf-8"),
+        settings.api_key.encode("utf-8"),
+    ):
+        # api key is valid
+        return authorization.credentials
+
+    # A 401 response should carry a WWW-Authenticate challenge naming the
+    # scheme the client is expected to use.
+    raise HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Invalid API key",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+
+
@router.post(
    "/v1/completions",
    summary="Completion"
@ -667,6 +696,7 @@ async def create_completion(
    request: Request,
    body: CreateCompletionRequest,
    llama: llama_cpp.Llama = Depends(get_llama),
+    authenticated: str = Depends(authenticate),
 ) -> llama_cpp.Completion:
    if isinstance(body.prompt, list):
        assert len(body.prompt) <= 1
@ -740,7 +770,9 @@ class CreateEmbeddingRequest(BaseModel):
    summary="Embedding"
 )
 async def create_embedding(
-    request: CreateEmbeddingRequest, llama: llama_cpp.Llama = Depends(get_llama)
+    request: CreateEmbeddingRequest,
+    llama: llama_cpp.Llama = Depends(get_llama),
+    authenticated: str = Depends(authenticate),
 ):
    return await run_in_threadpool(
        llama.create_embedding, **request.model_dump(exclude={"user"})
@ -834,6 +866,7 @@ async def create_chat_completion(
    body: CreateChatCompletionRequest,
    llama: llama_cpp.Llama = Depends(get_llama),
    settings: Settings = Depends(get_settings),
+    authenticated: str = Depends(authenticate),
 ) -> llama_cpp.ChatCompletion:
    exclude = {
        "n",
@ -895,6 +928,7 @@ class ModelList(TypedDict):
@router.get("/v1/models", summary="Models")
 async def get_models(
    settings: Settings = Depends(get_settings),
+    authenticated: str = Depends(authenticate),
 ) -> ModelList:
    assert llama is not None
    return {