diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 56524e0..a73e347 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -26,7 +26,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn
+        python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn pydantic-settings
         pip install . -v
     - name: Test with pytest
       run: |
@@ -49,7 +49,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn
+        python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn pydantic-settings
         pip install . -v
     - name: Test with pytest
       run: |
@@ -72,7 +72,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn
+        python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi sse-starlette httpx uvicorn pydantic-settings
         pip install . -v
     - name: Test with pytest
       run: |
diff --git a/docker/cuda_simple/Dockerfile b/docker/cuda_simple/Dockerfile
index 24906d5..e4a2f07 100644
--- a/docker/cuda_simple/Dockerfile
+++ b/docker/cuda_simple/Dockerfile
@@ -8,7 +8,7 @@ COPY . .
 
 # Install the package
 RUN apt update && apt install -y python3 python3-pip
-RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
+RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
 
 RUN LLAMA_CUBLAS=1 pip install llama-cpp-python
diff --git a/docker/open_llama/Dockerfile b/docker/open_llama/Dockerfile
index f0ef5f7..7788f33 100644
--- a/docker/open_llama/Dockerfile
+++ b/docker/open_llama/Dockerfile
@@ -14,7 +14,7 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
     ninja-build \
     build-essential
 
-RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
+RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
 
 # Perform the conditional installations based on the image
 RUN echo "Image: ${IMAGE}" && \
diff --git a/docker/openblas_simple/Dockerfile b/docker/openblas_simple/Dockerfile
index 1a95cae..8231bdb 100644
--- a/docker/openblas_simple/Dockerfile
+++ b/docker/openblas_simple/Dockerfile
@@ -7,7 +7,7 @@ COPY . .
 
 # Install the package
 RUN apt update && apt install -y libopenblas-dev ninja-build build-essential
-RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
+RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
 
 RUN LLAMA_OPENBLAS=1 pip install llama_cpp_python --verbose
diff --git a/docker/simple/Dockerfile b/docker/simple/Dockerfile
index ad36b98..77680c8 100644
--- a/docker/simple/Dockerfile
+++ b/docker/simple/Dockerfile
@@ -18,7 +18,7 @@ RUN mkdir /app
 WORKDIR /app
 COPY . /app
 
-RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
+RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
 
 RUN make build && make clean
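Note: the `pydantic-settings` package added to every CI job and Docker image above is needed because Pydantic v2 split `BaseSettings` out of the core `pydantic` package. A minimal sketch of the import change driving the new dependency; the field names here are illustrative, not the server's actual `Settings` class:

```python
# Pydantic v1: BaseSettings shipped with pydantic itself.
#   from pydantic import BaseSettings
# Pydantic v2: it lives in the separate pydantic-settings package.
from pydantic import Field
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # Illustrative fields only; values still load from the environment
    # (e.g. MODEL=../models/7B/...) just as they did under v1.
    model: str = Field(default="", description="Path to the model file.")
    host: str = Field(default="localhost", description="Bind address.")
    port: int = Field(default=8000, description="Listen port.")


settings = Settings()
```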
diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index 748a2af..2110db3 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -3,7 +3,7 @@
 To run this example:
 
 ```bash
-pip install fastapi uvicorn sse-starlette
+pip install fastapi uvicorn sse-starlette pydantic-settings
 export MODEL=../models/7B/...
 ```
 
@@ -30,7 +30,7 @@ from llama_cpp.server.app import create_app, Settings
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    for name, field in Settings.__fields__.items():
-        description = field.field_info.description
+    for name, field in Settings.model_fields.items():
+        description = field.description
         if field.default is not None and description is not None:
             description += f" (default: {field.default})"
diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 5d47160..ffd07fa 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -12,7 +12,8 @@ from anyio.streams.memory import MemoryObjectSendStream
 from starlette.concurrency import run_in_threadpool, iterate_in_threadpool
 from fastapi import Depends, FastAPI, APIRouter, Request
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, BaseSettings, Field, create_model_from_typeddict
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings
 from sse_starlette.sse import EventSourceResponse
 
 
@@ -309,7 +310,6 @@ class CreateCompletionRequest(BaseModel):
         }
 
 
-CreateCompletionResponse = create_model_from_typeddict(llama_cpp.Completion)
 
 
 def make_logit_bias_processor(
@@ -347,7 +347,6 @@ def make_logit_bias_processor(
 
 @router.post(
     "/v1/completions",
-    response_model=CreateCompletionResponse,
 )
 async def create_completion(
     request: Request,
@@ -416,12 +415,10 @@ class CreateEmbeddingRequest(BaseModel):
         }
 
 
-CreateEmbeddingResponse = create_model_from_typeddict(llama_cpp.Embedding)
 
 
 @router.post(
     "/v1/embeddings",
-    response_model=CreateEmbeddingResponse,
 )
 async def create_embedding(
     request: CreateEmbeddingRequest, llama: llama_cpp.Llama = Depends(get_llama)
@@ -479,19 +476,17 @@ class CreateChatCompletionRequest(BaseModel):
         }
 
 
-CreateChatCompletionResponse = create_model_from_typeddict(llama_cpp.ChatCompletion)
 
 
 @router.post(
     "/v1/chat/completions",
-    response_model=CreateChatCompletionResponse,
 )
 async def create_chat_completion(
     request: Request,
     body: CreateChatCompletionRequest,
     llama: llama_cpp.Llama = Depends(get_llama),
     settings: Settings = Depends(get_settings),
-) -> Union[llama_cpp.ChatCompletion, EventSourceResponse]:
+) -> Union[llama_cpp.ChatCompletion]:  # type: ignore
     exclude = {
         "n",
         "logit_bias",
@@ -551,10 +546,9 @@ class ModelList(TypedDict):
     data: List[ModelData]
 
 
-GetModelResponse = create_model_from_typeddict(ModelList)
 
 
-@router.get("/v1/models", response_model=GetModelResponse)
+@router.get("/v1/models")
 async def get_models(
     settings: Settings = Depends(get_settings),
 ) -> ModelList:
diff --git a/pyproject.toml b/pyproject.toml
index b3ad3b4..841a868 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ httpx = "^0.24.1"
 scikit-build = "0.17.6"
 
 [tool.poetry.extras]
-server = ["uvicorn", "fastapi", "sse-starlette"]
+server = ["uvicorn>=0.22.0", "fastapi>=0.100.0", "pydantic-settings>=2.0.1", "sse-starlette>=1.6.1"]
 
 [build-system]
 requires = [
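Note: `create_model_from_typeddict` no longer exists in Pydantic v2, which is why the `response_model` declarations above are dropped outright rather than ported. If response-schema validation were still wanted, v2 can validate a TypedDict directly through `TypeAdapter`. A sketch under that assumption, not something this patch does, with the TypedDict shapes abbreviated for illustration:

```python
from typing import List

from pydantic import TypeAdapter
from typing_extensions import TypedDict


class ModelData(TypedDict):
    id: str
    object: str


class ModelList(TypedDict):
    object: str
    data: List[ModelData]


# TypeAdapter builds a validator (and JSON schema) straight from the TypedDict,
# covering the role create_model_from_typeddict played under Pydantic v1.
adapter = TypeAdapter(ModelList)
validated = adapter.validate_python(
    {"object": "list", "data": [{"id": "llama-7b", "object": "model"}]}
)
```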
--git a/setup.py b/setup.py
index 32101eb..1d7ecbc 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ setup(
     packages=["llama_cpp", "llama_cpp.server"],
     install_requires=["typing-extensions>=4.5.0", "numpy>=1.20.0", "diskcache>=5.6.1"],
     extras_require={
-        "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"],
+        "server": ["uvicorn>=0.22.1", "fastapi>=0.100.0", "pydantic-settings>=2.0.1", "sse-starlette>=1.6.1"],
     },
     python_requires=">=3.7",
     classifiers=[
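Note: with both packaging manifests updated, installing the server extra (e.g. `pip install 'llama-cpp-python[server]'`) pulls in the post-v2 FastAPI/Pydantic stack including `pydantic-settings`, while a bare `pip install llama-cpp-python` keeps only the core runtime dependencies.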