# Tag of the nvidia/cuda base image; override with --build-arg CUDA_IMAGE=<tag>.
ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}

# We need to set the host to 0.0.0.0 to allow outside access
ENV HOST=0.0.0.0

# Copy the build context into the image's current working directory.
# NOTE(review): this runs before the dependency layers, so any change in the
# build context invalidates them; consider moving it later once it is confirmed
# that nothing below depends on the copied files.
COPY . .

# Install Python and pip. Recommended packages are kept on purpose: the pip
# install below may compile from source and rely on them (TODO confirm before
# adding --no-install-recommends). The APT lists are removed in the same layer
# so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install the build tooling and server dependencies.
RUN python3 -m pip install --no-cache-dir --upgrade \
        cmake \
        fastapi \
        pip \
        pydantic-settings \
        pytest \
        scikit-build \
        setuptools \
        sse-starlette \
        uvicorn

# Install the package. LLAMA_CUBLAS=1 is exported to the install step only
# (presumably it enables the cuBLAS backend -- confirm against the
# llama-cpp-python build documentation).
RUN LLAMA_CUBLAS=1 python3 -m pip install --no-cache-dir llama-cpp-python

# Run the server. Exec form makes python3 the container's main process so it
# receives stop signals directly instead of through an intermediate /bin/sh.
CMD ["python3", "-m", "llama_cpp.server"]