diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index 92b6e5b..16a6012 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -28,4 +28,33 @@ jobs:
         # if: startsWith(github.ref, 'refs/tags')
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
-          password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+          password: ${{ secrets.PYPI_API_TOKEN }}
+
+  docker:
+    name: Build and push Docker image
+    runs-on: ubuntu-latest
+    needs: build-n-publish
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v4
+        with:
+          push: true # push to registry
+          pull: true # always fetch the latest base images
+          platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64
+          tags: ghcr.io/abetlen/llama-cpp-python:latest
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b500a0b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3-buster
+
+# We need to set the host to 0.0.0.0 to allow outside access
+ENV HOST 0.0.0.0
+
+# Install the package
+RUN pip install llama-cpp-python[server]
+
+# Run the server
+CMD python3 -m llama_cpp.server
\ No newline at end of file
diff --git a/README.md b/README.md
index 2c8c0a5..81ad723 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,14 @@ python3 -m llama_cpp.server
 
 Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the OpenAPI documentation.
 
+## Docker image
+
+A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:
+
+```bash
+docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-vicuna-7b-4bit.bin ghcr.io/abetlen/llama-cpp-python:latest
+```
+
 ## Low-level API
 
 The low-level API is a direct `ctypes` binding to the C API provided by `llama.cpp`.
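
Two companion sketches follow for exercising this patch locally; neither is part of the diff itself.

First, the `docker` job above can be reproduced with the plain Docker CLI. This is a minimal sketch, assuming Docker with the buildx plugin is installed and that `$GHCR_TOKEN` holds a personal access token with the `write:packages` scope (CI instead uses the automatic `GITHUB_TOKEN`); the `llama-builder` name is illustrative, and pushing to `ghcr.io/abetlen/llama-cpp-python` requires write access to that package, so substitute your own registry path when testing:

```bash
# Log in to GitHub Container Registry (docker/login-action equivalent).
echo "$GHCR_TOKEN" | docker login ghcr.io -u <your-github-username> --password-stdin

# Register QEMU emulators so the arm64 half of the build can run on an
# amd64 host (what docker/setup-qemu-action does in CI).
docker run --privileged --rm tonistiigi/binfmt --install all

# Create and select a buildx builder (docker/setup-buildx-action equivalent).
docker buildx create --name llama-builder --use

# Build for both platforms and push, mirroring the build-push step:
# --pull refreshes the base images, --push uploads the result.
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --pull \
  --push \
  -t ghcr.io/abetlen/llama-cpp-python:latest \
  .
```

Note the design choice in the workflow: `needs: build-n-publish` makes the image build run only after the PyPI publish job succeeds, so a failed release never ships a `latest` image.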
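
Second, the new Dockerfile can be smoke-tested on its own before any multi-arch push. Another sketch: the `llama-cpp-python:dev` tag and the `llama-test` container name are illustrative, and the model path follows the example in the README hunk above:

```bash
# Build the image from the new Dockerfile at the repository root.
docker build -t llama-cpp-python:dev .

# Start the server in the background, mounting a local models directory;
# MODEL tells the server which model file to load.
docker run --rm -d --name llama-test -p 8000:8000 \
  -v /path/to/models:/models \
  -e MODEL=/models/ggml-vicuna-7b-4bit.bin \
  llama-cpp-python:dev

# Once the model has loaded, the OpenAPI docs should be reachable.
curl http://localhost:8000/docs

# Clean up (--rm removes the container on stop).
docker stop llama-test
```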