Add Dockerfile + build workflow

Niek van der Maas 2023-04-12 11:53:39 +02:00
parent 9f1e565594
commit c14201dc0f
3 changed files with 48 additions and 1 deletions


@ -28,4 +28,33 @@ jobs:
      # if: startsWith(github.ref, 'refs/tags')
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        password: ${{ secrets.PYPI_API_TOKEN }}
  docker:
    name: Build and push Docker image
    runs-on: ubuntu-latest
    needs: build-n-publish
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          push: true # push to registry
          pull: true # always fetch the latest base images
          platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64
          tags: ghcr.io/abetlen/llama-cpp-python:latest
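For debugging the CI job, the "Build and push" step can be reproduced locally. A rough sketch, assuming Docker with the buildx plugin is installed and you are logged in to ghcr.io (the builder name is illustrative):

```shell
# Create and select a buildx builder (one-time setup)
docker buildx create --name multiarch --use

# Same inputs as the workflow step: pull fresh base images,
# build for amd64 and arm64, push the result to GHCR
docker buildx build \
  --pull \
  --platform linux/amd64,linux/arm64 \
  --tag ghcr.io/abetlen/llama-cpp-python:latest \
  --push .
```

Note that multi-arch images cannot be loaded into the local Docker daemon with `--load`; they must be pushed to a registry, which is why the workflow pushes directly.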

Dockerfile

@ -0,0 +1,10 @@
FROM python:3-buster
# We need to set the host to 0.0.0.0 to allow outside access
ENV HOST 0.0.0.0
# Install the package
RUN pip install llama-cpp-python[server]
# Run the server
CMD python3 -m llama_cpp.server
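The `ENV HOST 0.0.0.0` line matters because the server reads its bind address from the environment; without it the server would bind to localhost inside the container and be unreachable from the host. A minimal Python sketch of that pattern (the `PORT` variable and its default are assumptions, not taken from this commit):

```python
import os

# The server binds to the address in $HOST; the Dockerfile sets it to
# 0.0.0.0 so the process listens on all container interfaces.
host = os.environ.get("HOST", "localhost")
# PORT with a default of 8000 is an assumed convention for illustration.
port = int(os.environ.get("PORT", "8000"))

print(f"binding to {host}:{port}")
```

Inside the container built from this Dockerfile, `host` resolves to `0.0.0.0`, so `docker run -p 8000:8000 ...` can forward traffic in from the host.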


@ -66,6 +66,14 @@ python3 -m llama_cpp.server
Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the OpenAPI documentation.
## Docker image
A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:
```bash
docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/ggml-vicuna-7b-4bit.bin ghcr.io/abetlen/llama-cpp-python:latest
```
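Once the container is up, the server exposes an OpenAI-compatible HTTP API. A hedged sketch of a completion request using only the standard library (the prompt and sampling parameters are illustrative, not from this commit):

```python
import json
import urllib.request

# Illustrative payload for the OpenAI-style /v1/completions endpoint
# exposed by the server started in the docker command above.
payload = {
    "prompt": "Q: What is the capital of France? A:",
    "max_tokens": 32,
    "stop": ["\n"],
}

req = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

# With the container running, this would return a JSON completion object:
# response = json.load(urllib.request.urlopen(req))
```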
## Low-level API
The low-level API is a direct `ctypes` binding to the C API provided by `llama.cpp`.