From c14201dc0f7c544232e5f894726f8ca4d92424f7 Mon Sep 17 00:00:00 2001
From: Niek van der Maas
Date: Wed, 12 Apr 2023 11:53:39 +0200
Subject: [PATCH] Add Dockerfile + build workflow

---
 .github/workflows/publish.yaml | 31 ++++++++++++++++++++++++++++++-
 Dockerfile                     | 10 ++++++++++
 README.md                      |  8 ++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index 92b6e5b..16a6012 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -28,4 +28,33 @@ jobs:
       # if: startsWith(github.ref, 'refs/tags')
       uses: pypa/gh-action-pypi-publish@release/v1
       with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+        password: ${{ secrets.PYPI_API_TOKEN }}
+
+  docker:
+    name: Build and push Docker image
+    runs-on: ubuntu-latest
+    needs: build-n-publish
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v4
+        with:
+          push: true # push to registry
+          pull: true # always fetch the latest base images
+          platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64
+          tags: ghcr.io/abetlen/llama-cpp-python:latest
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b500a0b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3-buster
+
+# We need to set the host to 0.0.0.0 to allow outside access
+ENV HOST 0.0.0.0
+
+# Install the package
+RUN pip install llama-cpp-python[server]
+
+# Run the server
+CMD python3 -m llama_cpp.server
\ No newline at end of file
diff --git a/README.md b/README.md
index 2c8c0a5..81ad723 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,14 @@ python3 -m llama_cpp.server
 
 Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the OpenAPI documentation.
 
+## Docker image
+
+A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server:
+
+```bash
+docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-vicuna-7b-4bit.bin ghcr.io/abetlen/llama-cpp-python:latest
+```
+
 ## Low-level API
 
 The low-level API is a direct `ctypes` binding to the C API provided by `llama.cpp`.
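
Note: the new Dockerfile can be exercised locally before the workflow publishes anything to GHCR. A minimal sketch follows; the `llama-cpp-python:dev` tag and the `/path/to/models` mount are placeholders, not part of this patch:

```bash
# Build the image from the repository root using the new Dockerfile
docker build -t llama-cpp-python:dev .

# Run the server; MODEL must point at a model file mounted into the container
docker run --rm -it -p 8000:8000 \
  -v /path/to/models:/models \
  -e MODEL=/models/ggml-vicuna-7b-4bit.bin \
  llama-cpp-python:dev
```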
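Because the workflow builds for both linux/amd64 and linux/arm64, the pushed manifest can be checked after the job runs with `docker buildx imagetools inspect` (exact output varies by Docker version):

```bash
# Should list both amd64 and arm64 entries in the multi-arch manifest
docker buildx imagetools inspect ghcr.io/abetlen/llama-cpp-python:latest
```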