ollama/Dockerfile
Daniel Hiltgen cd5c8f6471
Optimize container images for startup (#6547)
* Optimize container images for startup

This change adjusts how to handle runner payloads to support
container builds where we keep them extracted in the filesystem.
This makes it easier to optimize the cpu/cuda vs cpu/rocm images for
size, and should result in faster startup times for container images.

* Refactor payload logic and add buildx support for faster builds

* Move payloads around

* Review comments

* Converge to buildx based helper scripts

* Use docker buildx action for release
2024-09-12 12:10:30 -07:00

61 lines
2 KiB
Docker

# Build stage: Go toolchain image in which the ollama binary is compiled.
# It is referenced from the final stage as --from=build.
FROM golang:1.22-bookworm AS build

# Helper tools used by the later steps of this stage (repository setup, source fetching).
# - apt-get is used instead of apt, whose command-line interface is meant for interactive use.
# - update and install share one layer so a cached, stale package index is never reused.
# - the package index is deleted in the same layer that created it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        git \
        gnupg \
        software-properties-common \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Install Intel oneAPI (MKL and the DPC++/C++ compiler) plus the native build tools
# from Intel's apt repository.
#
# The signing key is downloaded to a temporary file before being de-armored instead of
# being piped straight into gpg: under the default /bin/sh a pipeline reports only the
# exit status of its last command, so a failed download would otherwise go unnoticed.
RUN wget -O /tmp/intel-oneapi.pub https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    && gpg --dearmor < /tmp/intel-oneapi.pub > /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && rm -f /tmp/intel-oneapi.pub \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        cmake \
        g++ \
        gcc \
        intel-oneapi-compiler-dpcpp-cpp \
        intel-oneapi-mkl \
        intel-oneapi-mkl-devel \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# All sources are built under /app; the final stage copies its artifacts from here.
WORKDIR /app

# Build-time only value; visible as an environment variable to the RUN steps below.
ARG GIN_MODE=release

# COPY instead of ADD: this is a plain copy of the build context, and none of ADD's
# extra behaviours (archive extraction, URL fetching) are wanted here.
COPY . .

# 1. Source the oneAPI environment so icx/icpx and MKL are found.
# 2. Run go generate with the CMake definitions selecting Intel BLAS and the Intel compilers.
# 3. Build a stripped binary (-s -w drops the symbol table and DWARF data).
# The output path is set explicitly because the final stage copies /app/ollama.
# NOTE(review): -DGGML_NATIVE=ON presumably tunes the native code for the build host's
# CPU — confirm the image is only run on compatible hardware.
RUN . /opt/intel/oneapi/setvars.sh && \
    OLLAMA_CUSTOM_CPU_DEFS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_NATIVE=ON" go generate ./... && \
    go build -ldflags="-s -w" -o /app/ollama
# Runtime stage: slim Debian image carrying supervisor and the Intel oneAPI runtime libraries.
FROM debian:bookworm-slim AS runtime

# Everything happens in one layer so the package index and apt cache never persist in the image.
# - ca-certificates is listed explicitly: both the key download and the Intel apt
#   repository use HTTPS, and that must not depend on another package pulling it in.
# - The signing key is written to a temporary file before de-armoring; piping wget into
#   gpg under /bin/sh would hide a failed download, because only the last command of a
#   pipeline determines its exit status.
# NOTE(review): wget, gnupg and software-properties-common appear to be needed only for
# the repository setup in this layer — confirm whether the runtime scripts use them
# before removing them from the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        software-properties-common \
        supervisor \
        wget \
    && wget -O /tmp/intel-oneapi.pub https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    && gpg --dearmor < /tmp/intel-oneapi.pub > /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && rm -f /tmp/intel-oneapi.pub \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends intel-oneapi-runtime-libs \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Application directory of the runtime image.
WORKDIR /app

# Pull the compiled binary, the supervisor configuration and the two helper scripts
# out of the build stage into /app.
COPY --from=build \
    /app/supervisord.conf \
    /app/ollama \
    /app/run_model.sh \
    /app/serve.sh \
    /app/

# Runtime defaults; both can be overridden with `docker run -e`.
# NOTE(review): MODEL_NAME is presumably consumed by the helper scripts — verify there.
ENV MODEL_NAME="llama" \
    OLLAMA_HOST="0.0.0.0:8080"

# Documents the port named in OLLAMA_HOST; it does not publish the port by itself.
EXPOSE 8080

# supervisord is the container's main process, started with the copied configuration.
CMD ["supervisord", "-c", "/app/supervisord.conf"]