cd5c8f6471
* Optimize container images for startup This change adjusts how to handle runner payloads to support container builds where we keep them extracted in the filesystem. This makes it easier to optimize the cpu/cuda vs cpu/rocm images for size, and should result in faster startup times for container images. * Refactor payload logic and add buildx support for faster builds * Move payloads around * Review comments * Converge to buildx based helper scripts * Use docker buildx action for release
61 lines
2 KiB
Docker
61 lines
2 KiB
Docker
# Build stage: Go 1.22 toolchain on Debian bookworm, used to compile the binary.
# The stage is named "build" so the final stage can COPY --from=build.
FROM golang:1.22-bookworm AS build
|
|
|
|
# Install the tools needed to register an extra apt repository and fetch
# sources. apt-get is used rather than apt because apt's command-line
# interface is not intended for scripted use. Recommended packages are
# skipped and the package index is removed in the same layer so neither is
# stored in it. ca-certificates is listed explicitly because later steps
# download over HTTPS.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        ca-certificates \
        git \
        gnupg \
        software-properties-common \
        wget \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install Intel oneAPI (MKL, MKL headers, DPC++/C++ compiler) together with
# gcc, g++, pkg-config and cmake from the Intel apt repository and Debian.
#
# The repository signing key is downloaded to a temporary file and then
# de-armored, instead of being streamed through a pipeline: under the default
# /bin/sh a pipeline only reports the exit status of its last command, so a
# failed download would otherwise go unnoticed at this point.
RUN wget -nv -O /tmp/intel-oneapi.pub https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \
    gpg --output /usr/share/keyrings/oneapi-archive-keyring.gpg --dearmor /tmp/intel-oneapi.pub && \
    # Make sure apt can read the keyring regardless of the umask gpg ran under.
    chmod 644 /usr/share/keyrings/oneapi-archive-keyring.gpg && \
    rm -f /tmp/intel-oneapi.pub && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        cmake \
        g++ \
        gcc \
        intel-oneapi-compiler-dpcpp-cpp \
        intel-oneapi-mkl \
        intel-oneapi-mkl-devel \
        pkg-config && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
# Relative paths in the remaining build-stage instructions resolve against /app.
WORKDIR /app

# Build argument with a default of "release". It is not referenced by name in
# the instructions visible here; as an ARG it is available in the environment
# of the RUN steps that follow in this stage.
ARG GIN_MODE=release

# Copy the build context into /app. COPY is used instead of ADD because this is
# a plain local copy and needs none of ADD's archive-extraction or URL features.
COPY . .
|
|
|
|
# Compile the project in a single shell invocation:
#   1. source /opt/intel/oneapi/setvars.sh into the current shell;
#   2. run `go generate ./...` with OLLAMA_CUSTOM_CPU_DEFS set for that command
#      only, carrying the CMake defines (BLAS on with vendor Intel10_64lp,
#      icx / icpx as the C / C++ compilers, GGML_NATIVE on);
#   3. run `go build` with the -s -w linker flags.
# NOTE(review): GGML_NATIVE=ON presumably tunes the native code for the CPU of
# the build host - confirm this is acceptable for the machines running the image.
RUN . /opt/intel/oneapi/setvars.sh \
    && OLLAMA_CUSTOM_CPU_DEFS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_NATIVE=ON" \
       go generate ./... \
    && go build -ldflags="-s -w"
|
|
|
|
# Final stage: slim Debian bookworm base that receives only the built artifacts.
# Naming the stage lets tooling address it with --target instead of by index.
FROM debian:bookworm-slim AS runtime
|
|
|
|
# Install supervisor and the Intel oneAPI runtime libraries in one layer, then
# drop the apt caches and package index so they are not stored in that layer.
#
# - apt-get replaces apt, whose command-line interface is not intended for
#   scripted use.
# - ca-certificates is listed explicitly: the key download and the Intel
#   repository both use HTTPS, and with --no-install-recommends it would
#   otherwise only be present if another package happens to depend on it.
# - The signing key is saved to a file before being de-armored, so a failed
#   download stops the build rather than being hidden by a pipeline's exit
#   status.
# NOTE(review): software-properties-common is not used by any command visible
# here; confirm whether the runtime scripts need it before removing it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        software-properties-common \
        supervisor \
        wget && \
    wget -nv -O /tmp/intel-oneapi.pub https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \
    gpg --output /usr/share/keyrings/oneapi-archive-keyring.gpg --dearmor /tmp/intel-oneapi.pub && \
    # Make sure apt can read the keyring regardless of the umask gpg ran under.
    chmod 644 /usr/share/keyrings/oneapi-archive-keyring.gpg && \
    rm -f /tmp/intel-oneapi.pub && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends intel-oneapi-runtime-libs && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
# Runtime working directory.
WORKDIR /app

# Bring over the supervisor configuration, the compiled ollama binary and the
# two launcher scripts from the build stage; each keeps its file name under /app.
COPY --from=build \
     /app/supervisord.conf \
     /app/ollama \
     /app/run_model.sh \
     /app/serve.sh \
     /app/
|
|
|
|
# Default runtime environment: the model name and the host:port value exported
# as OLLAMA_HOST. Both can be overridden when the container is started.
ENV MODEL_NAME="llama" \
    OLLAMA_HOST="0.0.0.0:8080"

# Documents the port used in OLLAMA_HOST above; EXPOSE does not publish it.
EXPOSE 8080
|
|
|
|
# Default command: start supervisord with the configuration file at
# /app/supervisord.conf. Exec (JSON array) form is used, so no shell wraps the
# process.
# NOTE(review): which programs supervisord launches is defined in
# supervisord.conf, which is not visible here - presumably serve.sh and
# run_model.sh; confirm. No USER instruction appears in the visible part of
# this stage, so the command runs as the base image's default user.
CMD ["supervisord", "-c", "/app/supervisord.conf"]
|