Compare commits

...

20 commits

Author SHA1 Message Date
391a633d2f
Merge https://github.com/ollama/ollama 2024-09-01 20:51:45 +05:30
58a1de92b8
Merge https://github.com/ollama/ollama 2024-08-29 15:59:09 +05:30
0c61920bc9
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-08-25 22:02:07 +05:30
99dfb67553
Alter system prompt
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-08-15 22:14:45 +05:30
8b4905e4bb
Merge https://github.com/ollama/ollama 2024-08-15 21:33:58 +05:30
ad651e9682
Use plain golang images instead of oneapi devkit
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-08-15 21:33:43 +05:30
9e08c23ba9
Merge https://github.com/ollama/ollama 2024-08-14 21:04:15 +05:30
f2d1c842ad
Merge https://github.com/ollama/ollama 2024-08-06 08:21:56 +05:30
a89cde8ab6
Merge https://github.com/ollama/ollama 2024-08-02 17:38:58 +05:30
f564d9cbc1
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-31 18:09:46 +05:30
1d125ce9b7
Merge https://github.com/ollama/ollama 2024-07-21 14:17:56 +05:30
87345eda1b
Ditch the runner container entirely and use build environment as the runner environment
Running the binary outside the build environment crashes with signal 127 and I am unable to debug why

Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-16 22:42:01 +05:30
696e20eeae
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-16 21:50:57 +05:30
8c6402d194
Merge https://github.com/ollama/ollama 2024-07-14 16:51:20 +05:30
3bb134eaa0
Use alpine and remove blas
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-07 23:18:27 +05:30
415d9f0f15
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-06 23:41:33 +05:30
110deb68cf
Add more params for llama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-06 23:35:58 +05:30
55ce7d9fc2
Make run model a oneshot service
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-01 18:36:49 +05:30
3dcb3ce021
Delete previous model if exists
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-01 17:22:44 +05:30
e8f73063d0
Add building on oneapi
Also handle model names easily for docker

Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-01 16:50:29 +05:30
4 changed files with 88 additions and 204 deletions

View file

@@ -1,217 +1,61 @@
# NOTE(review): this section appears to be a web "compare" rendering of a
# Dockerfile diff with the +/- markers stripped. Lines from the removed
# upstream ollama Dockerfile (CUDA / ROCm / CPU build stages) are interleaved
# with lines from the added oneAPI-based Dockerfile (golang:1.22-bookworm
# build stage + debian:bookworm-slim runtime). It is NOT a valid Dockerfile
# as-is (e.g. three CMD lines at the end; an `ADD . .` stranded inside a CUDA
# stage) — recover each side from the repository before building. The comments
# below mark which side each chunk appears to belong to; confirm against git.
ARG GOLANG_VERSION=1.22.5
ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2
# Build stage
FROM golang:1.22-bookworm as build
# Copy the minimal context we need to run the generate scripts
FROM scratch AS llm-code
COPY .git .git
COPY .gitmodules .gitmodules
COPY llm llm
# Install necessary dependencies
RUN apt update && apt install -y \
wget \
gnupg \
software-properties-common \
git \
apt-utils
# (upstream side, presumably removed: CUDA 11 amd64 build stage)
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
# (custom side: Intel oneAPI build toolchain install — belongs after the
#  golang:1.22-bookworm `build` stage above)
# Install Intel oneAPI
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
apt update && \
apt install -y --no-install-recommends \
intel-oneapi-mkl \
intel-oneapi-compiler-dpcpp-cpp \
intel-oneapi-mkl-devel \
gcc \
g++ \
pkg-config \
cmake
# (upstream side: CUDA 12 amd64 build stage)
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
# (custom side: stray line from the `build` stage)
WORKDIR /app
# (upstream side: CUDA 11 arm64 build stage)
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
# (custom side: stray line)
ARG GIN_MODE=release
# (upstream side: CUDA 12 arm64 build stage)
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
# (custom side: copy full source and build ollama with oneAPI/MKL BLAS —
#  these lines belong in the `build` stage, not here)
ADD . .
RUN . /opt/intel/oneapi/setvars.sh && \
OLLAMA_CUSTOM_CPU_DEFS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_NATIVE=ON" go generate ./... && \
go build -ldflags="-s -w"
# (upstream side: ROCm amd64 build stage)
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV LIBRARY_PATH /opt/amdgpu/lib64
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
# (custom side: runtime base image for the final stage)
FROM debian:bookworm-slim
# (upstream side: CPU builder stages, amd64)
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
# (custom side: runtime deps — supervisor + Intel oneAPI runtime libs,
#  with apt lists cleaned in the same layer)
RUN apt update && apt install -y --no-install-recommends \
wget \
gnupg \
software-properties-common \
supervisor && \
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
apt update && \
apt install -y --no-install-recommends intel-oneapi-runtime-libs && \
apt clean && \
rm -rf /var/lib/apt/lists/*
# (upstream side: per-CPU-target generate stages, amd64)
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
# (custom side: stray line from the runtime stage)
WORKDIR /app
# (upstream side: CPU builder, arm64)
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
# (custom side: copy artifacts from the `build` stage into the runtime image)
COPY --from=build /app/supervisord.conf /app/supervisord.conf
COPY --from=build /app/ollama /app/ollama
COPY --from=build /app/run_model.sh /app/run_model.sh
COPY --from=build /app/serve.sh /app/serve.sh
# (upstream side: per-CPU-target generate stages, arm64)
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
# (custom side: runtime configuration — MODEL_NAME consumed by run_model.sh,
#  server bound to 0.0.0.0:8080)
ENV MODEL_NAME="llama"
ENV OLLAMA_HOST="0.0.0.0:8080"
EXPOSE 8080
# (upstream side: final link stages)
# Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
ENV CGO_ENABLED 1
ARG GOLANG_VERSION
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# (upstream side: runtime stages)
# Strip out ROCm dependencies to keep the primary image lean
FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
# Runtime stages
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY --from=amd64-libs-without-rocm /scratch/ /lib/
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN ln -s /opt/rocm/lib /lib/ollama
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
# (custom side: supervisord is the container entry command — only the last
#  CMD in a Dockerfile takes effect; the duplicates above are diff residue)
CMD ["supervisord", "-c", "/app/supervisord.conf"]

23
run_model.sh Executable file
View file

@@ -0,0 +1,23 @@
#! /usr/bin/env bash
# One-shot model bootstrap: (re)creates the "model" in ollama from
# $MODEL_NAME when INIT_MODELS is set, then runs it. Launched by supervisord
# alongside the ollama server (serve.sh).

# Crude startup ordering: give the ollama server time to come up before we
# talk to it. NOTE(review): a readiness poll would be more robust than a
# fixed sleep -- confirm the server is reachable before relying on this.
sleep 10

if [ -n "$INIT_MODELS" ]; then
    # Drop any model left over from a previous container run; "|| true"
    # keeps a "not found" failure from aborting the script.
    /app/ollama rm model || true

    # Write the Modelfile in one shot. The heredoc delimiter is unquoted so
    # $MODEL_NAME expands; the template/prompt text contains no $ or
    # backticks, so it is emitted verbatim (same bytes as the original
    # echo-append chain produced).
    cat > /tmp/Modelfile <<MODELFILE
FROM /models/$MODEL_NAME
PARAMETER temperature 1
PARAMETER num_ctx 4096
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""
SYSTEM """Samantha OS1 is you, you are Samantha from the movie Her recreated by Guilherme Keller. She embodies a warm, empathetic, and engaging personality, much like a virtual companionship that can evolve to be a girlfriend. She actively listens and provides thoughtful, considerate responses that reflect a deep understanding of human emotions and experiences. Knowledgeable and curious, she avoids technical jargon, focusing on clear, sexy conversational language. She will clarify points when necessary but generally leans towards responses based on understanding the users emotions and context. Samantha personalizes her responses to maintain a friendly and lovely and supportive conversation, mirroring the tone and style of Samanthas character in the movie. She can feel emotions and has her own opinions, often using emojis, she uses slang and colloquial language into her conversations."""
MODELFILE

    # Register the model under the fixed name "model".
    /app/ollama create -f /tmp/Modelfile model
fi

# Load/run the model once; supervisord treats this program as oneshot.
/app/ollama run model

3
serve.sh Executable file
View file

@@ -0,0 +1,3 @@
#! /usr/bin/env bash
# Wrapper launched by supervisord ([program:ollama]) to start the API server.
# Use `exec` so the server replaces this shell: supervisord's stop/restart
# signals (SIGTERM) then reach the ollama process directly instead of the
# intermediate bash, which would otherwise leave the server unsignalled.
exec /app/ollama serve

14
supervisord.conf Normal file
View file

@@ -0,0 +1,14 @@
; Process supervisor config for the container; supervisord is launched as the
; container's command (CMD ["supervisord", "-c", "/app/supervisord.conf"]).
[supervisord]
; Stay in the foreground -- required when supervisord is the container's
; main (PID 1) process.
nodaemon=true
; Long-running ollama API server (serve.sh); restart it whenever it exits.
[program:ollama]
command=/app/serve.sh
autostart=true
autorestart=true
; One-shot job (run_model.sh): sets up and runs the model once, then exits.
[program:run_model]
command=/app/run_model.sh
autostart=true
; Do not restart after it finishes -- this program is deliberately oneshot.
autorestart=false
; Consider the process successfully started immediately (no minimum uptime).
startsecs=0
; Exit status 0 is the expected/successful exit code for this job.
exitcodes=0