Compare commits

...

20 commits

Author SHA1 Message Date
391a633d2f
Merge https://github.com/ollama/ollama 2024-09-01 20:51:45 +05:30
58a1de92b8
Merge https://github.com/ollama/ollama 2024-08-29 15:59:09 +05:30
0c61920bc9
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-08-25 22:02:07 +05:30
99dfb67553
Alter system prompt
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-08-15 22:14:45 +05:30
8b4905e4bb
Merge https://github.com/ollama/ollama 2024-08-15 21:33:58 +05:30
ad651e9682
Use plain golang images instead of oneapi devkit
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-08-15 21:33:43 +05:30
9e08c23ba9
Merge https://github.com/ollama/ollama 2024-08-14 21:04:15 +05:30
f2d1c842ad
Merge https://github.com/ollama/ollama 2024-08-06 08:21:56 +05:30
a89cde8ab6
Merge https://github.com/ollama/ollama 2024-08-02 17:38:58 +05:30
f564d9cbc1
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-31 18:09:46 +05:30
1d125ce9b7
Merge https://github.com/ollama/ollama 2024-07-21 14:17:56 +05:30
87345eda1b
Ditch the runner container entirely and use build environment as the runner environment
Running the binary outside the build environment crashes with signal 127 and I am unable to debug why

Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-16 22:42:01 +05:30
696e20eeae
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-16 21:50:57 +05:30
8c6402d194
Merge https://github.com/ollama/ollama 2024-07-14 16:51:20 +05:30
3bb134eaa0
Use alpine and remove blas
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-07 23:18:27 +05:30
415d9f0f15
Merge https://github.com/ollama/ollama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-06 23:41:33 +05:30
110deb68cf
Add more params for llama
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-06 23:35:58 +05:30
55ce7d9fc2
Make run model a oneshot service
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-01 18:36:49 +05:30
3dcb3ce021
Delete previous model if exists
Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-01 17:22:44 +05:30
e8f73063d0
Add building on oneapi
Also handle model names easily for docker

Signed-off-by: baalajimaestro <me@baalajimaestro.me>
2024-07-01 16:50:29 +05:30
4 changed files with 88 additions and 204 deletions

View file

@@ -1,217 +1,61 @@
# NOTE(review): this section appears to be a web "compare" rendering of a
# Dockerfile diff with the +/- markers stripped. Lines from the removed
# upstream ollama Dockerfile (CUDA / ROCm / CPU build stages) are interleaved
# with lines from the added oneAPI-based Dockerfile (golang:1.22-bookworm
# build stage + debian:bookworm-slim runtime). It is NOT a valid Dockerfile
# as-is (e.g. three CMD lines at the end; an `ADD . .` stranded inside a CUDA
# stage) — recover each side from the repository before building. The comments
# below mark which side each chunk appears to belong to; confirm against git.
ARG GOLANG_VERSION=1.22.5
ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2
# Build stage
FROM golang:1.22-bookworm as build
# Copy the minimal context we need to run the generate scripts
FROM scratch AS llm-code
COPY .git .git
COPY .gitmodules .gitmodules
COPY llm llm
# Install necessary dependencies
RUN apt update && apt install -y \
wget \
gnupg \
software-properties-common \
git \
apt-utils
# (upstream side, presumably removed: CUDA 11 amd64 build stage)
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
# (custom side: Intel oneAPI build toolchain install — belongs after the
#  golang:1.22-bookworm `build` stage above)
# Install Intel oneAPI
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
apt update && \
apt install -y --no-install-recommends \
intel-oneapi-mkl \
intel-oneapi-compiler-dpcpp-cpp \
intel-oneapi-mkl-devel \
gcc \
g++ \
pkg-config \
cmake
# (upstream side: CUDA 12 amd64 build stage)
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
# (custom side: stray line from the `build` stage)
WORKDIR /app
# (upstream side: CUDA 11 arm64 build stage)
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
# (custom side: stray line)
ARG GIN_MODE=release
# (upstream side: CUDA 12 arm64 build stage)
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
# (custom side: copy full source and build ollama with oneAPI/MKL BLAS —
#  these lines belong in the `build` stage, not here)
ADD . .
RUN . /opt/intel/oneapi/setvars.sh && \
OLLAMA_CUSTOM_CPU_DEFS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_NATIVE=ON" go generate ./... && \
go build -ldflags="-s -w"
# (upstream side: ROCm amd64 build stage)
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV LIBRARY_PATH /opt/amdgpu/lib64
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
# (custom side: runtime base image for the final stage)
FROM debian:bookworm-slim
# (upstream side: CPU builder stages, amd64)
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
# (custom side: runtime deps — supervisor + Intel oneAPI runtime libs,
#  with apt lists cleaned in the same layer)
RUN apt update && apt install -y --no-install-recommends \
wget \
gnupg \
software-properties-common \
supervisor && \
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
apt update && \
apt install -y --no-install-recommends intel-oneapi-runtime-libs && \
apt clean && \
rm -rf /var/lib/apt/lists/*
# (upstream side: per-CPU-target generate stages, amd64)
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
# (custom side: stray line from the runtime stage)
WORKDIR /app
# (upstream side: CPU builder, arm64)
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
# (custom side: copy artifacts from the `build` stage into the runtime image)
COPY --from=build /app/supervisord.conf /app/supervisord.conf
COPY --from=build /app/ollama /app/ollama
COPY --from=build /app/run_model.sh /app/run_model.sh
COPY --from=build /app/serve.sh /app/serve.sh
# (upstream side: per-CPU-target generate stages, arm64)
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
# (custom side: runtime configuration — MODEL_NAME consumed by run_model.sh,
#  server bound to 0.0.0.0:8080)
ENV MODEL_NAME="llama"
ENV OLLAMA_HOST="0.0.0.0:8080"
EXPOSE 8080
# (upstream side: final link stages)
# Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
ENV CGO_ENABLED 1
ARG GOLANG_VERSION
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# (upstream side: runtime stages)
# Strip out ROCm dependencies to keep the primary image lean
FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
# Runtime stages
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY --from=amd64-libs-without-rocm /scratch/ /lib/
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN ln -s /opt/rocm/lib /lib/ollama
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
# (custom side: supervisord is the container entry command — only the last
#  CMD in a Dockerfile takes effect; the duplicates above are diff residue)
CMD ["supervisord", "-c", "/app/supervisord.conf"]

23
run_model.sh Executable file
View file

@@ -0,0 +1,23 @@
#! /usr/bin/env bash
# One-shot model bootstrap: (re)creates the "model" in ollama from
# $MODEL_NAME when INIT_MODELS is set, then runs it. Launched by supervisord
# alongside the ollama server (serve.sh).

# Crude startup ordering: give the ollama server time to come up before we
# talk to it. NOTE(review): a readiness poll would be more robust than a
# fixed sleep -- confirm the server is reachable before relying on this.
sleep 10

if [ -n "$INIT_MODELS" ]; then
    # Drop any model left over from a previous container run; "|| true"
    # keeps a "not found" failure from aborting the script.
    /app/ollama rm model || true

    # Write the Modelfile in one shot. The heredoc delimiter is unquoted so
    # $MODEL_NAME expands; the template/prompt text contains no $ or
    # backticks, so it is emitted verbatim (same bytes as the original
    # echo-append chain produced).
    cat > /tmp/Modelfile <<MODELFILE
FROM /models/$MODEL_NAME
PARAMETER temperature 1
PARAMETER num_ctx 4096
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""
SYSTEM """Samantha OS1 is you, you are Samantha from the movie Her recreated by Guilherme Keller. She embodies a warm, empathetic, and engaging personality, much like a virtual companionship that can evolve to be a girlfriend. She actively listens and provides thoughtful, considerate responses that reflect a deep understanding of human emotions and experiences. Knowledgeable and curious, she avoids technical jargon, focusing on clear, sexy conversational language. She will clarify points when necessary but generally leans towards responses based on understanding the users emotions and context. Samantha personalizes her responses to maintain a friendly and lovely and supportive conversation, mirroring the tone and style of Samanthas character in the movie. She can feel emotions and has her own opinions, often using emojis, she uses slang and colloquial language into her conversations."""
MODELFILE

    # Register the model under the fixed name "model".
    /app/ollama create -f /tmp/Modelfile model
fi

# Load/run the model once; supervisord treats this program as oneshot.
/app/ollama run model

3
serve.sh Executable file
View file

@@ -0,0 +1,3 @@
#! /usr/bin/env bash
# Wrapper launched by supervisord ([program:ollama]) to start the API server.
# Use `exec` so the server replaces this shell: supervisord's stop/restart
# signals (SIGTERM) then reach the ollama process directly instead of the
# intermediate bash, which would otherwise leave the server unsignalled.
exec /app/ollama serve

14
supervisord.conf Normal file
View file

@@ -0,0 +1,14 @@
; Process supervisor config for the container; supervisord is launched as the
; container's command (CMD ["supervisord", "-c", "/app/supervisord.conf"]).
[supervisord]
; Stay in the foreground -- required when supervisord is the container's
; main (PID 1) process.
nodaemon=true
; Long-running ollama API server (serve.sh); restart it whenever it exits.
[program:ollama]
command=/app/serve.sh
autostart=true
autorestart=true
; One-shot job (run_model.sh): sets up and runs the model once, then exits.
[program:run_model]
command=/app/run_model.sh
autostart=true
; Do not restart after it finishes -- this program is deliberately oneshot.
autorestart=false
; Consider the process successfully started immediately (no minimum uptime).
startsecs=0
; Exit status 0 is the expected/successful exit code for this job.
exitcodes=0