Make CPU builds parallel and customizable AMD GPUs

The linux build now support parallel CPU builds to speed things up.
This also exposes AMD GPU targets as an optional setting for advaced
users who want to alter our default set.
This commit is contained in:
Daniel Hiltgen 2024-01-21 12:57:13 -08:00
parent fa8c990e58
commit df54c723ae
4 changed files with 68 additions and 44 deletions

View file

@ -10,91 +10,102 @@ COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
ARG CGO_CFLAGS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
ARG CGO_CFLAGS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV LIBRARY_PATH /opt/amdgpu/lib64 ENV LIBRARY_PATH /opt/amdgpu/lib64
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV LIBRARY_PATH /opt/amdgpu/lib64 ENV LIBRARY_PATH /opt/amdgpu/lib64
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
RUN sh gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64 FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
RUN sh gen_linux.sh # Note, we only build the "base" CPU variant on arm since avx/avx2 are x86 features
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh # Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED 1 ENV CGO_ENABLED 1
ARG GOFLAGS
ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
COPY . . COPY . .
COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN go build . RUN go build .
# Intermediate stage used for ./scripts/build_linux.sh # Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
ENV CGO_ENABLED 1 ENV CGO_ENABLED 1
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG GOFLAGS
ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
COPY . . COPY . .
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN go build . RUN go build .
# Runtime stages # Runtime stages

View file

@ -74,7 +74,8 @@ Typically the build scripts will auto-detect ROCm, however, if your Linux distro
or installation approach uses unusual paths, you can specify the location by or installation approach uses unusual paths, you can specify the location by
specifying an environment variable `ROCM_PATH` to the location of the ROCm specifying an environment variable `ROCM_PATH` to the location of the ROCm
install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
CLBlast install (typically `/usr/lib/cmake/CLBlast`). CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
``` ```
go generate ./... go generate ./...

View file

@ -16,6 +16,10 @@ set -o pipefail
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference # See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() { amdGPUs() {
if [ -n "${AMDGPU_TARGETS}" ]; then
echo "${AMDGPU_TARGETS}"
return
fi
GPU_LIST=( GPU_LIST=(
"gfx803" "gfx803"
"gfx900" "gfx900"
@ -73,36 +77,42 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off" COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
# if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) #
# # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" #
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
echo "Building LCD CPU" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
build echo "Building LCD CPU"
compress_libs build
compress_libs
fi
# if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance #
# Approximately 400% faster than LCD on same CPU # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# # Approximately 400% faster than LCD on same CPU
init_vars #
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" init_vars
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
echo "Building AVX CPU" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
build echo "Building AVX CPU"
compress_libs build
compress_libs
fi
# if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
# ~2013 CPU Dynamic library #
# Approximately 10% faster than AVX on same CPU # ~2013 CPU Dynamic library
# # Approximately 10% faster than AVX on same CPU
init_vars #
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}" init_vars
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
echo "Building AVX2 CPU" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
build echo "Building AVX2 CPU"
compress_libs build
compress_libs
fi
fi fi
else else
echo "Skipping CPU generation step as requested" echo "Skipping CPU generation step as requested"

View file

@ -6,6 +6,7 @@ export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'" export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
mkdir -p dist mkdir -p dist
for TARGETARCH in ${BUILD_ARCH}; do for TARGETARCH in ${BUILD_ARCH}; do
@ -14,6 +15,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
--build-arg=GOFLAGS \ --build-arg=GOFLAGS \
--build-arg=CGO_CFLAGS \ --build-arg=CGO_CFLAGS \
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \ --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
--build-arg=AMDGPU_TARGETS \
--target build-$TARGETARCH \ --target build-$TARGETARCH \
-f Dockerfile \ -f Dockerfile \
-t builder:$TARGETARCH \ -t builder:$TARGETARCH \