Make CPU builds parallel and AMD GPU targets customizable
The Linux build now supports parallel CPU builds to speed things up. This also exposes AMD GPU targets as an optional setting for advanced users who want to alter our default set.
This commit is contained in:
parent
fa8c990e58
commit
df54c723ae
4 changed files with 68 additions and 44 deletions
41
Dockerfile
41
Dockerfile
|
@ -10,91 +10,102 @@ COPY llm llm
|
||||||
|
|
||||||
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG AMDGPU_TARGETS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG AMDGPU_TARGETS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
|
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
RUN sh gen_linux.sh
|
|
||||||
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
|
||||||
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
|
||||||
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
|
FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
RUN sh gen_linux.sh
|
# Note, we only build the "base" CPU variant on arm since avx/avx2 are x86 features
|
||||||
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
|
||||||
|
|
||||||
# Intermediate stage used for ./scripts/build_linux.sh
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ARG GOFLAGS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
|
COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
ARG GOFLAGS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN go build .
|
RUN go build .
|
||||||
|
|
||||||
# Intermediate stage used for ./scripts/build_linux.sh
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG GOFLAGS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
ARG GOFLAGS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN go build .
|
RUN go build .
|
||||||
|
|
||||||
# Runtime stages
|
# Runtime stages
|
||||||
|
|
|
@ -74,7 +74,8 @@ Typically the build scripts will auto-detect ROCm, however, if your Linux distro
|
||||||
or installation approach uses unusual paths, you can specify the location by
|
or installation approach uses unusual paths, you can specify the location by
|
||||||
specifying an environment variable `ROCM_PATH` to the location of the ROCm
|
specifying an environment variable `ROCM_PATH` to the location of the ROCm
|
||||||
install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
|
install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
|
||||||
CLBlast install (typically `/usr/lib/cmake/CLBlast`).
|
CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
|
||||||
|
the AMD GPU targets by setting `AMDGPU_TARGETS` (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`).
|
||||||
|
|
||||||
```
|
```
|
||||||
go generate ./...
|
go generate ./...
|
||||||
|
|
|
@ -16,6 +16,10 @@ set -o pipefail
|
||||||
|
|
||||||
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
|
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
|
||||||
amdGPUs() {
|
amdGPUs() {
|
||||||
|
if [ -n "${AMDGPU_TARGETS}" ]; then
|
||||||
|
echo "${AMDGPU_TARGETS}"
|
||||||
|
return
|
||||||
|
fi
|
||||||
GPU_LIST=(
|
GPU_LIST=(
|
||||||
"gfx803"
|
"gfx803"
|
||||||
"gfx900"
|
"gfx900"
|
||||||
|
@ -73,36 +77,42 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||||
# -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
|
# -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
|
||||||
|
|
||||||
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
|
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
|
||||||
#
|
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
|
||||||
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
#
|
||||||
#
|
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
#
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
echo "Building LCD CPU"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
|
||||||
build
|
echo "Building LCD CPU"
|
||||||
compress_libs
|
build
|
||||||
|
compress_libs
|
||||||
|
fi
|
||||||
|
|
||||||
#
|
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
|
||||||
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
#
|
||||||
# Approximately 400% faster than LCD on same CPU
|
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
||||||
#
|
# Approximately 400% faster than LCD on same CPU
|
||||||
init_vars
|
#
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
init_vars
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
echo "Building AVX CPU"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
|
||||||
build
|
echo "Building AVX CPU"
|
||||||
compress_libs
|
build
|
||||||
|
compress_libs
|
||||||
|
fi
|
||||||
|
|
||||||
#
|
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
|
||||||
# ~2013 CPU Dynamic library
|
#
|
||||||
# Approximately 10% faster than AVX on same CPU
|
# ~2013 CPU Dynamic library
|
||||||
#
|
# Approximately 10% faster than AVX on same CPU
|
||||||
init_vars
|
#
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
init_vars
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
||||||
echo "Building AVX2 CPU"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
|
||||||
build
|
echo "Building AVX2 CPU"
|
||||||
compress_libs
|
build
|
||||||
|
compress_libs
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Skipping CPU generation step as requested"
|
echo "Skipping CPU generation step as requested"
|
||||||
|
|
|
@ -6,6 +6,7 @@ export VERSION=${VERSION:-0.0.0}
|
||||||
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
||||||
|
|
||||||
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
|
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
|
||||||
|
export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
|
||||||
mkdir -p dist
|
mkdir -p dist
|
||||||
|
|
||||||
for TARGETARCH in ${BUILD_ARCH}; do
|
for TARGETARCH in ${BUILD_ARCH}; do
|
||||||
|
@ -14,6 +15,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
|
||||||
--build-arg=GOFLAGS \
|
--build-arg=GOFLAGS \
|
||||||
--build-arg=CGO_CFLAGS \
|
--build-arg=CGO_CFLAGS \
|
||||||
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
|
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
|
||||||
|
--build-arg=AMDGPU_TARGETS \
|
||||||
--target build-$TARGETARCH \
|
--target build-$TARGETARCH \
|
||||||
-f Dockerfile \
|
-f Dockerfile \
|
||||||
-t builder:$TARGETARCH \
|
-t builder:$TARGETARCH \
|
||||||
|
|
Loading…
Reference in a new issue