diff --git a/Dockerfile b/Dockerfile
index 8e8b6d2a..9767faa3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,91 +10,104 @@ COPY llm llm
 
 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 ARG CMAKE_VERSION
-ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
 WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
+ARG CGO_CFLAGS
 RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 ARG CMAKE_VERSION
-ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
 WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
+ARG CGO_CFLAGS
 RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
 ARG CMAKE_VERSION
-ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV LIBRARY_PATH /opt/amdgpu/lib64
 COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
 WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
+ARG CGO_CFLAGS
+ARG AMDGPU_TARGETS
 RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
 ARG CMAKE_VERSION
-ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV LIBRARY_PATH /opt/amdgpu/lib64
 COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
 WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
+ARG CGO_CFLAGS
+ARG AMDGPU_TARGETS
 RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
-FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
+FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
-ARG OLLAMA_CUSTOM_CPU_DEFS
-ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
+ARG OLLAMA_CUSTOM_CPU_DEFS
+ARG CGO_CFLAGS
 WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
-RUN sh gen_linux.sh
+
+# Each x86 CPU variant is generated in its own stage on top of cpu-builder-amd64
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
+RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
+RUN OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
+RUN OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
 
 FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
-ARG OLLAMA_CUSTOM_CPU_DEFS
-ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
 WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
-RUN sh gen_linux.sh
+# Note, we only build the "base" CPU variant on arm since avx/avx2 are x86 features
+ARG OLLAMA_CUSTOM_CPU_DEFS
+ARG CGO_CFLAGS
+RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
 
 # Intermediate stage used for ./scripts/build_linux.sh
 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
 ENV CGO_ENABLED 1
-ARG GOFLAGS
-ARG CGO_CFLAGS
 WORKDIR /go/src/github.com/jmorganca/ollama
 COPY . .
+# Add the AVX/AVX2 CPU builds next to the base "cpu" build inherited from cpu-build-amd64
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+ARG GOFLAGS
+ARG CGO_CFLAGS
 RUN go build .
 
 # Intermediate stage used for ./scripts/build_linux.sh
 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
 ENV CGO_ENABLED 1
 ARG GOLANG_VERSION
-ARG GOFLAGS
-ARG CGO_CFLAGS
 WORKDIR /go/src/github.com/jmorganca/ollama
 COPY . .
 COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+ARG GOFLAGS
+ARG CGO_CFLAGS
 RUN go build .
 
 # Runtime stages
diff --git a/docs/development.md b/docs/development.md
index 5369f5a8..ac45a3e0 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -74,7 +74,8 @@ Typically the build scripts will auto-detect ROCm, however, if your Linux distro
 or installation approach uses unusual paths, you can specify the location by
 specifying an environment variable `ROCM_PATH` to the location of the ROCm
 install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
-CLBlast install (typically `/usr/lib/cmake/CLBlast`).
+CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
+the AMD GPU targets by setting `AMDGPU_TARGETS` (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`).
 
 ```
 go generate ./...
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index 0fcf1356..fee4c85f 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -16,6 +16,11 @@ set -o pipefail
 
 # See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
 amdGPUs() {
+    # A caller-supplied AMDGPU_TARGETS replaces the built-in GPU list below
+    if [ -n "${AMDGPU_TARGETS}" ]; then
+        echo "${AMDGPU_TARGETS}"
+        return
+    fi
     GPU_LIST=(
         "gfx803"
         "gfx900"
@@ -73,36 +78,43 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
 
         COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
-        #
-        # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
-        #
-        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
-        echo "Building LCD CPU"
-        build
-        compress_libs
+        # OLLAMA_CPU_TARGET limits the build to one variant; when unset, every variant is built
+        if [ -z "${OLLAMA_CPU_TARGET}" ] || [ "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
+            #
+            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
+            #
+            CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+            BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
+            echo "Building LCD CPU"
+            build
+            compress_libs
+        fi
 
-        #
-        # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
-        # Approximately 400% faster than LCD on same CPU
-        #
-        init_vars
-        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
-        echo "Building AVX CPU"
-        build
-        compress_libs
+        if [ -z "${OLLAMA_CPU_TARGET}" ] || [ "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
+            #
+            # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
+            # Approximately 400% faster than LCD on same CPU
+            #
+            init_vars
+            CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+            BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
+            echo "Building AVX CPU"
+            build
+            compress_libs
+        fi
 
-        #
-        # ~2013 CPU Dynamic library
-        # Approximately 10% faster than AVX on same CPU
-        #
-        init_vars
-        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
-        echo "Building AVX2 CPU"
-        build
-        compress_libs
+        if [ -z "${OLLAMA_CPU_TARGET}" ] || [ "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
+            #
+            # ~2013 CPU Dynamic library
+            # Approximately 10% faster than AVX on same CPU
+            #
+            init_vars
+            CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
+            BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
+            echo "Building AVX2 CPU"
+            build
+            compress_libs
+        fi
     fi
 else
     echo "Skipping CPU generation step as requested"
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 6a249239..75018837 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -6,6 +6,8 @@ export VERSION=${VERSION:-0.0.0}
 export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
 
 BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
+# Always export (possibly empty) so the value-less --build-arg=AMDGPU_TARGETS below has a variable to forward
+export AMDGPU_TARGETS=${AMDGPU_TARGETS:-}
 mkdir -p dist
 
 for TARGETARCH in ${BUILD_ARCH}; do
@@ -14,6 +16,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
         --build-arg=GOFLAGS \
         --build-arg=CGO_CFLAGS \
         --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
+        --build-arg=AMDGPU_TARGETS \
         --target build-$TARGETARCH \
         -f Dockerfile \
         -t builder:$TARGETARCH \