From a017cf2fea4aaa376087520382058c42cffce097 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 20 Aug 2024 07:26:38 -0700 Subject: [PATCH] Split rocm back out of bundle (#6432) We're over budget for github's maximum release artifact size with rocm + 2 cuda versions. This splits rocm back out as a discrete artifact, but keeps the layout so it can be extracted into the same location as the main bundle. --- .github/workflows/release.yaml | 1 + Dockerfile | 4 ++-- llm/generate/gen_linux.sh | 3 ++- scripts/build_linux.sh | 6 ++++++ scripts/install.sh | 5 +++++ 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 2cf4d2c2..9c1e3e13 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -475,6 +475,7 @@ jobs: (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt) mv sha256sum.txt dist/ mv dist/linux-???64 . + mv dist/linux-amd64-rocm . cat dist/sha256sum.txt - name: Create or update Release run: | diff --git a/Dockerfile b/Dockerfile index d4b86918..c46477b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -95,8 +95,8 @@ ARG AMDGPU_TARGETS ENV GOARCH amd64 RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh -RUN mkdir -p ../../dist/linux-amd64/lib/ollama && \ - (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/lib/ollama && tar xf - ) +RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \ + (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - ) FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 ARG CMAKE_VERSION diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index aef03f9a..6927dda8 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -260,7 +260,8 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then echo "Building custom ROCM GPU" fi BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}" - ROCM_DIST_DIR="${DIST_BASE}/lib/ollama" + # ROCm dependencies are too large to fit into a unified bundle + ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama" # TODO figure out how to disable runpath (rpath) # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index adda2ad7..6cb0d0cd 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -24,8 +24,14 @@ for TARGETARCH in ${BUILD_ARCH}; do docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH rm -rf ./dist/linux-$TARGETARCH docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist + if echo ${TARGETARCH} | grep "amd64" > /dev/null; then + docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH-rocm ./dist + fi docker rm builder-$TARGETARCH echo "Compressing final linux bundle..." rm -f ./dist/ollama-linux-$TARGETARCH.tgz (cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz ) + if [ -d dist/linux-$TARGETARCH-rocm ]; then + (cd dist/linux-$TARGETARCH-rocm && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH-rocm.tgz ) + fi done diff --git a/scripts/install.sh b/scripts/install.sh index a02a0675..25f57565 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -199,6 +199,11 @@ fi if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then if [ $BUNDLE -ne 0 ]; then + status "Downloading Linux ROCm ${ARCH} bundle" + curl --fail --show-error --location --progress-bar \ + "https://ollama.com/download/ollama-linux-${ARCH}-rocm.tgz${VER_PARAM}" | \ + $SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR" + install_success status "AMD GPU ready." exit 0