7555ea44f8
This switches the default llama.cpp to be CPU based, and builds the GPU variants as dynamically loaded libraries which we can select at runtime. This also bumps the ROCm library to version 6 given 5.7 builds don't work on the latest ROCm library that just shipped.
88 lines
3.5 KiB
Bash
Executable file
88 lines
3.5 KiB
Bash
Executable file
#!/bin/bash
|
|
# This script is intended to be run by `go generate`;
# the working directory must be llm/llama.cpp.
|
|
|
|
# First we build our default built-in library which will be linked into the CGO
|
|
# binary as a normal dependency. This default build is CPU based.
|
|
#
|
|
# Then we build a CUDA dynamic library (although statically linked with the CUDA
|
|
# library dependencies for maximum portability)
|
|
#
|
|
# Then, if we detect ROCm, we build a dynamically loaded ROCm lib. It is
# particularly important for ROCm to be a dynamic lib, even if it's the only
# GPU library detected, because we can't redistribute the object files and must
# rely on dynamic libraries at runtime, which could prevent the server from
# starting if they are not present.
|
|
|
|
# Abort on the first failing command (-e), trace every command (-x), and make
# a pipeline fail as soon as any stage in it fails.
set -ex
set -o pipefail

echo "Starting linux generate script"

# Default CUDACXX to the nvcc at the standard CUDA location, but only when the
# caller has not already chosen a compiler and that nvcc is executable.
if [[ -z "${CUDACXX}" && -x /usr/local/cuda/bin/nvcc ]]; then
    export CUDACXX=/usr/local/cuda/bin/nvcc
fi

# CMake definitions shared by every variant built below (CPU, CUDA, ROCm).
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_ACCELERATE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"

# Directory that receives the dynamically loaded GPU libraries
# (libcuda_server.so / librocm_server.so). The .generated file is created
# unconditionally so the directory is never empty.
OLLAMA_DYN_LIB_DIR="gguf/build/lib"
mkdir -p "${OLLAMA_DYN_LIB_DIR}"
touch "${OLLAMA_DYN_LIB_DIR}/.generated"

# Load the shared helpers that live next to this script. The substitution is
# quoted so that a checkout path containing spaces is not word-split.
# NOTE(review): init_vars, git_module_setup, apply_patches and build are
# presumably defined in gen_common.sh - it is not visible from here.
source "$(dirname "$0")/gen_common.sh"
init_vars
git_module_setup
apply_patches
|
#
# Default library: the CPU-only build comes first.
#
# `build` is expected to pick up BUILD_DIR and CMAKE_DEFS from the environment
# of this script; the common definitions are placed ahead of whatever
# CMAKE_DEFS already holds at this point.
BUILD_DIR="gguf/build/cpu"
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
build
|
|
|
|
# CUDA variant: built only when the CUDA library directory exists at the
# standard location. The result is a shared object that embeds the server,
# common and llama archives in full and links the CUDA runtime/cuBLAS pieces
# statically.
if [[ -d /usr/local/cuda/lib64/ ]]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_LIB_DIR=/usr/local/cuda/lib64
    BUILD_DIR="gguf/build/cuda"
    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
    build

    # Archives that must be pulled in completely (--whole-archive).
    cuda_server_archives=(
        "${BUILD_DIR}/examples/server/libext_server.a"
        "${BUILD_DIR}/common/libcommon.a"
        "${BUILD_DIR}/libllama.a"
    )
    # Static CUDA libraries; only the referenced objects are linked.
    cuda_static_libs=(
        "${CUDA_LIB_DIR}/libcudart_static.a"
        "${CUDA_LIB_DIR}/libcublas_static.a"
        "${CUDA_LIB_DIR}/libcublasLt_static.a"
        "${CUDA_LIB_DIR}/libcudadevrt.a"
        "${CUDA_LIB_DIR}/libculibos.a"
    )

    gcc -fPIC -g -shared -o "${OLLAMA_DYN_LIB_DIR}/libcuda_server.so" \
        -Wl,--whole-archive \
        "${cuda_server_archives[@]}" \
        -Wl,--no-whole-archive \
        "${cuda_static_libs[@]}" \
        -lrt -lpthread -ldl -lstdc++ -lm
fi
|
|
|
|
# Fall back to the default ROCm install location when ROCM_PATH is unset or
# empty; whether that directory actually exists is checked later.
: "${ROCM_PATH:=/opt/rocm}"
|
|
|
|
# When the caller has not provided CLBlast_DIR, export the default CMake
# package location - but only if that directory is really there.
if [[ -z "${CLBlast_DIR}" && -d /usr/lib/cmake/CLBlast ]]; then
    export CLBlast_DIR=/usr/lib/cmake/CLBlast
fi
|
|
|
|
# ROCm variant: built only when the ROCm install directory exists. The result
# is a shared object that embeds the server, common and llama archives in full
# and links dynamically against the ROCm/amdgpu libraries.
if [[ -d "${ROCM_PATH}" ]]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    init_vars

    # One GPU architecture list, passed to CMake as both AMDGPU_TARGETS and
    # GPU_TARGETS. The single quotes are kept literally inside CMAKE_DEFS,
    # exactly as before.
    amdgpu_targets='gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102'
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS='${amdgpu_targets}' -DGPU_TARGETS='${amdgpu_targets}'"
    BUILD_DIR="gguf/build/rocm"
    build

    # Library search path and rpath follow ROCM_PATH instead of a hard-coded
    # /opt/rocm, so an install in a non-default location links (and later
    # loads) from the same tree the compilers came from. With the default
    # ROCM_PATH=/opt/rocm the command line is unchanged.
    gcc -fPIC -g -shared -o "${OLLAMA_DYN_LIB_DIR}/librocm_server.so" \
        -Wl,--whole-archive \
        "${BUILD_DIR}/examples/server/libext_server.a" \
        "${BUILD_DIR}/common/libcommon.a" \
        "${BUILD_DIR}/libllama.a" \
        -Wl,--no-whole-archive \
        -lrt -lpthread -ldl -lstdc++ -lm \
        -L"${ROCM_PATH}/lib" -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
        "-Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/" \
        -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
fi
|