5c44461ccf
On Windows, ensure the Windows version define is properly set for ROCm. Remove duplicate ROCm arch flags. Resolve wildcards in the targets so parallel builds don't race. Use readlink to resolve ROCm dependencies, since wildcards omit libelf. Keep Windows ROCm deps aligned with the unified packaging model.
105 lines
4.1 KiB
Text
105 lines
4.1 KiB
Text
# Build rules for ROCm runner
|
|
#
|
|
# Note: at present we only support a single ROCm version (whichever is default on the build system)
|
|
# unlike CUDA where we'll build both a v11 and v12 variant.
|
|
|
|
include make/common-defs.make
|
|
|
|
# GPU architectures targeted on every OS handled by this file.
HIP_ARCHS_COMMON := \
    gfx900 \
    gfx940 gfx941 gfx942 \
    gfx1010 gfx1012 gfx1030 \
    gfx1100 gfx1101 gfx1102

# Additional architectures (with explicit xnack settings) that are only
# appended to the arch flags in the linux branch further down.
HIP_ARCHS_LINUX := \
    gfx906:xnack- gfx908:xnack- \
    gfx90a:xnack+ gfx90a:xnack-
|
|
|
|
# Locate the HIP compiler and library directory for the current OS.
ifeq ($(OS),windows)
    # Derived from the incoming HIP_PATH, before it is rewritten just below.
    GPU_LIB_DIR_WIN := $(shell cygpath -m -s "$(HIP_PATH)\bin")

    # If HIP_PATH has spaces, hipcc trips over them when subprocessing,
    # so replace it with the form produced by `cygpath -m -s`.
    HIP_PATH := $(shell cygpath -m -s "$(HIP_PATH)\")
    CGO_EXTRA_LDFLAGS_WIN := -L$(shell cygpath -m -s "$(HIP_PATH)\lib")
    export HIP_PATH

    # No separator is inserted between HIP_PATH and "bin": the rewritten
    # HIP_PATH is presumably expected to end with one -- TODO confirm.
    GPU_COMPILER_WIN := $(HIP_PATH)bin/hipcc.bin.exe
    GPU_COMPILER := $(GPU_COMPILER_WIN)
else ifeq ($(OS),linux)
    # Default install prefix; only applied when HIP_PATH is not already set.
    HIP_PATH ?= /opt/rocm

    # Empty when hipcc is not found on PATH.
    GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
    GPU_COMPILER := $(GPU_COMPILER_LINUX)

    GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib

    # Both variables below are recursively expanded on purpose: ROCM_LIBS is
    # assigned further down in this file, so they must be evaluated lazily.
    #
    # Shared objects reported by ldd for ROCM_LIBS, restricted to resolved
    # ("=>") entries whose path mentions rocm, amdgpu, libtinfo, libnuma or
    # libelf.
    ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
    # The list above plus the `readlink -f` resolution of each entry,
    # de-duplicated by sort.
    ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
endif
|
|
|
|
# TODO future multi-variant support for ROCm
|
|
# ROCM_VERSION = $(subst $(space),.,$(wordlist 1,2,$(subst .,$(space),$(word 3,$(subst -,$(space),$(filter HIP version: %,$(shell $(GPU_COMPILER) --version)))))))
|
|
# ifneq (,$(ROCM_VERSION))
|
|
# GPU_RUNNER_VARIANT = _v$(ROCM_VERSION)
|
|
# endif
|
|
|
|
# Identity of this runner and the libraries it links against.
# GPU_RUNNER_VARIANT is not assigned in this section (its assignment is
# commented out in the TODO above), so it is used here as-is.
GPU_RUNNER_NAME := rocm$(GPU_RUNNER_VARIANT)
GPU_RUNNER_GO_TAGS := rocm
GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64
GPU_RUNNER_LIBS_SHORT := hipblas rocblas

# Windows-specific settings: parent directory of the GPU library dir, and
# C/C++ flags that pin the Windows version define.
GPU_PATH_ROOT_WIN = $(dir $(GPU_LIB_DIR_WIN))
GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602

# Linux-specific settings: parent directory of the GPU library dir, and
# C/C++ flags for position-independent code with GNU extensions enabled.
GPU_PATH_ROOT_LINUX = $(dir $(GPU_LIB_DIR_LINUX))
GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
|
|
|
|
# Existing library files for each short name in GPU_RUNNER_LIBS_SHORT, looked
# up as $(GPU_LIB_DIR)/$(SHARED_PREFIX)<name>.$(SHARED_EXT). Kept recursively
# expanded because GPU_LIB_DIR, SHARED_PREFIX and SHARED_EXT are not assigned
# in this section (presumably they come from the included makefiles).
ROCM_LIBS = $(wildcard $(foreach lib,$(GPU_RUNNER_LIBS_SHORT),$(GPU_LIB_DIR)/$(SHARED_PREFIX)$(lib).$(SHARED_EXT)))

# Destination directory for the ROCm dependencies inside dist/. Linux uses a
# separate "-rocm" suffixed tree; Windows shares the plain $(OS)-$(ARCH) tree.
ifeq ($(OS),linux)
    ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
else ifeq ($(OS),windows)
    ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
endif

# De-duplicated destination paths: the file name of every direct and
# transitive library, placed under ROCM_DIST_DEPS_DIR.
ROCM_DIST_DEPS_LIBS = $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS))))

# File used as the make target that stands for the copied rocblas library
# directory.
ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
|
|
|
|
# Per-OS compiler extras and the --offload-arch flag list.
# Windows builds only the common architectures; Linux adds HIP_ARCHS_LINUX.
ifeq ($(OS),windows)
    GPU_COMPILER_FPIC := -Xclang --dependent-lib=msvcrt
    GPU_RUNNER_ARCH_FLAGS := $(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch))
else ifeq ($(OS),linux)
    GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++11
    GPU_RUNNER_ARCH_FLAGS := $(foreach arch, $(HIP_ARCHS_COMMON) $(HIP_ARCHS_LINUX), --offload-arch=$(arch))
endif
|
|
|
|
# Flags handed to the GPU compiler for device sources. Recursively expanded so
# GPU_COMPILER_FPIC and GPU_RUNNER_CPU_FLAGS are read at use time. The flags
# are grouped per line below (per-OS/CPU options, compile mode, GGML defines,
# HIP defines, generic defines, LLVM options, warning suppressions, include
# path); the order of the flags themselves is unchanged.
GPU_COMPILER_CUFLAGS = \
    $(GPU_COMPILER_FPIC) $(addprefix -m,$(GPU_RUNNER_CPU_FLAGS)) \
    -parallel-jobs=2 -c -O3 \
    -DGGML_USE_CUDA -DGGML_BUILD=1 -DGGML_SHARED=1 \
    -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_SCHED_MAX_COPIES=4 \
    -DGGML_USE_HIPBLAS -DGGML_USE_LLAMAFILE \
    -DHIP_FAST_MATH -D__HIP_PLATFORM_AMD__=1 -D__HIP_ROCclr__=1 \
    -DNDEBUG -DK_QUANTS_PER_ITERATION=2 \
    -D_CRT_SECURE_NO_WARNINGS -D_GNU_SOURCE -D_XOPEN_SOURCE=600 \
    -mllvm=-amdgpu-early-inline-all=true -mllvm=-amdgpu-function-calls=false \
    -Wno-expansion-to-defined -Wno-invalid-noreturn -Wno-ignored-attributes \
    -Wno-pass-failed -Wno-deprecated-declarations -Wno-unused-result \
    -I.
|
|
|
|
include make/gpu.make
|
|
|
|
# Extend the runner rule that comes from gpu.make: the packaged server binary
# additionally depends on the copied rocblas library and on every ROCm shared
# library staged into the dist directory.
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): \
    $(ROCBLAS_DIST_DEP_MANIFEST) \
    $(ROCM_DIST_DEPS_LIBS)
|
|
# Stage the whole rocblas/library directory next to the manifest file by
# streaming it through a tar pipe. The manifest is the target, but every file
# in the source directory is copied.
$(ROCBLAS_DIST_DEP_MANIFEST):
	@-mkdir -p $(@D)
	@echo "Copying rocblas library..."
	cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . | (cd $(@D) && tar xf - )
	@echo "rocblas library copy complete"
|
|
# Copy one ROCm shared library into the dist directory.
#
# The target only carries the destination path, so the source is recovered by
# looking up the target's file name in the direct and transitive library
# lists. The lookup is anchored on the directory separator ("%/<name>") so a
# library whose name merely ends with the target's name is never selected, and
# firstword guarantees a single source operand even when the same file name is
# present in more than one directory. The matched path is copied directly,
# which also avoids the doubled "/" that re-joining $(dir ...) and the file
# name would produce.
$(ROCM_DIST_DEPS_LIBS):
	@-mkdir -p $(dir $@)
	$(CP) $(firstword $(filter %/$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS))) $(dir $@)
|