Fix rocm windows build and clean up dependency gathering (#7305)
On windows ensure windows version define is properly set for rocm. Remove duplicate rocm arch flags. Resolve wildcards in the targets so parallel builds don't race. Use readlink to resolve rocm dependencies since wildcards omit libelf Keep windows rocm deps aligned with unified packaging model
This commit is contained in:
parent
03e40efa51
commit
5c44461ccf
3 changed files with 18 additions and 14 deletions
|
@ -9,7 +9,7 @@ ARG ROCM_VERSION=6.1.2
|
|||
|
||||
### To create a local image for building linux binaries on mac or windows with efficient incremental builds
|
||||
#
|
||||
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile.new --target unified-builder-amd64 .
|
||||
# docker build --platform linux/amd64 -t builder-amd64 -f llama/Dockerfile --target unified-builder-amd64 .
|
||||
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
|
||||
#
|
||||
### Then incremental builds will be much faster in this container
|
||||
|
@ -41,7 +41,7 @@ ENTRYPOINT [ "zsh" ]
|
|||
### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
|
||||
# Note: this does not contain jetson variants
|
||||
#
|
||||
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile.new --target unified-builder-arm64 .
|
||||
# docker build --platform linux/arm64 -t builder-arm64 -f llama/Dockerfile --target unified-builder-arm64 .
|
||||
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
|
||||
#
|
||||
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
|
||||
|
|
|
@ -21,7 +21,8 @@ else ifeq ($(OS),linux)
|
|||
GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
|
||||
GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
|
||||
GPU_COMPILER:=$(GPU_COMPILER_LINUX)
|
||||
ROCM_TRANSITIVE_LIBS = $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf | sort -u )
|
||||
ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
|
||||
ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
|
||||
endif
|
||||
|
||||
# TODO future multi-variant support for ROCm
|
||||
|
@ -36,14 +37,18 @@ GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64
|
|||
GPU_RUNNER_LIBS_SHORT := hipblas rocblas
|
||||
GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN))
|
||||
GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX))
|
||||
GPU_COMPILER_CFLAGS_WIN = $(CFLAGS)
|
||||
GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
|
||||
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS)
|
||||
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
|
||||
|
||||
ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
|
||||
ifeq ($(OS),windows)
|
||||
ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
|
||||
else ifeq ($(OS),linux)
|
||||
ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
|
||||
ROCM_DIST_DEPS_LIBS = $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS)))
|
||||
endif
|
||||
ROCM_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))))
|
||||
ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
|
||||
|
||||
ifeq ($(OS),linux)
|
||||
|
@ -84,8 +89,7 @@ GPU_COMPILER_CUFLAGS = \
|
|||
-Wno-pass-failed \
|
||||
-Wno-deprecated-declarations \
|
||||
-Wno-unused-result \
|
||||
-I. \
|
||||
$(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch))
|
||||
-I.
|
||||
|
||||
include make/gpu.make
|
||||
|
||||
|
@ -98,4 +102,4 @@ $(ROCBLAS_DIST_DEP_MANIFEST):
|
|||
@echo "rocblas library copy complete"
|
||||
$(ROCM_DIST_DEPS_LIBS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@)* $(dir $@)
|
||||
$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)
|
||||
|
|
|
@ -79,7 +79,7 @@ $(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
|
|||
# Build targets
|
||||
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
|
||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
|
||||
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
|
||||
|
@ -97,14 +97,14 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).
|
|||
# Distribution targets
|
||||
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
|
||||
@-mkdir -p $(dir $@)
|
||||
cp $< $@
|
||||
$(CP) $< $@
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
||||
$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
||||
@-mkdir -p $(dir $@)
|
||||
cp $< $@
|
||||
$(CP) $< $@
|
||||
$(DIST_GPU_RUNNER_LIB_DEPS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(GPU_LIB_DIR)/$(notdir $@)* $(dir $@)
|
||||
$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
|
||||
|
||||
# Payload targets
|
||||
$(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server
|
||||
|
|
Loading…
Reference in a new issue