Fix rocm windows build and clean up dependency gathering (#7305)

On windows ensure windows version define is properly set for rocm.
Remove duplicate rocm arch flags.
Resolve wildcards in the targets so parallel builds don't race.
Use readlink to resolve rocm dependencies since wildcards omit libelf
Keep windows rocm deps aligned with unified packaging model
This commit is contained in:
Daniel Hiltgen 2024-10-22 12:54:15 -07:00 committed by GitHub
parent 03e40efa51
commit 5c44461ccf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 18 additions and 14 deletions

View file

@ -9,7 +9,7 @@ ARG ROCM_VERSION=6.1.2
### To create a local image for building linux binaries on mac or windows with efficient incremental builds ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
# #
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile.new --target unified-builder-amd64 . # docker build --platform linux/amd64 -t builder-amd64 -f llama/Dockerfile --target unified-builder-amd64 .
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64 # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
# #
### Then incremental builds will be much faster in this container ### Then incremental builds will be much faster in this container
@ -41,7 +41,7 @@ ENTRYPOINT [ "zsh" ]
### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds ### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants # Note: this does not contain jetson variants
# #
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile.new --target unified-builder-arm64 . # docker build --platform linux/arm64 -t builder-arm64 -f llama/Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64 # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
# #
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64 FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64

View file

@ -21,7 +21,8 @@ else ifeq ($(OS),linux)
GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X) GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
GPU_COMPILER:=$(GPU_COMPILER_LINUX) GPU_COMPILER:=$(GPU_COMPILER_LINUX)
ROCM_TRANSITIVE_LIBS = $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf | sort -u ) ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
endif endif
# TODO future multi-variant support for ROCm # TODO future multi-variant support for ROCm
@ -36,14 +37,18 @@ GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64
GPU_RUNNER_LIBS_SHORT := hipblas rocblas GPU_RUNNER_LIBS_SHORT := hipblas rocblas
GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN)) GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN))
GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX)) GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX))
GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))) ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama ifeq ($(OS),windows)
ROCM_DIST_DEPS_LIBS = $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))) ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
else ifeq ($(OS),linux)
ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
endif
ROCM_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))))
ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
ifeq ($(OS),linux) ifeq ($(OS),linux)
@ -84,8 +89,7 @@ GPU_COMPILER_CUFLAGS = \
-Wno-pass-failed \ -Wno-pass-failed \
-Wno-deprecated-declarations \ -Wno-deprecated-declarations \
-Wno-unused-result \ -Wno-unused-result \
-I. \ -I.
$(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch))
include make/gpu.make include make/gpu.make
@ -98,4 +102,4 @@ $(ROCBLAS_DIST_DEP_MANIFEST):
@echo "rocblas library copy complete" @echo "rocblas library copy complete"
$(ROCM_DIST_DEPS_LIBS): $(ROCM_DIST_DEPS_LIBS):
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@)* $(dir $@) $(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)

View file

@ -79,7 +79,7 @@ $(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
# Build targets # Build targets
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $< $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $< $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
@ -97,14 +97,14 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).
# Distribution targets # Distribution targets
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/% $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
cp $< $@ $(CP) $< $@
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
cp $< $@ $(CP) $< $@
$(DIST_GPU_RUNNER_LIB_DEPS): $(DIST_GPU_RUNNER_LIB_DEPS):
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
$(CP) $(GPU_LIB_DIR)/$(notdir $@)* $(dir $@) $(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
# Payload targets # Payload targets
$(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server