diff --git a/llama/Dockerfile b/llama/Dockerfile index c636ee5d..0e2386d4 100644 --- a/llama/Dockerfile +++ b/llama/Dockerfile @@ -9,7 +9,7 @@ ARG ROCM_VERSION=6.1.2 ### To create a local image for building linux binaries on mac or windows with efficient incremental builds # -# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile.new --target unified-builder-amd64 . +# docker build --platform linux/amd64 -t builder-amd64 -f llama/Dockerfile --target unified-builder-amd64 . # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64 # ### Then incremental builds will be much faster in this container @@ -41,7 +41,7 @@ ENTRYPOINT [ "zsh" ] ### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds # Note: this does not contain jetson variants # -# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile.new --target unified-builder-arm64 . +# docker build --platform linux/arm64 -t builder-arm64 -f llama/Dockerfile --target unified-builder-arm64 . # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64 # FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64 diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm index 865714b8..ee3fe2e4 100644 --- a/llama/make/Makefile.rocm +++ b/llama/make/Makefile.rocm @@ -21,7 +21,8 @@ else ifeq ($(OS),linux) GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X) GPU_COMPILER:=$(GPU_COMPILER_LINUX) - ROCM_TRANSITIVE_LIBS = $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf | sort -u ) + ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf)) + ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL)) endif # TODO future multi-variant support for ROCm @@ -36,14 +37,18 @@ GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64 GPU_RUNNER_LIBS_SHORT := hipblas rocblas GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN)) GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX)) -GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) +GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602 GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE -GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) +GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))) -ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama -ROCM_DIST_DEPS_LIBS = $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))) +ifeq ($(OS),windows) + ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama +else ifeq ($(OS),linux) + ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama +endif +ROCM_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS)))) ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt ifeq ($(OS),linux) @@ -84,8 +89,7 @@ GPU_COMPILER_CUFLAGS = \ -Wno-pass-failed \ -Wno-deprecated-declarations \ -Wno-unused-result \ - -I. \ - $(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch)) + -I. include make/gpu.make @@ -98,4 +102,4 @@ $(ROCBLAS_DIST_DEP_MANIFEST): @echo "rocblas library copy complete" $(ROCM_DIST_DEPS_LIBS): @-mkdir -p $(dir $@) - $(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@)* $(dir $@) + $(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@) diff --git a/llama/make/gpu.make b/llama/make/gpu.make index 7143bed6..2d63af56 100644 --- a/llama/make/gpu.make +++ b/llama/make/gpu.make @@ -79,7 +79,7 @@ $(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS) # Build targets $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu @-mkdir -p $(dir $@) - $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $< + $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $< $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c @-mkdir -p $(dir $@) $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $< @@ -97,14 +97,14 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME). # Distribution targets $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/% @-mkdir -p $(dir $@) - cp $< $@ + $(CP) $< $@ $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) @-mkdir -p $(dir $@) - cp $< $@ + $(CP) $< $@ $(DIST_GPU_RUNNER_LIB_DEPS): @-mkdir -p $(dir $@) - $(CP) $(GPU_LIB_DIR)/$(notdir $@)* $(dir $@) + $(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@) # Payload targets $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server