From df011054fab42766d36cf319421badc4e0e4048a Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 12 Nov 2024 10:31:52 -0800 Subject: [PATCH] Jetpack support for Go server (#7217) This adds support for the Jetson JetPack variants into the Go runner --- Dockerfile | 72 ++++++++++++++++++++++++++++++++++++----- discover/amd_linux.go | 2 +- discover/amd_windows.go | 2 +- discover/gpu.go | 8 ++--- discover/types.go | 2 +- llama/llama.go | 6 ++-- llama/make/cuda.make | 2 +- llm/server.go | 4 +-- 8 files changed, 78 insertions(+), 20 deletions(-) diff --git a/Dockerfile b/Dockerfile index 16d1e4be..ca09325c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,8 @@ ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" ARG CUDA_VERSION_12=12.4.0 ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" ARG ROCM_VERSION=6.1.2 +ARG JETPACK_6=r36.2.0 +ARG JETPACK_5=r35.4.1 ### To create a local image for building linux binaries on mac or windows with efficient incremental builds # @@ -13,7 +15,7 @@ ARG ROCM_VERSION=6.1.2 # ### Then incremental builds will be much faster in this container # -# make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama . +# make -j 10 && go build -trimpath -o dist/linux-amd64/ollama . # FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64 ARG CMAKE_VERSION @@ -76,9 +78,9 @@ ARG CUDA_V12_ARCHITECTURES ARG OLLAMA_FAST_BUILD RUN --mount=type=cache,target=/root/.ccache \ if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \ - make -C llama -j $(expr $(nproc) / 2 ) ; \ + make -j $(expr $(nproc) / 2 ) ; \ else \ - make -C llama -j 5 ; \ + make -j 5 ; \ fi FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64 @@ -90,7 +92,46 @@ ARG CUDA_V11_ARCHITECTURES ARG CUDA_V12_ARCHITECTURES ARG OLLAMA_FAST_BUILD RUN --mount=type=cache,target=/root/.ccache \ - make -C llama -j 8 + make -j 5 + +# Jetsons need to be built in discrete stages +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64 +ARG GOLANG_VERSION +RUN apt-get update && apt-get install -y git curl ccache && \ + curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \ + ln -s /usr/local/go/bin/go /usr/local/bin/go && \ + ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +WORKDIR /go/src/github.com/ollama/ollama/ +COPY . . +ARG CGO_CFLAGS +ENV GOARCH arm64 +RUN --mount=type=cache,target=/root/.ccache \ + make -j 5 cuda_v11 \ + CUDA_ARCHITECTURES="72;87" \ + GPU_RUNNER_VARIANT=_jetpack5 \ + CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \ + DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \ + DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5 + +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64 +ARG GOLANG_VERSION +RUN apt-get update && apt-get install -y git curl ccache && \ + curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \ + ln -s /usr/local/go/bin/go /usr/local/bin/go && \ + ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +WORKDIR /go/src/github.com/ollama/ollama/ +COPY . . +ARG CGO_CFLAGS +ENV GOARCH arm64 +RUN --mount=type=cache,target=/root/.ccache \ + make -j 5 cuda_v12 \ + CUDA_ARCHITECTURES="87" \ + GPU_RUNNER_VARIANT=_jetpack6 \ + CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \ + DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \ + DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6 # Intermediate stages used for ./scripts/build_linux.sh @@ -134,12 +175,20 @@ FROM --platform=linux/arm64 builder-arm64 AS build-arm64 COPY . . COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/ +COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/build/ build/ +COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/build/ build/ ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ go build -trimpath -o dist/linux-arm64/bin/ollama . RUN cd dist/linux-$GOARCH && \ tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz +RUN cd dist/linux-$GOARCH-jetpack5 && \ + tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz +RUN cd dist/linux-$GOARCH-jetpack6 && \ + tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz FROM --platform=linux/amd64 scratch AS dist-amd64 COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / @@ -180,16 +229,23 @@ RUN rm -rf \ FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64 RUN apt-get update && \ apt-get install -y ca-certificates && \ - rm -rf /var/lib/apt/lists/* + apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64 +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/ +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/ RUN apt-get update && \ apt-get install -y ca-certificates && \ - rm -rf /var/lib/apt/lists/* + apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ -COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ + # ROCm libraries larger so we keep it distinct from the CPU/CUDA image FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm @@ -198,7 +254,7 @@ FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/ RUN apt-get update && \ apt-get install -y ca-certificates && \ - rm -rf /var/lib/apt/lists/* + apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ diff --git a/discover/amd_linux.go b/discover/amd_linux.go index fad7b7a6..d092f6b5 100644 --- a/discover/amd_linux.go +++ b/discover/amd_linux.go @@ -350,7 +350,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) { return nil, err } } - gpuInfo.DependencyPath = libDir + gpuInfo.DependencyPath = []string{libDir} if gfxOverride == "" { // Only load supported list once diff --git a/discover/amd_windows.go b/discover/amd_windows.go index b0c76f1e..efa5cc23 100644 --- a/discover/amd_windows.go +++ b/discover/amd_windows.go @@ -111,7 +111,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) { UnreliableFreeMemory: true, ID: strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices - DependencyPath: libDir, + DependencyPath: []string{libDir}, MinimumMemory: rocmMinimumMemory, Name: name, Compute: gfx, diff --git a/discover/gpu.go b/discover/gpu.go index 808c807b..cf34b904 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -240,7 +240,7 @@ func GetGPUInfo() GpuInfoList { Library: "cpu", Variant: cpuCapability.String(), ID: "0", - DependencyPath: depPath, + DependencyPath: []string{depPath}, }, CPUs: details, }, @@ -293,11 +293,11 @@ func GetGPUInfo() GpuInfoList { gpuInfo.DriverMinor = driverMinor variant := cudaVariant(gpuInfo) if depPath != "" { - gpuInfo.DependencyPath = depPath + gpuInfo.DependencyPath = []string{depPath} // Check for variant specific directory if variant != "" { if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil { - gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant) + gpuInfo.DependencyPath = []string{filepath.Join(depPath, "cuda_"+variant), depPath} } } } @@ -370,7 +370,7 @@ func GetGPUInfo() GpuInfoList { gpuInfo.FreeMemory = uint64(memInfo.free) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) - gpuInfo.DependencyPath = depPath + gpuInfo.DependencyPath = []string{depPath} oneapiGPUs = append(oneapiGPUs, gpuInfo) } } diff --git a/discover/types.go b/discover/types.go index b2cca109..19f21524 100644 --- a/discover/types.go +++ b/discover/types.go @@ -25,7 +25,7 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"? MinimumMemory uint64 `json:"-"` // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly - DependencyPath string `json:"lib_path,omitempty"` + DependencyPath []string `json:"lib_path,omitempty"` // Extra environment variables specific to the GPU as list of [key,value] EnvWorkarounds [][2]string `json:"envs,omitempty"` diff --git a/llama/llama.go b/llama/llama.go index a092ea12..dbb02768 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -21,6 +21,8 @@ package llama #cgo cuda CFLAGS: -fPIE -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 #cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 #cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 +#cgo cuda_jetpack5 LDFLAGS: -lggml_cuda_jetpack5 -L/usr/local/cuda-11/lib64 +#cgo cuda_jetpack6 LDFLAGS: -lggml_cuda_jetpack6 -L/usr/local/cuda-12/lib64 #cgo cuda_v11 LDFLAGS: -lggml_cuda_v11 -L/usr/local/cuda-11/lib64 #cgo cuda_v12 LDFLAGS: -lggml_cuda_v12 -L/usr/local/cuda-12/lib64 #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers @@ -36,8 +38,8 @@ package llama #cgo linux CXXFLAGS: -D_GNU_SOURCE #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64 -#cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -D__ARM_FEATURE_MATMUL_INT8 -#cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -D__ARM_FEATURE_MATMUL_INT8 +#cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA +#cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/Linux/arm64 #cgo linux,arm64,sve CFLAGS: -march=armv8.6-a+sve #cgo linux,arm64,sve CXXFLAGS: -march=armv8.6-a+sve diff --git a/llama/make/cuda.make b/llama/make/cuda.make index 7ff1815f..7a4b1036 100644 --- a/llama/make/cuda.make +++ b/llama/make/cuda.make @@ -20,7 +20,7 @@ GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT)*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))) -GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(DIST_LIB_DIR)/,$(notdir $(GPU_LIBS)))) +GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS)))) ifeq ($(OS),linux) CUDA_PATH?=/usr/local/cuda diff --git a/llm/server.go b/llm/server.go index 5ca6aa32..96815826 100644 --- a/llm/server.go +++ b/llm/server.go @@ -306,9 +306,9 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter // Note: we always put the dependency path first // since this was the exact version we compiled/linked against - if gpus[0].DependencyPath != "" { + if gpus[0].DependencyPath != nil { // assume gpus from the same library have the same dependency path - libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...) + libraryPaths = append(gpus[0].DependencyPath, libraryPaths...) } server := filepath.Join(dir, "ollama_llama_server")