Add Jetson cuda variants for arm
This adds new variants for arm64 specific to Jetson platforms
This commit is contained in:
parent
c7bcb00319
commit
d470ebe78b
7 changed files with 96 additions and 16 deletions
48
Dockerfile
48
Dockerfile
|
@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1
|
|||
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
|
||||
ARG CUDA_VERSION=11.3.1
|
||||
ARG ROCM_VERSION=6.1.2
|
||||
ARG JETPACK_6=r36.2.0
|
||||
ARG JETPACK_5=r35.4.1
|
||||
ARG JETPACK_4=r32.7.1
|
||||
|
||||
# Copy the minimal context we need to run the generate scripts
|
||||
FROM scratch AS llm-code
|
||||
|
@ -22,7 +25,7 @@ ENV GOARCH amd64
|
|||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
||||
|
||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-server-arm64
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./scripts/rh_linux_deps.sh /
|
||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||
|
@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
|||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||
ARG CGO_CFLAGS
|
||||
ENV GOARCH arm64
|
||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
||||
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64
|
||||
ARG CMAKE_VERSION
|
||||
RUN apt-get update && apt-get install -y git curl && \
|
||||
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
|
||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||
ARG CGO_CFLAGS
|
||||
ENV GOARCH arm64
|
||||
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
|
||||
CUDA_VARIANT="_v11" \
|
||||
CUDA_VARIANT="_jetpack6" \
|
||||
CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack6" \
|
||||
CMAKE_CUDA_ARCHITECTURES="87" \
|
||||
bash gen_linux.sh
|
||||
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS cuda-build-jetpack5-arm64
|
||||
ARG CMAKE_VERSION
|
||||
RUN apt-get update && apt-get install -y git curl && \
|
||||
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
|
||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||
ARG CGO_CFLAGS
|
||||
ENV GOARCH arm64
|
||||
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||
CUDA_VARIANT="_jetpack5" \
|
||||
CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack5" \
|
||||
CMAKE_CUDA_ARCHITECTURES="72;87" \
|
||||
bash gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
|
||||
|
@ -123,8 +155,14 @@ ARG GOLANG_VERSION
|
|||
WORKDIR /go/src/github.com/ollama/ollama
|
||||
COPY . .
|
||||
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||
COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||
COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||
## arm binary += 381M
|
||||
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||
## arm binary += 330M
|
||||
COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||
COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||
ARG GOFLAGS
|
||||
ARG CGO_CFLAGS
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
|
|
44
gpu/gpu.go
44
gpu/gpu.go
|
@ -15,7 +15,9 @@ import (
|
|||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList {
|
|||
GpuInfo: GpuInfo{
|
||||
memInfo: mem,
|
||||
Library: "cpu",
|
||||
Variant: cpuCapability,
|
||||
Variant: cpuCapability.String(),
|
||||
ID: "0",
|
||||
},
|
||||
},
|
||||
|
@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList {
|
|||
|
||||
depPath := GetDepDir()
|
||||
|
||||
var cudaVariant string
|
||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||
if CudaTegra != "" {
|
||||
ver := strings.Split(CudaTegra, ".")
|
||||
if len(ver) > 0 {
|
||||
cudaVariant = "jetpack" + ver[0]
|
||||
}
|
||||
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
|
||||
r := regexp.MustCompile(` R(\d+) `)
|
||||
m := r.FindSubmatch(data)
|
||||
if len(m) != 2 {
|
||||
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
|
||||
} else {
|
||||
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
|
||||
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
|
||||
// https://developer.nvidia.com/embedded/jetpack-archive
|
||||
switch l4t {
|
||||
case 35:
|
||||
cudaVariant = "jetpack5"
|
||||
case 36:
|
||||
cudaVariant = "jetpack6"
|
||||
default:
|
||||
slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load ALL libraries
|
||||
cHandles = initCudaHandles()
|
||||
|
||||
|
@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList {
|
|||
gpuInfo := CudaGPUInfo{
|
||||
GpuInfo: GpuInfo{
|
||||
Library: "cuda",
|
||||
Variant: cudaVariant,
|
||||
},
|
||||
index: i,
|
||||
}
|
||||
|
@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList {
|
|||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||
gpuInfo.DependencyPath = depPath
|
||||
if depPath != "" {
|
||||
gpuInfo.DependencyPath = depPath
|
||||
// Check for variant specific directory
|
||||
if cudaVariant != "" {
|
||||
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
|
||||
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
|
||||
}
|
||||
}
|
||||
}
|
||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||
gpuInfo.DriverMajor = driverMajor
|
||||
gpuInfo.DriverMinor = driverMinor
|
||||
|
|
|
@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
|
|||
return []GpuInfo{
|
||||
{
|
||||
Library: "cpu",
|
||||
Variant: GetCPUCapability(),
|
||||
Variant: GetCPUCapability().String(),
|
||||
memInfo: mem,
|
||||
},
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
|
|||
return []GpuInfo{
|
||||
{
|
||||
Library: "cpu",
|
||||
Variant: GetCPUCapability(),
|
||||
Variant: GetCPUCapability().String(),
|
||||
memInfo: mem,
|
||||
},
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ type GpuInfo struct {
|
|||
Library string `json:"library,omitempty"`
|
||||
|
||||
// Optional variant to select (e.g. versions, cpu feature flags)
|
||||
Variant CPUCapability `json:"variant"`
|
||||
Variant string `json:"variant"`
|
||||
|
||||
// MinimumMemory represents the minimum memory required to use the GPU
|
||||
MinimumMemory uint64 `json:"-"`
|
||||
|
@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
|
|||
for _, info := range l {
|
||||
found := false
|
||||
requested := info.Library
|
||||
if info.Variant != CPUCapabilityNone {
|
||||
requested += "_" + info.Variant.String()
|
||||
if info.Variant != CPUCapabilityNone.String() {
|
||||
requested += "_" + info.Variant
|
||||
}
|
||||
for i, lib := range libs {
|
||||
if lib == requested {
|
||||
|
|
|
@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
|
|||
echo "CUDA libraries detected - building dynamic CUDA library"
|
||||
init_vars
|
||||
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
|
||||
if [ -n "${CUDA_MAJOR}" ]; then
|
||||
if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
|
||||
CUDA_VARIANT=_v${CUDA_MAJOR}
|
||||
fi
|
||||
if [ "${ARCH}" == "arm64" ]; then
|
||||
|
@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
|
|||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
|
||||
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
|
||||
export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
|
||||
CUDA_DIST_DIR="${DIST_BASE}/ollama_libs"
|
||||
CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/ollama_libs}"
|
||||
build
|
||||
install
|
||||
echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
|
||||
mkdir -p "${CUDA_DIST_DIR}"
|
||||
for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
|
||||
cp -a "${lib}" "${CUDA_DIST_DIR}"
|
||||
|
|
|
@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
|
|||
// glob workDir for files that start with ollama_
|
||||
availableServers := getAvailableServers()
|
||||
requested := info.Library
|
||||
if info.Variant != gpu.CPUCapabilityNone {
|
||||
requested += "_" + info.Variant.String()
|
||||
if info.Variant != gpu.CPUCapabilityNone.String() {
|
||||
requested += "_" + info.Variant
|
||||
}
|
||||
|
||||
servers := []string{}
|
||||
|
|
|
@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
|
|||
-t builder:$TARGETARCH \
|
||||
.
|
||||
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
||||
rm -rf ./dist/linux-$TARGETARCH
|
||||
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
|
||||
docker rm builder-$TARGETARCH
|
||||
echo "Compressing final linux bundle..."
|
||||
|
|
Loading…
Reference in a new issue