Add Jetson cuda variants for arm
This adds new variants for arm64 specific to Jetson platforms
This commit is contained in:
parent
c7bcb00319
commit
d470ebe78b
7 changed files with 96 additions and 16 deletions
48
Dockerfile
48
Dockerfile
|
@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1
|
||||||
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
|
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
|
||||||
ARG CUDA_VERSION=11.3.1
|
ARG CUDA_VERSION=11.3.1
|
||||||
ARG ROCM_VERSION=6.1.2
|
ARG ROCM_VERSION=6.1.2
|
||||||
|
ARG JETPACK_6=r36.2.0
|
||||||
|
ARG JETPACK_5=r35.4.1
|
||||||
|
ARG JETPACK_4=r32.7.1
|
||||||
|
|
||||||
# Copy the minimal context we need to run the generate scripts
|
# Copy the minimal context we need to run the generate scripts
|
||||||
FROM scratch AS llm-code
|
FROM scratch AS llm-code
|
||||||
|
@ -22,7 +25,7 @@ ENV GOARCH amd64
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-server-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
|
@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
ENV GOARCH arm64
|
ENV GOARCH arm64
|
||||||
|
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
||||||
|
|
||||||
|
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64
|
||||||
|
ARG CMAKE_VERSION
|
||||||
|
RUN apt-get update && apt-get install -y git curl && \
|
||||||
|
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
|
||||||
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ENV GOARCH arm64
|
||||||
|
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
OLLAMA_SKIP_CPU_GENERATE=1 \
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
|
CUDA_VARIANT="_jetpack6" \
|
||||||
CUDA_VARIANT="_v11" \
|
CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack6" \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="87" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
|
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS cuda-build-jetpack5-arm64
|
||||||
|
ARG CMAKE_VERSION
|
||||||
|
RUN apt-get update && apt-get install -y git curl && \
|
||||||
|
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
|
||||||
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ENV GOARCH arm64
|
||||||
|
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
|
||||||
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CUDA_VARIANT="_jetpack5" \
|
||||||
|
CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack5" \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="72;87" \
|
||||||
bash gen_linux.sh
|
bash gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
|
||||||
|
@ -123,8 +155,14 @@ ARG GOLANG_VERSION
|
||||||
WORKDIR /go/src/github.com/ollama/ollama
|
WORKDIR /go/src/github.com/ollama/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
## arm binary += 381M
|
||||||
|
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
## arm binary += 330M
|
||||||
|
COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
|
44
gpu/gpu.go
44
gpu/gpu.go
|
@ -15,7 +15,9 @@ import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList {
|
||||||
GpuInfo: GpuInfo{
|
GpuInfo: GpuInfo{
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
Library: "cpu",
|
Library: "cpu",
|
||||||
Variant: cpuCapability,
|
Variant: cpuCapability.String(),
|
||||||
ID: "0",
|
ID: "0",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList {
|
||||||
|
|
||||||
depPath := GetDepDir()
|
depPath := GetDepDir()
|
||||||
|
|
||||||
|
var cudaVariant string
|
||||||
|
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||||
|
if CudaTegra != "" {
|
||||||
|
ver := strings.Split(CudaTegra, ".")
|
||||||
|
if len(ver) > 0 {
|
||||||
|
cudaVariant = "jetpack" + ver[0]
|
||||||
|
}
|
||||||
|
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
|
||||||
|
r := regexp.MustCompile(` R(\d+) `)
|
||||||
|
m := r.FindSubmatch(data)
|
||||||
|
if len(m) != 2 {
|
||||||
|
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
|
||||||
|
} else {
|
||||||
|
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
|
||||||
|
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
|
||||||
|
// https://developer.nvidia.com/embedded/jetpack-archive
|
||||||
|
switch l4t {
|
||||||
|
case 35:
|
||||||
|
cudaVariant = "jetpack5"
|
||||||
|
case 36:
|
||||||
|
cudaVariant = "jetpack6"
|
||||||
|
default:
|
||||||
|
slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Load ALL libraries
|
// Load ALL libraries
|
||||||
cHandles = initCudaHandles()
|
cHandles = initCudaHandles()
|
||||||
|
|
||||||
|
@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList {
|
||||||
gpuInfo := CudaGPUInfo{
|
gpuInfo := CudaGPUInfo{
|
||||||
GpuInfo: GpuInfo{
|
GpuInfo: GpuInfo{
|
||||||
Library: "cuda",
|
Library: "cuda",
|
||||||
|
Variant: cudaVariant,
|
||||||
},
|
},
|
||||||
index: i,
|
index: i,
|
||||||
}
|
}
|
||||||
|
@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList {
|
||||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||||
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
||||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||||
gpuInfo.DependencyPath = depPath
|
if depPath != "" {
|
||||||
|
gpuInfo.DependencyPath = depPath
|
||||||
|
// Check for variant specific directory
|
||||||
|
if cudaVariant != "" {
|
||||||
|
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
|
||||||
|
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
gpuInfo.DriverMajor = driverMajor
|
gpuInfo.DriverMajor = driverMajor
|
||||||
gpuInfo.DriverMinor = driverMinor
|
gpuInfo.DriverMinor = driverMinor
|
||||||
|
|
|
@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
|
||||||
return []GpuInfo{
|
return []GpuInfo{
|
||||||
{
|
{
|
||||||
Library: "cpu",
|
Library: "cpu",
|
||||||
Variant: GetCPUCapability(),
|
Variant: GetCPUCapability().String(),
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
|
||||||
return []GpuInfo{
|
return []GpuInfo{
|
||||||
{
|
{
|
||||||
Library: "cpu",
|
Library: "cpu",
|
||||||
Variant: GetCPUCapability(),
|
Variant: GetCPUCapability().String(),
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ type GpuInfo struct {
|
||||||
Library string `json:"library,omitempty"`
|
Library string `json:"library,omitempty"`
|
||||||
|
|
||||||
// Optional variant to select (e.g. versions, cpu feature flags)
|
// Optional variant to select (e.g. versions, cpu feature flags)
|
||||||
Variant CPUCapability `json:"variant"`
|
Variant string `json:"variant"`
|
||||||
|
|
||||||
// MinimumMemory represents the minimum memory required to use the GPU
|
// MinimumMemory represents the minimum memory required to use the GPU
|
||||||
MinimumMemory uint64 `json:"-"`
|
MinimumMemory uint64 `json:"-"`
|
||||||
|
@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
|
||||||
for _, info := range l {
|
for _, info := range l {
|
||||||
found := false
|
found := false
|
||||||
requested := info.Library
|
requested := info.Library
|
||||||
if info.Variant != CPUCapabilityNone {
|
if info.Variant != CPUCapabilityNone.String() {
|
||||||
requested += "_" + info.Variant.String()
|
requested += "_" + info.Variant
|
||||||
}
|
}
|
||||||
for i, lib := range libs {
|
for i, lib := range libs {
|
||||||
if lib == requested {
|
if lib == requested {
|
||||||
|
|
|
@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
|
||||||
echo "CUDA libraries detected - building dynamic CUDA library"
|
echo "CUDA libraries detected - building dynamic CUDA library"
|
||||||
init_vars
|
init_vars
|
||||||
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
|
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
|
||||||
if [ -n "${CUDA_MAJOR}" ]; then
|
if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
|
||||||
CUDA_VARIANT=_v${CUDA_MAJOR}
|
CUDA_VARIANT=_v${CUDA_MAJOR}
|
||||||
fi
|
fi
|
||||||
if [ "${ARCH}" == "arm64" ]; then
|
if [ "${ARCH}" == "arm64" ]; then
|
||||||
|
@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
|
||||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
|
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
|
||||||
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
|
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
|
||||||
export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
|
export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
|
||||||
CUDA_DIST_DIR="${DIST_BASE}/ollama_libs"
|
CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/ollama_libs}"
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
|
echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
|
||||||
mkdir -p "${CUDA_DIST_DIR}"
|
mkdir -p "${CUDA_DIST_DIR}"
|
||||||
for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
|
for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
|
||||||
cp -a "${lib}" "${CUDA_DIST_DIR}"
|
cp -a "${lib}" "${CUDA_DIST_DIR}"
|
||||||
|
|
|
@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
|
||||||
// glob workDir for files that start with ollama_
|
// glob workDir for files that start with ollama_
|
||||||
availableServers := getAvailableServers()
|
availableServers := getAvailableServers()
|
||||||
requested := info.Library
|
requested := info.Library
|
||||||
if info.Variant != gpu.CPUCapabilityNone {
|
if info.Variant != gpu.CPUCapabilityNone.String() {
|
||||||
requested += "_" + info.Variant.String()
|
requested += "_" + info.Variant
|
||||||
}
|
}
|
||||||
|
|
||||||
servers := []string{}
|
servers := []string{}
|
||||||
|
|
|
@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
|
||||||
-t builder:$TARGETARCH \
|
-t builder:$TARGETARCH \
|
||||||
.
|
.
|
||||||
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
||||||
|
rm -rf ./dist/linux-$TARGETARCH
|
||||||
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
|
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
|
||||||
docker rm builder-$TARGETARCH
|
docker rm builder-$TARGETARCH
|
||||||
echo "Compressing final linux bundle..."
|
echo "Compressing final linux bundle..."
|
||||||
|
|
Loading…
Reference in a new issue