Enable cuda v12 flags
This commit is contained in:
parent
4fe3a556fa
commit
f6c811b320
1 changed files with 32 additions and 3 deletions
35
Dockerfile
35
Dockerfile
|
@ -1,7 +1,9 @@
|
||||||
ARG GOLANG_VERSION=1.22.5
|
ARG GOLANG_VERSION=1.22.5
|
||||||
ARG CMAKE_VERSION=3.22.1
|
ARG CMAKE_VERSION=3.22.1
|
||||||
ARG CUDA_VERSION_11=11.3.1
|
ARG CUDA_VERSION_11=11.3.1
|
||||||
|
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
|
||||||
ARG CUDA_VERSION_12=12.4.0
|
ARG CUDA_VERSION_12=12.4.0
|
||||||
|
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
|
||||||
ARG ROCM_VERSION=6.1.2
|
ARG ROCM_VERSION=6.1.2
|
||||||
ARG JETPACK_6=r36.2.0
|
ARG JETPACK_6=r36.2.0
|
||||||
ARG JETPACK_5=r35.4.1
|
ARG JETPACK_5=r35.4.1
|
||||||
|
@ -21,11 +23,12 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
|
ARG CUDA_V11_ARCHITECTURES
|
||||||
ENV GOARCH amd64
|
ENV GOARCH amd64
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
OLLAMA_SKIP_CPU_GENERATE=1 \
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" \
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
|
||||||
CUDA_VARIANT="_v11" \
|
CUDA_VARIANT="_v11" \
|
||||||
bash gen_linux.sh
|
bash gen_linux.sh
|
||||||
|
|
||||||
|
@ -37,12 +40,14 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
|
ARG CUDA_V12_ARCHITECTURES
|
||||||
ENV GOARCH amd64
|
ENV GOARCH amd64
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
OLLAMA_SKIP_CPU_GENERATE=1 \
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" \
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
|
||||||
CUDA_VARIANT="_v12" \
|
CUDA_VARIANT="_v12" \
|
||||||
|
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
|
||||||
bash gen_linux.sh
|
bash gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
|
||||||
|
@ -53,9 +58,31 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
|
ARG CUDA_V11_ARCHITECTURES
|
||||||
|
ENV GOARCH arm64
|
||||||
|
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
|
||||||
|
CUDA_VARIANT="_v11" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
|
||||||
|
ARG CMAKE_VERSION
|
||||||
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
|
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||||
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG CUDA_V12_ARCHITECTURES
|
||||||
ENV GOARCH arm64
|
ENV GOARCH arm64
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
|
||||||
|
CUDA_VARIANT="_v12" \
|
||||||
|
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64
|
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
|
@ -180,6 +207,8 @@ COPY . .
|
||||||
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
## arm binary += 381M
|
## arm binary += 381M
|
||||||
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
|
Loading…
Reference in a new issue