Add windows cuda v12 + v11 support
This commit is contained in:
parent
f6c811b320
commit
927d98a6cd
3 changed files with 142 additions and 20 deletions
93
.github/workflows/release.yaml
vendored
93
.github/workflows/release.yaml
vendored
|
@ -183,8 +183,8 @@ jobs:
|
||||||
name: windows-rocm-deps
|
name: windows-rocm-deps
|
||||||
path: dist/deps/*
|
path: dist/deps/*
|
||||||
|
|
||||||
# CUDA generation step
|
# CUDA v11 generation step
|
||||||
generate-windows-cuda:
|
generate-windows-cuda-v11:
|
||||||
environment: release
|
environment: release
|
||||||
runs-on: windows
|
runs-on: windows
|
||||||
env:
|
env:
|
||||||
|
@ -256,7 +256,89 @@ jobs:
|
||||||
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: generate-windows-cuda
|
name: generate-windows-cuda-v11
|
||||||
|
path: |
|
||||||
|
llm/build/**/bin/*
|
||||||
|
dist/windows-amd64/**
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: windows-cuda-deps
|
||||||
|
path: dist/deps/*
|
||||||
|
|
||||||
|
# CUDA v12 generation step
|
||||||
|
generate-windows-cuda-v12:
|
||||||
|
environment: release
|
||||||
|
runs-on: windows
|
||||||
|
env:
|
||||||
|
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Set Version
|
||||||
|
shell: bash
|
||||||
|
run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
|
||||||
|
- uses: 'google-github-actions/auth@v2'
|
||||||
|
with:
|
||||||
|
project_id: 'ollama'
|
||||||
|
credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
|
||||||
|
- run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
|
||||||
|
- name: install Windows SDK 8.1 to get signtool
|
||||||
|
run: |
|
||||||
|
$ErrorActionPreference = "Stop"
|
||||||
|
write-host "downloading SDK"
|
||||||
|
Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
|
||||||
|
Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
|
||||||
|
write-host "Win SDK 8.1 installed"
|
||||||
|
gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
|
||||||
|
- name: install signing plugin
|
||||||
|
run: |
|
||||||
|
$ErrorActionPreference = "Stop"
|
||||||
|
write-host "downloading plugin"
|
||||||
|
Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
|
||||||
|
Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
|
||||||
|
write-host "Installing plugin"
|
||||||
|
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
|
||||||
|
write-host "plugin installed"
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version-file: go.mod
|
||||||
|
cache: true
|
||||||
|
- name: 'Install CUDA'
|
||||||
|
run: |
|
||||||
|
$ErrorActionPreference = "Stop"
|
||||||
|
write-host "downloading CUDA Installer"
|
||||||
|
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
||||||
|
write-host "Installing CUDA"
|
||||||
|
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
||||||
|
write-host "Completed CUDA"
|
||||||
|
$cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
|
||||||
|
$cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
|
||||||
|
echo "$cudaPath\bin" >> $env:GITHUB_PATH
|
||||||
|
echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
|
||||||
|
echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
|
||||||
|
echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
|
||||||
|
- name: 'Verify CUDA'
|
||||||
|
run: nvcc -V
|
||||||
|
- run: go get ./...
|
||||||
|
- name: go generate
|
||||||
|
run: |
|
||||||
|
$gopath=(get-command go).source | split-path -parent
|
||||||
|
$cudabin=(get-command nvcc).source | split-path
|
||||||
|
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
|
||||||
|
cd $env:GITHUB_WORKSPACE
|
||||||
|
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||||
|
$env:PATH="$gopath;$cudabin;$env:PATH"
|
||||||
|
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
||||||
|
go generate -x ./...
|
||||||
|
- name: 'gather cuda dependencies'
|
||||||
|
run: |
|
||||||
|
$NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
|
||||||
|
md "dist\deps"
|
||||||
|
cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
|
||||||
|
cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
|
||||||
|
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: generate-windows-cuda-v12
|
||||||
path: |
|
path: |
|
||||||
llm/build/**/bin/*
|
llm/build/**/bin/*
|
||||||
dist/windows-amd64/**
|
dist/windows-amd64/**
|
||||||
|
@ -270,7 +352,8 @@ jobs:
|
||||||
environment: release
|
environment: release
|
||||||
runs-on: windows
|
runs-on: windows
|
||||||
needs:
|
needs:
|
||||||
- generate-windows-cuda
|
- generate-windows-cuda-v11
|
||||||
|
- generate-windows-cuda-v12
|
||||||
- generate-windows-rocm
|
- generate-windows-rocm
|
||||||
- generate-windows-cpu
|
- generate-windows-cpu
|
||||||
env:
|
env:
|
||||||
|
@ -314,7 +397,7 @@ jobs:
|
||||||
name: generate-windows-cpu
|
name: generate-windows-cpu
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: generate-windows-cuda
|
name: generate-windows-cuda-v11
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: windows-cuda-deps
|
name: windows-cuda-deps
|
||||||
|
|
|
@ -261,7 +261,7 @@ function build_cuda() {
|
||||||
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
|
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
|
||||||
# Then build cuda as a dynamically loaded library
|
# Then build cuda as a dynamically loaded library
|
||||||
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
|
||||||
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
|
$script:CUDA_VERSION=((get-item ($nvcc | split-path | split-path)).Basename -Split "\.")[0]
|
||||||
if ($null -ne $script:CUDA_VERSION) {
|
if ($null -ne $script:CUDA_VERSION) {
|
||||||
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
|
||||||
}
|
}
|
||||||
|
@ -273,9 +273,9 @@ function build_cuda() {
|
||||||
"-DGGML_CUDA=ON",
|
"-DGGML_CUDA=ON",
|
||||||
"-DGGML_AVX=on",
|
"-DGGML_AVX=on",
|
||||||
"-DGGML_AVX2=off",
|
"-DGGML_AVX2=off",
|
||||||
"-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
|
|
||||||
"-DCMAKE_CUDA_FLAGS=-t8",
|
"-DCMAKE_CUDA_FLAGS=-t8",
|
||||||
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
|
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
|
||||||
|
"-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
|
||||||
)
|
)
|
||||||
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
|
||||||
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
$ErrorActionPreference = "Stop"
|
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
function checkEnv() {
|
function checkEnv() {
|
||||||
|
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
|
||||||
$script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
|
$script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
|
||||||
Write-host "Building for ${script:TARGET_ARCH}"
|
Write-host "Building for ${script:TARGET_ARCH}"
|
||||||
write-host "Locating required tools and paths"
|
write-host "Locating required tools and paths"
|
||||||
|
@ -15,26 +16,23 @@ function checkEnv() {
|
||||||
$MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
|
$MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
|
||||||
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
|
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
|
||||||
}
|
}
|
||||||
# Try to find the CUDA dir
|
# Locate CUDA versions
|
||||||
if ($null -eq $env:NVIDIA_DIR) {
|
# Note: this assumes every version found will be built
|
||||||
|
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
|
||||||
|
if ($cudaList.length -eq 0) {
|
||||||
$d=(get-command -ea 'silentlycontinue' nvcc).path
|
$d=(get-command -ea 'silentlycontinue' nvcc).path
|
||||||
if ($d -ne $null) {
|
if ($null -ne $d) {
|
||||||
$script:NVIDIA_DIR=($d| split-path -parent)
|
$script:CUDA_DIRS=@($d| split-path -parent)
|
||||||
} else {
|
|
||||||
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
|
|
||||||
if ($cudaList.length > 0) {
|
|
||||||
$script:NVIDIA_DIR=$cudaList[0]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$script:NVIDIA_DIR=$env:NVIDIA_DIR
|
$script:CUDA_DIRS=$cudaList
|
||||||
}
|
}
|
||||||
|
|
||||||
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
|
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
|
||||||
|
|
||||||
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
|
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
|
||||||
$env:CGO_ENABLED="1"
|
$env:CGO_ENABLED="1"
|
||||||
echo "Checking version"
|
Write-Output "Checking version"
|
||||||
if (!$env:VERSION) {
|
if (!$env:VERSION) {
|
||||||
$data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always)
|
$data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always)
|
||||||
$pattern="v(.+)"
|
$pattern="v(.+)"
|
||||||
|
@ -71,7 +69,48 @@ function checkEnv() {
|
||||||
function buildOllama() {
|
function buildOllama() {
|
||||||
write-host "Building ollama CLI"
|
write-host "Building ollama CLI"
|
||||||
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
|
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
|
||||||
& go generate ./...
|
Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
|
||||||
|
|
||||||
|
# TODO - consider trying to parallelize this with Start-ThreadJob, but env vars can't be used to toggle
|
||||||
|
# which targets to build
|
||||||
|
|
||||||
|
# Start by skipping CUDA to build everything else
|
||||||
|
pwsh -Command { $env:OLLAMA_SKIP_CUDA_GENERATE="1"; & go generate ./... }
|
||||||
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
|
|
||||||
|
# Then skip everything else and build all the CUDA variants
|
||||||
|
foreach ($env:CUDA_LIB_DIR in $script:CUDA_DIRS) {
|
||||||
|
write-host "Building CUDA ${env:CUDA_LIB_DIR}"
|
||||||
|
|
||||||
|
if ($env:CUDA_LIB_DIR.Contains("v12")) {
|
||||||
|
pwsh -Command {
|
||||||
|
$env:OLLAMA_SKIP_CUDA_GENERATE=""
|
||||||
|
$env:OLLAMA_SKIP_STATIC_GENERATE="1"
|
||||||
|
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
||||||
|
$env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
|
||||||
|
$env:OLLAMA_SKIP_ROCM_GENERATE="1"
|
||||||
|
$env:CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
|
||||||
|
$env:OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on"
|
||||||
|
$env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent
|
||||||
|
$env:PATH="$envs:CUDA_LIB_DIR;$env:PATH"
|
||||||
|
& go generate ./...
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pwsh -Command {
|
||||||
|
$env:OLLAMA_SKIP_CUDA_GENERATE=""
|
||||||
|
$env:OLLAMA_SKIP_STATIC_GENERATE="1"
|
||||||
|
$env:OLLAMA_SKIP_CPU_GENERATE="1"
|
||||||
|
$env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
|
||||||
|
$env:OLLAMA_SKIP_ROCM_GENERATE="1"
|
||||||
|
$env:CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
|
||||||
|
$env:OLLAMA_CUSTOM_CUDA_DEFS=""
|
||||||
|
$env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent
|
||||||
|
$env:PATH="$envs:CUDA_LIB_DIR;$env:PATH"
|
||||||
|
& go generate ./...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
|
}
|
||||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||||
} else {
|
} else {
|
||||||
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
|
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
|
||||||
|
|
Loading…
Reference in a new issue