From 927d98a6cde43ffee3ef269cf013df5e96cbe767 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 12 Jul 2024 14:33:13 -0700 Subject: [PATCH] Add windows cuda v12 + v11 support --- .github/workflows/release.yaml | 93 ++++++++++++++++++++++++++++++++-- llm/generate/gen_windows.ps1 | 6 +-- scripts/build_windows.ps1 | 63 ++++++++++++++++++----- 3 files changed, 142 insertions(+), 20 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9287f6f7..4bd68455 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -183,8 +183,8 @@ jobs: name: windows-rocm-deps path: dist/deps/* - # CUDA generation step - generate-windows-cuda: + # CUDA v11 generation step + generate-windows-cuda-v11: environment: release runs-on: windows env: @@ -256,7 +256,89 @@ jobs: cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" - uses: actions/upload-artifact@v4 with: - name: generate-windows-cuda + name: generate-windows-cuda-v11 + path: | + llm/build/**/bin/* + dist/windows-amd64/** + - uses: actions/upload-artifact@v4 + with: + name: windows-cuda-deps + path: dist/deps/* + + # CUDA v12 generation step + generate-windows-cuda-v12: + environment: release + runs-on: windows + env: + KEY_CONTAINER: ${{ vars.KEY_CONTAINER }} + steps: + - uses: actions/checkout@v4 + - name: Set Version + shell: bash + run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV + - uses: 'google-github-actions/auth@v2' + with: + project_id: 'ollama' + credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}' + - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt + - name: install Windows SDK 8.1 to get signtool + run: | + $ErrorActionPreference = "Stop" + write-host "downloading SDK" + Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe" + Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait + write-host "Win SDK 8.1 installed" + gci -path 
'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe' + - name: install signing plugin + run: | + $ErrorActionPreference = "Stop" + write-host "downloading plugin" + Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip" + Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\ + write-host "Installing plugin" + & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet + write-host "plugin installed" + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + - name: 'Install CUDA' + run: | + $ErrorActionPreference = "Stop" + write-host "downloading CUDA Installer" + Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" + write-host "Installing CUDA" + Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait + write-host "Completed CUDA" + $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path) + $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' + echo "$cudaPath\bin" >> $env:GITHUB_PATH + echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV + echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV + echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV + - name: 'Verify CUDA' + run: nvcc -V + - run: go get ./... + - name: go generate + run: | + $gopath=(get-command go).source | split-path -parent + $cudabin=(get-command nvcc).source | split-path + & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" + cd $env:GITHUB_WORKSPACE + $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" + $env:PATH="$gopath;$cudabin;$env:PATH" + $env:OLLAMA_SKIP_CPU_GENERATE="1" + go generate -x ./... 
+ - name: 'gather cuda dependencies' + run: | + $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0] + md "dist\deps" + cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\" + cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\" + cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" + - uses: actions/upload-artifact@v4 + with: + name: generate-windows-cuda-v12 path: | llm/build/**/bin/* dist/windows-amd64/** @@ -270,7 +352,8 @@ jobs: environment: release runs-on: windows needs: - - generate-windows-cuda + - generate-windows-cuda-v11 + - generate-windows-cuda-v12 - generate-windows-rocm - generate-windows-cpu env: @@ -314,7 +397,7 @@ jobs: name: generate-windows-cpu - uses: actions/download-artifact@v4 with: - name: generate-windows-cuda + name: generate-windows-cuda-v11 - uses: actions/download-artifact@v4 with: name: windows-cuda-deps diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 1f8c96d8..42708d3e 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -261,7 +261,7 @@ function build_cuda() { if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) { # Then build cuda as a dynamically loaded library $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe" - $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename + $script:CUDA_VERSION=((get-item ($nvcc | split-path | split-path)).Basename -Split "\.")[0] if ($null -ne $script:CUDA_VERSION) { $script:CUDA_VARIANT="_"+$script:CUDA_VERSION } @@ -273,9 +273,9 @@ function build_cuda() { "-DGGML_CUDA=ON", "-DGGML_AVX=on", "-DGGML_AVX2=off", - "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_FLAGS=-t8", - "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}" + "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}", + "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH" ) if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) { write-host 
"OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`"" diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index e8d851f4..50b60230 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -7,6 +7,7 @@ $ErrorActionPreference = "Stop" function checkEnv() { + $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower() $script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower() Write-host "Building for ${script:TARGET_ARCH}" write-host "Locating required tools and paths" @@ -15,26 +16,23 @@ function checkEnv() { $MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0] } - # Try to find the CUDA dir - if ($null -eq $env:NVIDIA_DIR) { + # Locate CUDA versions + # Note: this assumes every version found will be built + $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') + if ($cudaList.length -eq 0) { $d=(get-command -ea 'silentlycontinue' nvcc).path - if ($d -ne $null) { - $script:NVIDIA_DIR=($d| split-path -parent) - } else { - $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') - if ($cudaList.length > 0) { - $script:NVIDIA_DIR=$cudaList[0] - } + if ($null -ne $d) { + $script:CUDA_DIRS=@($d| split-path -parent) } } else { - $script:NVIDIA_DIR=$env:NVIDIA_DIR + $script:CUDA_DIRS=$cudaList } $script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0] $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}" $env:CGO_ENABLED="1" - echo "Checking version" + Write-Output "Checking version" if (!$env:VERSION) { $data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always) $pattern="v(.+)" @@ -71,7 +69,48 @@ function checkEnv() { function buildOllama() { write-host "Building ollama CLI" if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) { - & go generate ./... 
+        Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
+
+        # TODO - consider trying to parallelize this with Start-ThreadJob, but env vars can't be used to toggle
+        # which targets to build
+
+        # Start by skipping CUDA to build everything else
+        pwsh -Command { $env:OLLAMA_SKIP_CUDA_GENERATE="1"; & go generate ./... }
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+
+        # Then skip everything else and build all the CUDA variants
+        foreach ($env:CUDA_LIB_DIR in $script:CUDA_DIRS) {
+            write-host "Building CUDA ${env:CUDA_LIB_DIR}"
+
+            if ($env:CUDA_LIB_DIR.Contains("v12")) {
+                pwsh -Command {
+                    $env:OLLAMA_SKIP_CUDA_GENERATE=""
+                    $env:OLLAMA_SKIP_STATIC_GENERATE="1"
+                    $env:OLLAMA_SKIP_CPU_GENERATE="1"
+                    $env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
+                    $env:OLLAMA_SKIP_ROCM_GENERATE="1"
+                    $env:CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
+                    $env:OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on"
+                    $env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent  # parent of the selected CUDA directory
+                    $env:PATH="$env:CUDA_LIB_DIR;$env:PATH"  # selected CUDA directory goes first on PATH
+                    & go generate ./...
+                }
+            } else {
+                pwsh -Command {
+                    $env:OLLAMA_SKIP_CUDA_GENERATE=""
+                    $env:OLLAMA_SKIP_STATIC_GENERATE="1"
+                    $env:OLLAMA_SKIP_CPU_GENERATE="1"
+                    $env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
+                    $env:OLLAMA_SKIP_ROCM_GENERATE="1"
+                    $env:CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
+                    $env:OLLAMA_CUSTOM_CUDA_DEFS=""
+                    $env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent  # parent of the selected CUDA directory
+                    $env:PATH="$env:CUDA_LIB_DIR;$env:PATH"  # selected CUDA directory goes first on PATH
+                    & go generate ./...
+                }
+            }
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        }
         if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
     } else {
         write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"