diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 4bd68455..508fbb35 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -183,10 +183,17 @@ jobs:
           name: windows-rocm-deps
           path: dist/deps/*

-  # CUDA v11 generation step
-  generate-windows-cuda-v11:
+  # CUDA generation step
+  generate-windows-cuda:
     environment: release
     runs-on: windows
+    strategy:
+      matrix:
+        cuda:
+          - version: "11"
+            url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
+          - version: "12"
+            url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
     env:
       KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
     steps:
@@ -220,11 +227,11 @@
         with:
           go-version-file: go.mod
           cache: true
-      - name: 'Install CUDA'
+      - name: 'Install CUDA ${{ matrix.cuda.version }}'
         run: |
           $ErrorActionPreference = "Stop"
           write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
           write-host "Installing CUDA"
           Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
           write-host "Completed CUDA"
@@ -256,7 +263,7 @@
           cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
       - uses: actions/upload-artifact@v4
         with:
-          name: generate-windows-cuda-v11
+          name: generate-windows-cuda-${{ matrix.cuda.version }}
           path: |
             llm/build/**/bin/*
             dist/windows-amd64/**
@@ -265,95 +272,13 @@
           name: windows-cuda-deps
           path: dist/deps/*

-  # CUDA v12 generation step
-  generate-windows-cuda-v12:
-    environment: release
-    runs-on: windows
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          project_id: 'ollama'
-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
-      - name: install Windows SDK 8.1 to get signtool
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading SDK"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-          write-host "Win SDK 8.1 installed"
-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-      - name: install signing plugin
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading plugin"
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-          write-host "Installing plugin"
-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-          write-host "plugin installed"
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: 'Install CUDA'
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
-          write-host "Installing CUDA"
-          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
-          write-host "Completed CUDA"
-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
-          echo "$cudaPath\bin" >> $env:GITHUB_PATH
-          echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
-      - name: 'Verify CUDA'
-        run: nvcc -V
-      - run: go get ./...
-      - name: go generate
-        run: |
-          $gopath=(get-command go).source | split-path -parent
-          $cudabin=(get-command nvcc).source | split-path
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$cudabin;$env:PATH"
-          $env:OLLAMA_SKIP_CPU_GENERATE="1"
-          go generate -x ./...
-      - name: 'gather cuda dependencies'
-        run: |
-          $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
-          md "dist\deps"
-          cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
-          cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
-          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-cuda-v12
-          path: |
-            llm/build/**/bin/*
-            dist/windows-amd64/**
-      - uses: actions/upload-artifact@v4
-        with:
-          name: windows-cuda-deps
-          path: dist/deps/*

   # Import the prior generation steps and build the final windows assets
   build-windows:
     environment: release
     runs-on: windows
     needs:
-      - generate-windows-cuda-v11
-      - generate-windows-cuda-v12
+      - generate-windows-cuda
       - generate-windows-rocm
       - generate-windows-cpu
     env:
@@ -397,7 +322,10 @@
           name: generate-windows-cpu
       - uses: actions/download-artifact@v4
         with:
-          name: generate-windows-cuda-v11
+          name: generate-windows-cuda-11
+      - uses: actions/download-artifact@v4
+        with:
+          name: generate-windows-cuda-12
       - uses: actions/download-artifact@v4
         with:
           name: windows-cuda-deps
diff --git a/docs/linux.md b/docs/linux.md
index 3ed2bed0..d1d5892c 100644
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -20,12 +20,12 @@ GPU.

 ## Manual install

-### Download the `ollama` tar file
+### Download `ollama`

-Ollama is distributed as a tar file including GPU library dependencies.
+Download and extract the Linux package:

 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
 ```

 ### Adding Ollama as a startup service (recommended)
@@ -95,7 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
 Or by downloading the ollama binary:

 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
 ```

 ## Installing specific versions
diff --git a/gpu/cuda_common.go b/gpu/cuda_common.go
index defaa60a..827cc9b4 100644
--- a/gpu/cuda_common.go
+++ b/gpu/cuda_common.go
@@ -28,7 +28,7 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 	return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
 }

-func cudaGetVariant(gpuInfo CudaGPUInfo) string {
+func cudaVariant(gpuInfo CudaGPUInfo) string {
 	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
 		if CudaTegra != "" {
 			ver := strings.Split(CudaTegra, ".")
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 391c98a8..72d237a6 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -225,7 +225,7 @@ func GetGPUInfo() GpuInfoList {
 			return GpuInfoList{cpus[0].GpuInfo}
 		}

-		depPath := GetDepDir()
+		depPath := LibraryDir()

 		// Load ALL libraries
 		cHandles = initCudaHandles()
@@ -264,20 +264,20 @@
 			gpuInfo.computeMajor = int(memInfo.major)
 			gpuInfo.computeMinor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
-			cudaVariant := cudaGetVariant(gpuInfo)
+			variant := cudaVariant(gpuInfo)
 			if depPath != "" {
 				gpuInfo.DependencyPath = depPath
 				// Check for variant specific directory
-				if cudaVariant != "" {
-					if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
-						gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
+				if variant != "" {
+					if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
+						gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
 					}
 				}
 			}
 			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 			gpuInfo.DriverMajor = driverMajor
 			gpuInfo.DriverMinor = driverMinor
-			gpuInfo.Variant = cudaGetVariant(gpuInfo)
+			gpuInfo.Variant = variant

 			// query the management library as well so we can record any skew between the two
 			// which represents overhead on the GPU we must set aside on subsequent updates
@@ -468,7 +468,7 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	slog.Debug("Searching for GPU library", "name", baseLibName)

 	// Start with our bundled libraries
-	patterns := []string{filepath.Join(GetDepDir(), baseLibName)}
+	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}

 	switch runtime.GOOS {
 	case "windows":
@@ -642,7 +642,7 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 	}
 }

-func GetDepDir() string {
+func LibraryDir() string {
 	// On Windows/linux we bundle the dependencies at the same level as the executable
 	appExe, err := os.Executable()
 	if err != nil {
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
index 4d43c9e2..cbdfd09f 100644
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -117,7 +117,7 @@ function build {
     if ($cmakeDefs -contains "-G") {
         $extra=@("-j8")
     } else {
-        $extra= @("--", "/p:CL_MPcount=8")
+        $extra= @("--", "/maxCpuCount:8")
     }
     write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
     & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
@@ -273,7 +273,7 @@ function build_cuda() {
             "-DGGML_CUDA=ON",
             "-DGGML_AVX=on",
             "-DGGML_AVX2=off",
-            "-DCMAKE_CUDA_FLAGS=-t8",
+            "-DCMAKE_CUDA_FLAGS=-t6",
             "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
             "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
         )