diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 61ca3c43..5ae630c3 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -147,7 +147,7 @@ jobs: run: | $ErrorActionPreference = "Stop" write-host "downloading AMD HIP Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" + Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" write-host "Installing AMD HIP" Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait write-host "Completed AMD HIP" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 13d1c957..977d8da1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -169,7 +169,7 @@ jobs: run: | $ErrorActionPreference = "Stop" write-host "downloading AMD HIP Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" + Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" write-host "Installing AMD HIP" Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait write-host "Completed AMD HIP" diff --git a/docs/faq.md b/docs/faq.md index 57411246..da1848f7 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -272,4 +272,4 @@ The following server settings may be used to adjust how Ollama handles concurren - `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time. The default will auto-select either 4 or 1 based on available memory. 
- `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512 -Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs VRAM. \ No newline at end of file +Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6.2 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPU's VRAM. \ No newline at end of file diff --git a/gpu/amd_common.go b/gpu/amd_common.go index 27a81e3f..7d1cab7c 100644 --- a/gpu/amd_common.go +++ b/gpu/amd_common.go @@ -49,9 +49,17 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { } func commonAMDValidateLibDir() (string, error) { - // We try to favor system paths first, so that we can wire up the subprocess to use - // the system version. 
Only use our bundled version if the system version doesn't work - // This gives users a more recovery options if versions have subtle problems at runtime + // Favor our bundled version + + // Installer payload location if we're running the installed binary + exe, err := os.Executable() + if err == nil { + rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") + if rocmLibUsable(rocmTargetDir) { + slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) + return rocmTargetDir, nil + } + } // Prefer explicit HIP env var hipPath := os.Getenv("HIP_PATH") @@ -87,14 +95,5 @@ func commonAMDValidateLibDir() (string, error) { } } - // Installer payload location if we're running the installed binary - exe, err := os.Executable() - if err == nil { - rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") - if rocmLibUsable(rocmTargetDir) { - slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) - return rocmTargetDir, nil - } - } return "", fmt.Errorf("no suitable rocm found, falling back to CPU") } diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index 8b6fabeb..5d09be8b 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -22,8 +22,8 @@ const ( var ( // Used to validate if the given ROCm lib is usable - ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here... - RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob? + ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // This is not sufficient to discern v5 vs v6 + RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob? 
) func AMDGetGPUInfo() []RocmGPUInfo { diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 26bc4fa3..beb964f9 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -6,18 +6,9 @@ function amdGPUs { if ($env:AMDGPU_TARGETS) { return $env:AMDGPU_TARGETS } - # TODO - load from some common data file for linux + windows build consistency + # Current supported rocblas list from ROCm v6.1.2 on windows $GPU_LIST = @( - "gfx900" "gfx906:xnack-" - "gfx908:xnack-" - "gfx90a:xnack+" - "gfx90a:xnack-" - "gfx940" - "gfx941" - "gfx942" - "gfx1010" - "gfx1012" "gfx1030" "gfx1100" "gfx1101" @@ -395,7 +386,6 @@ function build_rocm() { sign install - # Assumes v5.7, may need adjustments for v6 rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"