From 283948c83b5cbf74f6cf86dce4434238e64d6e1c Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 19 Jul 2024 15:07:26 -0700 Subject: [PATCH] Adjust Windows ROCm discovery The v5 hip library returns unsupported GPUs which won't enumerate at inference time in the runner so this makes sure we align discovery. The gfx906 cards are no longer supported so we shouldn't compile with that GPU type as it won't enumerate at runtime. --- docs/gpu.md | 15 +++++++++++++-- gpu/amd_hip_windows.go | 5 +++-- gpu/amd_windows.go | 3 ++- llm/generate/gen_windows.ps1 | 2 +- llm/server.go | 2 ++ 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/docs/gpu.md b/docs/gpu.md index 80f276c3..e669ea32 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -46,13 +46,24 @@ sudo modprobe nvidia_uvm` ## AMD Radeon Ollama supports the following AMD GPUs: + +### Linux Support | Family | Cards and accelerators | | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | | AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56` | | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` | | AMD Instinct | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50` | -### Overrides +### Windows Support +With ROCm v6.1, the following GPUs are supported on Windows. 
+ +| Family | Cards and accelerators | +| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` | +| AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` | + + +### Overrides on Linux Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In some cases you can force the system to try to use a similar LLVM target that is close. For example The Radeon RX 5400 is `gfx1034` (also known as 10.3.4) @@ -63,7 +74,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the server. If you have an unsupported AMD GPU you can experiment using the list of supported types below. -At this time, the known supported GPU types are the following LLVM Targets. +At this time, the known supported GPU types on Linux are the following LLVM Targets. 
This table shows some example GPUs that map to these LLVM targets: | **LLVM Target** | **An Example GPU** | |-----------------|---------------------| diff --git a/gpu/amd_hip_windows.go b/gpu/amd_hip_windows.go index 2586278c..98806234 100644 --- a/gpu/amd_hip_windows.go +++ b/gpu/amd_hip_windows.go @@ -33,9 +33,10 @@ type HipLib struct { } func NewHipLib() (*HipLib, error) { - h, err := windows.LoadLibrary("amdhip64.dll") + // At runtime we depend on v6, so discover GPUs with the same library for a consistent set of GPUs + h, err := windows.LoadLibrary("amdhip64_6.dll") if err != nil { - return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err) + return nil, fmt.Errorf("unable to load amdhip64_6.dll, please make sure to upgrade to the latest amd driver: %w", err) } hl := &HipLib{} hl.dll = h diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index 425259d7..20aed447 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -92,7 +92,8 @@ func AMDGetGPUInfo() []RocmGPUInfo { continue } if gfxOverride == "" { - if !slices.Contains[[]string, string](supported, gfx) { + // Strip off Target Features when comparing + if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) { slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported) // TODO - consider discrete markdown just for ROCM troubleshooting? 
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage") diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index beb964f9..d8bce92d 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -7,8 +7,8 @@ function amdGPUs { return $env:AMDGPU_TARGETS } # Current supported rocblas list from ROCm v6.1.2 on windows + # https://rocm.docs.amd.com/projects/install-on-windows/en/latest/reference/system-requirements.html#windows-supported-gpus $GPU_LIST = @( - "gfx906:xnack-" "gfx1030" "gfx1100" "gfx1101" diff --git a/llm/server.go b/llm/server.go index 36c0e0b5..ba7eab03 100644 --- a/llm/server.go +++ b/llm/server.go @@ -385,8 +385,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr filteredEnv := []string{} for _, ev := range s.cmd.Env { if strings.HasPrefix(ev, "CUDA_") || + strings.HasPrefix(ev, "ROCR_") || strings.HasPrefix(ev, "ROCM_") || strings.HasPrefix(ev, "HIP_") || + strings.HasPrefix(ev, "GPU_") || strings.HasPrefix(ev, "HSA_") || strings.HasPrefix(ev, "GGML_") || strings.HasPrefix(ev, "PATH=") ||