From da3bf2335483fac7cbfff72a1b80e40988f4f35a Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 31 May 2024 16:15:21 -0700 Subject: [PATCH] Workaround gfx900 SDMA bugs Implement support for GPU env var workarounds, and leverage this for the Vega RX 56 which needs HSA_ENABLE_SDMA=0 set to work properly --- gpu/amd_linux.go | 5 +++++ gpu/types.go | 3 +++ llm/server.go | 10 ++++++++++ 3 files changed, 18 insertions(+) diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go index 61e6a059..7637c776 100644 --- a/gpu/amd_linux.go +++ b/gpu/amd_linux.go @@ -332,6 +332,11 @@ func AMDGetGPUInfo() []RocmGPUInfo { slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride) } + // Check for env var workarounds + if name == "1002:687f" { // Vega RX 56 + gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"}) + } + // The GPU has passed all the verification steps and is supported resp = append(resp, gpuInfo) } diff --git a/gpu/types.go b/gpu/types.go index 47355959..9920db5f 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -26,6 +26,9 @@ type GpuInfo struct { // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly DependencyPath string `json:"lib_path,omitempty"` + // Extra environment variables specific to the GPU as list of [key,value] + EnvWorkarounds [][2]string `json:"envs,omitempty"` + // GPU information ID string `json:"gpu_id"` // string to use for selection of this specific GPU Name string `json:"name"` // user friendly name if available diff --git a/llm/server.go b/llm/server.go index 6313fc32..117565ba 100644 --- a/llm/server.go +++ b/llm/server.go @@ -320,6 +320,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr s.cmd.Stdout = os.Stdout s.cmd.Stderr = s.status + envWorkarounds := [][2]string{} + for _, gpu := range gpus { + envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...) + } visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv() pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator)) @@ -334,6 +338,12 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) { s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal devicesNeeded = false + } else if len(envWorkarounds) != 0 { + for _, kv := range envWorkarounds { + if strings.EqualFold(cmp[0], kv[0]) { + s.cmd.Env[i] = kv[0] + "=" + kv[1] + } + } } } if pathNeeded {