From f07f8b7a9ed8cd8c07860a5b7852702ef9737429 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Sun, 28 Jan 2024 13:13:10 -0800 Subject: [PATCH] Harden for zero detected GPUs At least with the ROCm libraries, it's possible to have the library present with zero GPUs. This fix avoids a divide-by-zero bug in llm.go when we try to calculate GPU memory with zero GPUs. --- gpu/gpu.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpu/gpu.go b/gpu/gpu.go index 550467a3..e083ccc1 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -135,7 +135,7 @@ func GetGPUInfo() GpuInfo { if memInfo.err != nil { slog.Info(fmt.Sprintf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))) C.free(unsafe.Pointer(memInfo.err)) - } else { + } else if memInfo.count > 0 { // Verify minimum compute capability var cc C.cuda_compute_capability_t C.cuda_compute_capability(*gpuHandles.cuda, &cc) @@ -157,7 +157,7 @@ func GetGPUInfo() GpuInfo { } else if memInfo.igpu_index >= 0 && memInfo.count == 1 { // Only one GPU detected and it appears to be an integrated GPU - skip it slog.Info("ROCm unsupported integrated GPU detected") - } else { + } else if memInfo.count > 0 { if memInfo.igpu_index >= 0 { // We have multiple GPUs reported, and one of them is an integrated GPU // so we have to set the env var to bypass it