Harden for zero detected GPUs

At least with the ROCm libraries, it's possible to have the library
present with zero GPUs. This fix avoids a divide-by-zero bug in llm.go
when we try to calculate GPU memory with zero GPUs.
This commit is contained in:
Daniel Hiltgen 2024-01-28 13:13:10 -08:00
parent e02ecfb6c8
commit f07f8b7a9e

View file

@ -135,7 +135,7 @@ func GetGPUInfo() GpuInfo {
if memInfo.err != nil { if memInfo.err != nil {
slog.Info(fmt.Sprintf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))) slog.Info(fmt.Sprintf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err)))
C.free(unsafe.Pointer(memInfo.err)) C.free(unsafe.Pointer(memInfo.err))
} else { } else if memInfo.count > 0 {
// Verify minimum compute capability // Verify minimum compute capability
var cc C.cuda_compute_capability_t var cc C.cuda_compute_capability_t
C.cuda_compute_capability(*gpuHandles.cuda, &cc) C.cuda_compute_capability(*gpuHandles.cuda, &cc)
@ -157,7 +157,7 @@ func GetGPUInfo() GpuInfo {
} else if memInfo.igpu_index >= 0 && memInfo.count == 1 { } else if memInfo.igpu_index >= 0 && memInfo.count == 1 {
// Only one GPU detected and it appears to be an integrated GPU - skip it // Only one GPU detected and it appears to be an integrated GPU - skip it
slog.Info("ROCm unsupported integrated GPU detected") slog.Info("ROCm unsupported integrated GPU detected")
} else { } else if memInfo.count > 0 {
if memInfo.igpu_index >= 0 { if memInfo.igpu_index >= 0 {
// We have multiple GPUs reported, and one of them is an integrated GPU // We have multiple GPUs reported, and one of them is an integrated GPU
// so we have to set the env var to bypass it // so we have to set the env var to bypass it