diff --git a/gpu/gpu.go b/gpu/gpu.go
index b9f6e4e0..45b55ffb 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -28,6 +28,9 @@ type handles struct {
 var gpuMutex sync.Mutex
 var gpuHandles *handles = nil
 
+// TODO verify this is the correct min version
+const CudaComputeMajorMin = 5
+
 // Note: gpuMutex must already be held
 func initGPUHandles() {
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
@@ -73,7 +76,18 @@ func GetGPUInfo() GpuInfo {
 			log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
 		} else {
-			resp.Library = "cuda"
+			// Verify minimum compute capability
+			var cc C.cuda_compute_capability_t
+			C.cuda_compute_capability(*gpuHandles.cuda, &cc)
+			if cc.err != nil {
+				log.Printf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err))
+				C.free(unsafe.Pointer(cc.err))
+			} else if cc.major >= CudaComputeMajorMin {
+				log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
+				resp.Library = "cuda"
+			} else {
+				log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
+			}
 		}
 	} else if gpuHandles.rocm != nil {
 		C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
diff --git a/gpu/gpu_info_cuda.c b/gpu/gpu_info_cuda.c
index f071f909..9dc97bd9 100644
--- a/gpu/gpu_info_cuda.c
+++ b/gpu/gpu_info_cuda.c
@@ -21,7 +21,7 @@ const char *cuda_lib_paths[] = {
 };
 #endif
 
-#define CUDA_LOOKUP_SIZE 5
+#define CUDA_LOOKUP_SIZE 6
 
 void cuda_init(cuda_init_resp_t *resp) {
   nvmlReturn_t ret;
@@ -39,6 +39,7 @@ void cuda_init(cuda_init_resp_t *resp) {
       {"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
       {"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
       {"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
+      {"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
   };
 
   for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
@@ -123,4 +124,53 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
     resp->free += memInfo.free;
   }
 }
+
+void cuda_compute_capability(cuda_handle_t h, cuda_compute_capability_t *resp) {
+  resp->err = NULL;
+  resp->major = 0;
+  resp->minor = 0;
+  nvmlDevice_t device;
+  int major = 0;
+  int minor = 0;
+  nvmlReturn_t ret;
+  const int buflen = 256;
+  char buf[buflen + 1];
+  int i;
+
+  if (h.handle == NULL) {
+    resp->err = strdup("nvml handle not initialized");
+    return;
+  }
+
+  unsigned int devices;
+  ret = (*h.getCount)(&devices);
+  if (ret != NVML_SUCCESS) {
+    snprintf(buf, buflen, "unable to get device count: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  for (i = 0; i < devices; i++) {
+    ret = (*h.getHandle)(i, &device);
+    if (ret != NVML_SUCCESS) {
+      snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
+      resp->err = strdup(buf);
+      return;
+    }
+
+    ret = (*h.getComputeCapability)(device, &major, &minor);
+    if (ret != NVML_SUCCESS) {
+      snprintf(buf, buflen, "device compute capability lookup failure %d: %d", i, ret);
+      resp->err = strdup(buf);
+      return;
+    }
+    // Report the lowest major.minor we detect as that limits our compatibility
+    if (resp->major == 0 || resp->major > major ) {
+      resp->major = major;
+      resp->minor = minor;
+    } else if ( resp->major == major && resp->minor > minor ) {
+      resp->minor = minor;
+    }
+  }
+}
 #endif // __APPLE__
\ No newline at end of file
diff --git a/gpu/gpu_info_cuda.h b/gpu/gpu_info_cuda.h
index 9a66a735..81995ab2 100644
--- a/gpu/gpu_info_cuda.h
+++ b/gpu/gpu_info_cuda.h
@@ -22,6 +22,7 @@ typedef struct cuda_handle {
   nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
   nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
   nvmlReturn_t (*getCount)(unsigned int *);
+  nvmlReturn_t (*getComputeCapability)(nvmlDevice_t, int* major, int* minor);
 } cuda_handle_t;
 
 typedef struct cuda_init_resp {
@@ -29,8 +30,15 @@ typedef struct cuda_init_resp {
   char *err;
   cuda_handle_t ch;
 } cuda_init_resp_t;
 
+typedef struct cuda_compute_capability {
+  char *err;
+  int major;
+  int minor;
+} cuda_compute_capability_t;
+
 void cuda_init(cuda_init_resp_t *resp);
 void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
+void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
 #endif // __GPU_INFO_CUDA_H__
 #endif // __APPLE__
\ No newline at end of file