Merge pull request #1834 from dhiltgen/old_cuda
Detect very old CUDA GPUs and fall back to CPU
This commit is contained in:
commit
059ae4585e
3 changed files with 74 additions and 2 deletions
16
gpu/gpu.go
16
gpu/gpu.go
|
@ -28,6 +28,9 @@ type handles struct {
|
||||||
var gpuMutex sync.Mutex
|
var gpuMutex sync.Mutex
|
||||||
var gpuHandles *handles = nil
|
var gpuHandles *handles = nil
|
||||||
|
|
||||||
|
// TODO verify this is the correct min version
|
||||||
|
const CudaComputeMajorMin = 5
|
||||||
|
|
||||||
// Note: gpuMutex must already be held
|
// Note: gpuMutex must already be held
|
||||||
func initGPUHandles() {
|
func initGPUHandles() {
|
||||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||||
|
@ -73,7 +76,18 @@ func GetGPUInfo() GpuInfo {
|
||||||
log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
|
log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
} else {
|
} else {
|
||||||
resp.Library = "cuda"
|
// Verify minimum compute capability
|
||||||
|
var cc C.cuda_compute_capability_t
|
||||||
|
C.cuda_compute_capability(*gpuHandles.cuda, &cc)
|
||||||
|
if cc.err != nil {
|
||||||
|
log.Printf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err))
|
||||||
|
C.free(unsafe.Pointer(cc.err))
|
||||||
|
} else if cc.major >= CudaComputeMajorMin {
|
||||||
|
log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
|
||||||
|
resp.Library = "cuda"
|
||||||
|
} else {
|
||||||
|
log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if gpuHandles.rocm != nil {
|
} else if gpuHandles.rocm != nil {
|
||||||
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
||||||
|
|
|
@ -21,7 +21,7 @@ const char *cuda_lib_paths[] = {
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CUDA_LOOKUP_SIZE 5
|
#define CUDA_LOOKUP_SIZE 6
|
||||||
|
|
||||||
void cuda_init(cuda_init_resp_t *resp) {
|
void cuda_init(cuda_init_resp_t *resp) {
|
||||||
nvmlReturn_t ret;
|
nvmlReturn_t ret;
|
||||||
|
@ -39,6 +39,7 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||||
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
|
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
|
||||||
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
|
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
|
||||||
{"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
|
{"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
|
||||||
|
{"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
||||||
|
@ -123,4 +124,53 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||||
resp->free += memInfo.free;
|
resp->free += memInfo.free;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Report the lowest CUDA compute capability found across all NVML devices.
//
// h    - NVML library handle plus the function pointers this routine calls
//        (getCount, getHandle, getComputeCapability).
// resp - output. On success resp->err is NULL and resp->major/resp->minor
//        hold the lowest major.minor seen on any device. On failure
//        resp->err is a strdup()'d message that the caller must free(), and
//        resp->major/resp->minor are left at 0.
void cuda_compute_capability(cuda_handle_t h, cuda_compute_capability_t *resp) {
  nvmlDevice_t device;
  nvmlReturn_t ret;
  // Fixed-size array (not a VLA); snprintf is bounded by sizeof(buf) and
  // always NUL-terminates.
  char buf[256];
  unsigned int devices = 0;
  unsigned int i;  // unsigned to match the device count and getHandle's index
  int major = 0;
  int minor = 0;
  int lowestMajor = 0;
  int lowestMinor = 0;

  resp->err = NULL;
  resp->major = 0;
  resp->minor = 0;

  if (h.handle == NULL) {
    resp->err = strdup("nvml handle not initialized");
    return;
  }

  ret = (*h.getCount)(&devices);
  if (ret != NVML_SUCCESS) {
    snprintf(buf, sizeof(buf), "unable to get device count: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  // With no devices there is no capability to report. Surface that as an
  // explicit error instead of returning a successful 0.0 result, which a
  // caller comparing against a minimum version would read as "GPU too old".
  if (devices == 0) {
    resp->err = strdup("no CUDA devices detected");
    return;
  }

  for (i = 0; i < devices; i++) {
    ret = (*h.getHandle)(i, &device);
    if (ret != NVML_SUCCESS) {
      snprintf(buf, sizeof(buf), "unable to get device handle %u: %d", i, ret);
      resp->err = strdup(buf);
      return;
    }

    ret = (*h.getComputeCapability)(device, &major, &minor);
    if (ret != NVML_SUCCESS) {
      snprintf(buf, sizeof(buf),
               "device compute capability lookup failure %u: %d", i, ret);
      resp->err = strdup(buf);
      return;
    }

    // Report the lowest major.minor we detect as that limits our
    // compatibility. The first device seeds the result, so a zero major is
    // never mistaken for "not yet set".
    if (i == 0 || major < lowestMajor ||
        (major == lowestMajor && minor < lowestMinor)) {
      lowestMajor = major;
      lowestMinor = minor;
    }
  }

  // Publish only after every device was queried successfully, so an error
  // return never carries a partially computed capability.
  resp->major = lowestMajor;
  resp->minor = lowestMinor;
}
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
|
@ -22,6 +22,7 @@ typedef struct cuda_handle {
|
||||||
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
|
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
|
||||||
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
|
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
|
||||||
nvmlReturn_t (*getCount)(unsigned int *);
|
nvmlReturn_t (*getCount)(unsigned int *);
|
||||||
|
nvmlReturn_t (*getComputeCapability)(nvmlDevice_t, int* major, int* minor);
|
||||||
} cuda_handle_t;
|
} cuda_handle_t;
|
||||||
|
|
||||||
typedef struct cuda_init_resp {
|
typedef struct cuda_init_resp {
|
||||||
|
@ -29,8 +30,15 @@ typedef struct cuda_init_resp {
|
||||||
cuda_handle_t ch;
|
cuda_handle_t ch;
|
||||||
} cuda_init_resp_t;
|
} cuda_init_resp_t;
|
||||||
|
|
||||||
|
// Result of a compute capability query (see cuda_compute_capability).
typedef struct cuda_compute_capability {
  // Error message, or NULL when the query succeeded.
  char *err;
  // Major component of the reported compute capability.
  int major;
  // Minor component of the reported compute capability.
  int minor;
} cuda_compute_capability_t;
|
||||||
|
|
||||||
void cuda_init(cuda_init_resp_t *resp);
|
void cuda_init(cuda_init_resp_t *resp);
|
||||||
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
|
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
|
||||||
|
void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
|
||||||
|
|
||||||
#endif // __GPU_INFO_CUDA_H__
|
#endif // __GPU_INFO_CUDA_H__
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
Loading…
Add table
Reference in a new issue