2024-03-25 11:07:44 -04:00
|
|
|
#ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include "gpu_info_cudart.h"
|
|
|
|
|
|
|
|
// Load the CUDA runtime library at cudart_lib_path, resolve the entry points
// listed in the lookup table below into resp->ch, and select device 0 as a
// basic sanity check.
//
// On success resp->err is NULL and resp->ch.handle holds the loaded library.
// On any failure resp->err is set to a strdup()'d message (presumably freed
// by the caller - confirm against callers); if the library had already been
// loaded it is unloaded again and resp->ch.handle is reset to NULL.
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
  cudartReturn_t ret;
  resp->err = NULL;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;

  // Symbol name -> address of the function-pointer slot that receives it.
  // The table is terminated by a {NULL, NULL} sentinel.
  struct lookup {
    char *s;
    void **p;
  } l[] = {
      {"cudaSetDevice", (void *)&resp->ch.cudaSetDevice},
      {"cudaDeviceSynchronize", (void *)&resp->ch.cudaDeviceSynchronize},
      {"cudaDeviceReset", (void *)&resp->ch.cudaDeviceReset},
      {"cudaMemGetInfo", (void *)&resp->ch.cudaMemGetInfo},
      {"cudaGetDeviceCount", (void *)&resp->ch.cudaGetDeviceCount},
      {"cudaDeviceGetAttribute", (void *)&resp->ch.cudaDeviceGetAttribute},
      {"cudaDriverGetVersion", (void *)&resp->ch.cudaDriverGetVersion},
      {NULL, NULL},
  };

  resp->ch.handle = LOAD_LIBRARY(cudart_lib_path, RTLD_LAZY);
  if (!resp->ch.handle) {
    char *msg = LOAD_ERR();
    LOG(resp->ch.verbose, "library %s load err: %s\n", cudart_lib_path, msg);
    snprintf(buf, buflen,
             "Unable to load %s library to query for Nvidia GPUs: %s",
             cudart_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  LOG(resp->ch.verbose, "wiring cudart library functions in %s\n", cudart_lib_path);

  for (i = 0; l[i].s != NULL; i++) {
    // TODO once we've squashed the remaining corner cases remove this log
    LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
    // Check the pointer that was just stored, not the address of the slot:
    // l[i].p itself is never NULL, so testing it would let a missing symbol
    // slip through and be called as a NULL function pointer later.
    if (!*l[i].p) {
      char *msg = LOAD_ERR();
      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
      UNLOAD_LIBRARY(resp->ch.handle);
      resp->ch.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
               msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->ch.cudaSetDevice)(0);
  if (ret != CUDART_SUCCESS) {
    LOG(resp->ch.verbose, "cudaSetDevice err: %d\n", ret);
    UNLOAD_LIBRARY(resp->ch.handle);
    resp->ch.handle = NULL;
    if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
      resp->err = strdup("your nvidia driver is too old or missing, please upgrade to run ollama");
      return;
    }
    snprintf(buf, buflen, "cudart init failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  int version = 0;
  cudartDriverVersion_t driverVersion;
  driverVersion.major = 0;
  driverVersion.minor = 0;

  // Report driver version if we're in verbose mode, ignore errors
  ret = (*resp->ch.cudaDriverGetVersion)(&version);
  if (ret != CUDART_SUCCESS) {
    LOG(resp->ch.verbose, "cudaDriverGetVersion failed: %d\n", ret);
  } else {
    // The reported integer is split as major = version / 1000 and
    // minor = (remainder) / 10.
    driverVersion.major = version / 1000;
    driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
    LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Sum total and free device memory across every device the runtime reports.
//
// On success resp->count holds the device count and resp->total / resp->free
// hold the accumulated values, with resp->err left NULL. On failure resp->err
// is set to a strdup()'d message and the function returns immediately;
// resp->total and resp->free are only reset once the device count has been
// obtained successfully.
void cudart_check_vram(cudart_handle_t h, mem_info_t *resp) {
  const int buflen = 256;
  char buf[buflen + 1];
  cudartMemory_t mem = {0, 0, 0};
  cudartReturn_t rc;
  int dev;
  int deviceCount;

  resp->err = NULL;

  if (h.handle == NULL) {
    resp->err = strdup("cudart handle isn't initialized");
    return;
  }

  // cudaGetDeviceCount takes int type, resp-> count is uint
  rc = (*h.cudaGetDeviceCount)(&deviceCount);
  if (rc != CUDART_SUCCESS) {
    snprintf(buf, buflen, "unable to get device count: %d", rc);
    resp->err = strdup(buf);
    return;
  }
  resp->count = (unsigned int)deviceCount;

  resp->total = 0;
  resp->free = 0;

  for (dev = 0; dev < resp->count; dev++) {
    // Make this device current so the memory query below applies to it.
    rc = (*h.cudaSetDevice)(dev);
    if (rc != CUDART_SUCCESS) {
      resp->err = strdup("cudart device failed to initialize");
      return;
    }

    rc = (*h.cudaMemGetInfo)(&mem.free, &mem.total);
    if (rc != CUDART_SUCCESS) {
      snprintf(buf, buflen, "cudart device memory info lookup failure %d", rc);
      resp->err = strdup(buf);
      return;
    }

    // NOTE(review): %lu assumes the memory fields are unsigned long - confirm
    // against the cudartMemory_t declaration.
    LOG(h.verbose, "[%d] CUDA totalMem %lu\n", dev, mem.total);
    LOG(h.verbose, "[%d] CUDA freeMem %lu\n", dev, mem.free);

    resp->total += mem.total;
    resp->free += mem.free;
  }
}
|
|
|
|
|
|
|
|
// Determine the lowest compute capability (major.minor) among all devices the
// runtime reports, since the weakest device limits compatibility.
//
// resp->major / resp->minor start at 0 and are lowered as devices are
// inspected. On failure resp->err is set to a strdup()'d message and the
// function returns immediately, leaving whatever values were gathered so far.
void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *resp) {
  const int buflen = 256;
  char buf[buflen + 1];
  cudartReturn_t rc;
  int dev_major = 0;
  int dev_minor = 0;
  int devices;
  int dev;

  resp->err = NULL;
  resp->major = 0;
  resp->minor = 0;

  if (h.handle == NULL) {
    resp->err = strdup("cudart handle not initialized");
    return;
  }

  rc = (*h.cudaGetDeviceCount)(&devices);
  if (rc != CUDART_SUCCESS) {
    snprintf(buf, buflen, "unable to get cudart device count: %d", rc);
    resp->err = strdup(buf);
    return;
  }

  for (dev = 0; dev < devices; dev++) {
    rc = (*h.cudaSetDevice)(dev);
    if (rc != CUDART_SUCCESS) {
      resp->err = strdup("cudart device failed to initialize");
      return;
    }

    rc = (*h.cudaDeviceGetAttribute)(&dev_major, cudartDevAttrComputeCapabilityMajor, dev);
    if (rc != CUDART_SUCCESS) {
      snprintf(buf, buflen, "device compute capability lookup failure %d: %d", dev, rc);
      resp->err = strdup(buf);
      return;
    }

    rc = (*h.cudaDeviceGetAttribute)(&dev_minor, cudartDevAttrComputeCapabilityMinor, dev);
    if (rc != CUDART_SUCCESS) {
      snprintf(buf, buflen, "device compute capability lookup failure %d: %d", dev, rc);
      resp->err = strdup(buf);
      return;
    }

    // Keep the smallest major.minor pair seen so far. A stored major of 0
    // means nothing has been recorded yet.
    int lower_major = (resp->major == 0) || (dev_major < resp->major);
    int lower_minor = (resp->major == dev_major) && (dev_minor < resp->minor);
    if (lower_major) {
      resp->major = dev_major;
      resp->minor = dev_minor;
    } else if (lower_minor) {
      resp->minor = dev_minor;
    }
  }
}
|
|
|
|
|
2024-03-30 15:34:21 -07:00
|
|
|
// Unload the cudart library referenced by h.
//
// h is received by value, so this function cannot clear the caller's copy of
// the handle; callers that keep the struct around must reset their own
// handle field after calling this. A NULL handle (e.g. after a failed
// cudart_init) is treated as "nothing to release", matching the NULL-handle
// guards in cudart_check_vram and cudart_compute_capability.
void cudart_release(cudart_handle_t h) {
  if (h.handle == NULL) {
    return;
  }
  LOG(h.verbose, "releasing cudart library\n");
  UNLOAD_LIBRARY(h.handle);
}
|
|
|
|
|
2024-03-25 11:07:44 -04:00
|
|
|
#endif // __APPLE__
|