ollama/gpu/gpu_info_cuda.c

#ifndef __APPLE__  // TODO - maybe consider nvidia support on intel macs?

#include "gpu_info_cuda.h"

#include <string.h>

#ifndef _WIN32
const char *cuda_lib_paths[] = {
    "libnvidia-ml.so",
    "/usr/local/cuda/lib64/libnvidia-ml.so",
    "/usr/lib/wsl/lib/libnvidia-ml.so.1",  // TODO Maybe glob?
    NULL,
};
#else
const char *cuda_lib_paths[] = {
    "nvml.dll",
    "",
    NULL,
};
#endif

void cuda_init(cuda_init_resp_t *resp) {
  nvmlReturn_t ret;
  resp->err = NULL;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;

  struct lookup {
    char *s;
    void **p;
  } l[4] = {
      {"nvmlInit_v2", (void *)&resp->ch.initFn},
      {"nvmlShutdown", (void *)&resp->ch.shutdownFn},
      {"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
      {"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
  };

  for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
    resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);
  }
  if (!resp->ch.handle) {
    snprintf(buf, buflen,
             "Unable to load %s library to query for Nvidia GPUs: %s",
             cuda_lib_paths[0], LOAD_ERR());
    resp->err = strdup(buf);
    return;
  }

  for (i = 0; i < 4; i++) {  // TODO - fix this to use a null terminated list
    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
    if (!l[i].p) {
      UNLOAD_LIBRARY(resp->ch.handle);
      resp->ch.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
               LOAD_ERR());
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->ch.initFn)();
  if (ret != NVML_SUCCESS) {
    snprintf(buf, buflen, "nvml vram init failure: %d", ret);
    resp->err = strdup(buf);
  }

  return;
}

void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
  resp->err = NULL;
  nvmlDevice_t device;
  nvmlMemory_t memInfo = {0};
  nvmlReturn_t ret;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;

  if (h.handle == NULL) {
    resp->err = strdup("nvml handle sn't initialized");
    return;
  }

  // TODO - handle multiple GPUs
  ret = (*h.getHandle)(0, &device);
  if (ret != NVML_SUCCESS) {
    snprintf(buf, buflen, "unable to get device handle: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  ret = (*h.getMemInfo)(device, &memInfo);
  if (ret != NVML_SUCCESS) {
    snprintf(buf, buflen, "device memory info lookup failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }
  resp->total = memInfo.total;
  resp->free = memInfo.free;
  return;
}
#endif  // __APPLE__
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`#ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?`

			`#include "gpu_info_cuda.h"`

			`#include <string.h>`

			`#ifndef _WIN32`
			`const char *cuda_lib_paths[] = {`
			`"libnvidia-ml.so",`
			`"/usr/local/cuda/lib64/libnvidia-ml.so",`
Add WSL2 path to nvidia-ml.so library 2023-12-16 04:16:02 +00:00			`"/usr/lib/wsl/lib/libnvidia-ml.so.1", // TODO Maybe glob?`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`NULL,`
			`};`
			`#else`
			`const char *cuda_lib_paths[] = {`
			`"nvml.dll",`
			`"",`
			`NULL,`
			`};`
			`#endif`

			`void cuda_init(cuda_init_resp_t *resp) {`
Refine build to support CPU only If someone checks out the ollama repo and doesn't install the CUDA library, this will ensure they can build a CPU only version 2023-12-14 01:26:47 +00:00			`nvmlReturn_t ret;`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`resp->err = NULL;`
			`const int buflen = 256;`
			`char buf[buflen + 1];`
			`int i;`

			`struct lookup {`
			`char *s;`
			`void **p;`
			`} l[4] = {`
			`{"nvmlInit_v2", (void *)&resp->ch.initFn},`
			`{"nvmlShutdown", (void *)&resp->ch.shutdownFn},`
			`{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},`
			`{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},`
			`};`

			`for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {`
			`resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);`
			`}`
			`if (!resp->ch.handle) {`
			`snprintf(buf, buflen,`
			`"Unable to load %s library to query for Nvidia GPUs: %s",`
			`cuda_lib_paths[0], LOAD_ERR());`
			`resp->err = strdup(buf);`
			`return;`
			`}`

			`for (i = 0; i < 4; i++) { // TODO - fix this to use a null terminated list`
			`*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);`
			`if (!l[i].p) {`
			`UNLOAD_LIBRARY(resp->ch.handle);`
			`resp->ch.handle = NULL;`
			`snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,`
			`LOAD_ERR());`
			`resp->err = strdup(buf);`
			`return;`
			`}`
			`}`
Refine build to support CPU only If someone checks out the ollama repo and doesn't install the CUDA library, this will ensure they can build a CPU only version 2023-12-14 01:26:47 +00:00
			`ret = (*resp->ch.initFn)();`
			`if (ret != NVML_SUCCESS) {`
			`snprintf(buf, buflen, "nvml vram init failure: %d", ret);`
			`resp->err = strdup(buf);`
			`}`

Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`return;`
			`}`

			`void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {`
			`resp->err = NULL;`
			`nvmlDevice_t device;`
			`nvmlMemory_t memInfo = {0};`
			`nvmlReturn_t ret;`
			`const int buflen = 256;`
			`char buf[buflen + 1];`
			`int i;`

			`if (h.handle == NULL) {`
			`resp->err = strdup("nvml handle sn't initialized");`
			`return;`
			`}`

			`// TODO - handle multiple GPUs`
			`ret = (*h.getHandle)(0, &device);`
			`if (ret != NVML_SUCCESS) {`
			`snprintf(buf, buflen, "unable to get device handle: %d", ret);`
			`resp->err = strdup(buf);`
			`return;`
			`}`

			`ret = (*h.getMemInfo)(device, &memInfo);`
			`if (ret != NVML_SUCCESS) {`
			`snprintf(buf, buflen, "device memory info lookup failure: %d", ret);`
			`resp->err = strdup(buf);`
			`return;`
			`}`
			`resp->total = memInfo.total;`
			`resp->free = memInfo.free;`
			`return;`
			`}`
			`#endif // __APPLE__`