ollama/gpu/gpu_info_cuda.h

#ifndef __APPLE__
#ifndef __GPU_INFO_CUDA_H__
#define __GPU_INFO_CUDA_H__
#include "gpu_info.h"

// Just enough typedefs to dlopen/dlsym for memory information
// Status code type returned by every function pointer in cuda_handle_t.
// Only the success value is declared in this header.
typedef enum nvmlReturn_enum {
  NVML_SUCCESS = 0,
  // Other values omitted for now...
} nvmlReturn_t;
// Device handle type written by cuda_handle_t.getHandle and passed to the
// per-device queries (getMemInfo, getComputeCapability).
typedef void *nvmlDevice_t;  // Opaque is sufficient
// Memory counters filled in through cuda_handle_t.getMemInfo.
// NOTE(review): units are presumably bytes, and the field order presumably
// has to match the library's own nvmlMemory_t layout — confirm against the
// NVML headers before changing anything here.
typedef struct nvmlMemory_st {
  unsigned long long total;
  unsigned long long free;
  unsigned long long used;
} nvmlMemory_t;

// Bundle of the library handle and the entry points used to query the GPU.
// NOTE(review): `handle` is presumably the dlopen() result and the function
// pointers presumably come from dlsym() on it — confirm in the matching .c
// file. Parameter names below are documentation only; they do not affect the
// pointer types.
typedef struct cuda_handle {
  void *handle;

  // Library lifecycle.
  nvmlReturn_t (*initFn)(void);
  nvmlReturn_t (*shutdownFn)(void);

  // Device enumeration.
  nvmlReturn_t (*getHandle)(unsigned int device_index, nvmlDevice_t *device);
  nvmlReturn_t (*getMemInfo)(nvmlDevice_t device, nvmlMemory_t *memory);
  nvmlReturn_t (*getCount)(unsigned int *count);

  // Per-device compute capability, reported as a major/minor pair.
  nvmlReturn_t (*getComputeCapability)(nvmlDevice_t device, int *major,
                                       int *minor);
} cuda_handle_t;

// Output structure for cuda_init(): an error string plus the handle.
// NOTE(review): ownership of `err` (who allocates and who frees it) is not
// visible from this header — confirm against the implementation.
typedef struct cuda_init_resp {
  char *err;  // If err is non-null handle is invalid
  cuda_handle_t ch;
} cuda_init_resp_t;

// Output structure for cuda_compute_capability(): an error string plus a
// major/minor version pair.
// NOTE(review): presumably `err` is non-null on failure, mirroring
// cuda_init_resp_t, and major/minor are then not meaningful — confirm against
// the implementation, along with who frees `err`.
typedef struct cuda_compute_capability {
  char *err;
  int major;
  int minor;
} cuda_compute_capability_t;

// Initializes a handle from the library at cuda_lib_path and writes the
// outcome to *resp. Per cuda_init_resp_t, resp->ch is invalid whenever
// resp->err is non-null.
void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp);

// Writes memory information obtained through `ch` into *resp.
// NOTE(review): mem_info_t is declared in gpu_info.h; how multiple devices
// are combined and how errors are reported is not visible here — confirm.
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);

// Writes the compute capability obtained through `ch` into *cc
// (cc->major / cc->minor, with cc->err for error reporting).
// NOTE(review): which device is reported when several are present is not
// visible from this header — confirm against the implementation.
void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);

#endif  // __GPU_INFO_CUDA_H__
#endif  // __APPLE__
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`#ifndef __APPLE__`
			`#ifndef __GPU_INFO_CUDA_H__`
			`#define __GPU_INFO_CUDA_H__`
			`#include "gpu_info.h"`

			`// Just enough typedef's to dlopen/dlsym for memory information`
			`typedef enum nvmlReturn_enum {`
			`NVML_SUCCESS = 0,`
			`// Other values omitted for now...`
			`} nvmlReturn_t;`
			`typedef void *nvmlDevice_t; // Opaque is sufficient`
			`typedef struct nvmlMemory_st {`
			`unsigned long long total;`
			`unsigned long long free;`
			`unsigned long long used;`
			`} nvmlMemory_t;`

			`typedef struct cuda_handle {`
			`void *handle;`
			`nvmlReturn_t (*initFn)(void);`
			`nvmlReturn_t (*shutdownFn)(void);`
			`nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);`
			`nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);`
gpu: read memory info from all cuda devices (#1802) * gpu: read memory info from all cuda devices * add `LOOKUP_SIZE` constant * better constant name * address comments 2024-01-05 16:25:58 +00:00			`nvmlReturn_t (*getCount)(unsigned int *);`
Detect very old CUDA GPUs and fall back to CPU If we try to load the CUDA library on an old GPU, it panics and crashes the server. This checks the compute capability before we load the library so we can gracefully fall back to CPU mode. 2024-01-07 05:40:04 +00:00			`nvmlReturn_t (*getComputeCapability)(nvmlDevice_t, int* major, int* minor);`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`} cuda_handle_t;`

			`typedef struct cuda_init_resp {`
			`char *err; // If err is non-null handle is invalid`
			`cuda_handle_t ch;`
			`} cuda_init_resp_t;`

Detect very old CUDA GPUs and fall back to CPU If we try to load the CUDA library on an old GPU, it panics and crashes the server. This checks the compute capability before we load the library so we can gracefully fall back to CPU mode. 2024-01-07 05:40:04 +00:00			`typedef struct cuda_compute_capability {`
			`char *err;`
			`int major;`
			`int minor;`
			`} cuda_compute_capability_t;`

Harden GPU mgmt library lookup When there are multiple management libraries installed on a system not every one will be compatible with the current driver. This change improves our management library algorithm to build up a set of discovered libraries based on glob patterns, and then try all of them until we're able to load one without error. 2024-01-10 22:39:51 +00:00			`void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp);`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00			`void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);`
Detect very old CUDA GPUs and fall back to CPU If we try to load the CUDA library on an old GPU, it panics and crashes the server. This checks the compute capability before we load the library so we can gracefully fall back to CPU mode. 2024-01-07 05:40:04 +00:00			`void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);`
Adapted rocm support to cgo based llama.cpp 2023-11-29 19:00:37 +00:00
			`#endif // __GPU_INFO_CUDA_H__`
			`#endif // __APPLE__`