Reintroduce the NVIDIA NVML library for Windows
NVML gives us the most reliable free-VRAM reporting on Windows, which is needed to enable concurrent model scheduling.
This commit is contained in:
parent
4e2b7e181d
commit
434dfe30c5
8 changed files with 248 additions and 9 deletions
79
gpu/gpu.go
79
gpu/gpu.go
|
@ -28,6 +28,7 @@ type cudaHandles struct {
|
||||||
deviceCount int
|
deviceCount int
|
||||||
cudart *C.cudart_handle_t
|
cudart *C.cudart_handle_t
|
||||||
nvcuda *C.nvcuda_handle_t
|
nvcuda *C.nvcuda_handle_t
|
||||||
|
nvml *C.nvml_handle_t
|
||||||
}
|
}
|
||||||
|
|
||||||
type oneapiHandles struct {
|
type oneapiHandles struct {
|
||||||
|
@ -50,6 +51,7 @@ var (
|
||||||
nvcudaLibPath string
|
nvcudaLibPath string
|
||||||
cudartLibPath string
|
cudartLibPath string
|
||||||
oneapiLibPath string
|
oneapiLibPath string
|
||||||
|
nvmlLibPath string
|
||||||
rocmGPUs []RocmGPUInfo
|
rocmGPUs []RocmGPUInfo
|
||||||
oneapiGPUs []OneapiGPUInfo
|
oneapiGPUs []OneapiGPUInfo
|
||||||
)
|
)
|
||||||
|
@ -81,6 +83,10 @@ var CudartWindowsGlobs = []string{
|
||||||
"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
|
"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NvmlWindowsGlobs lists the glob patterns used to locate the NVML management
// library (nvml.dll) on Windows.
var NvmlWindowsGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}
|
||||||
|
|
||||||
var NvcudaLinuxGlobs = []string{
|
var NvcudaLinuxGlobs = []string{
|
||||||
"/usr/local/cuda*/targets/*/lib/libcuda.so*",
|
"/usr/local/cuda*/targets/*/lib/libcuda.so*",
|
||||||
"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
|
"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
|
||||||
|
@ -117,6 +123,10 @@ func initCudaHandles() *cudaHandles {
|
||||||
|
|
||||||
cHandles := &cudaHandles{}
|
cHandles := &cudaHandles{}
|
||||||
// Short Circuit if we already know which library to use
|
// Short Circuit if we already know which library to use
|
||||||
|
if nvmlLibPath != "" {
|
||||||
|
cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
|
||||||
|
return cHandles
|
||||||
|
}
|
||||||
if nvcudaLibPath != "" {
|
if nvcudaLibPath != "" {
|
||||||
cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
|
cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
|
||||||
return cHandles
|
return cHandles
|
||||||
|
@ -131,6 +141,8 @@ func initCudaHandles() *cudaHandles {
|
||||||
var cudartMgmtPatterns []string
|
var cudartMgmtPatterns []string
|
||||||
var nvcudaMgmtName string
|
var nvcudaMgmtName string
|
||||||
var nvcudaMgmtPatterns []string
|
var nvcudaMgmtPatterns []string
|
||||||
|
var nvmlMgmtName string
|
||||||
|
var nvmlMgmtPatterns []string
|
||||||
|
|
||||||
tmpDir, _ := PayloadsDir()
|
tmpDir, _ := PayloadsDir()
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
|
@ -142,6 +154,12 @@ func initCudaHandles() *cudaHandles {
|
||||||
// Aligned with driver, we can't carry as payloads
|
// Aligned with driver, we can't carry as payloads
|
||||||
nvcudaMgmtName = "nvcuda.dll"
|
nvcudaMgmtName = "nvcuda.dll"
|
||||||
nvcudaMgmtPatterns = NvcudaWindowsGlobs
|
nvcudaMgmtPatterns = NvcudaWindowsGlobs
|
||||||
|
|
||||||
|
// Use nvml to refresh free memory on windows only
|
||||||
|
nvmlMgmtName = "nvml.dll"
|
||||||
|
nvmlMgmtPatterns = make([]string, len(NvmlWindowsGlobs))
|
||||||
|
copy(nvmlMgmtPatterns, NvmlWindowsGlobs)
|
||||||
|
|
||||||
case "linux":
|
case "linux":
|
||||||
cudartMgmtName = "libcudart.so*"
|
cudartMgmtName = "libcudart.so*"
|
||||||
if tmpDir != "" {
|
if tmpDir != "" {
|
||||||
|
@ -152,10 +170,24 @@ func initCudaHandles() *cudaHandles {
|
||||||
// Aligned with driver, we can't carry as payloads
|
// Aligned with driver, we can't carry as payloads
|
||||||
nvcudaMgmtName = "libcuda.so*"
|
nvcudaMgmtName = "libcuda.so*"
|
||||||
nvcudaMgmtPatterns = NvcudaLinuxGlobs
|
nvcudaMgmtPatterns = NvcudaLinuxGlobs
|
||||||
|
|
||||||
|
// nvml omitted on linux
|
||||||
default:
|
default:
|
||||||
return cHandles
|
return cHandles
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(nvmlMgmtPatterns) > 0 {
|
||||||
|
nvmlLibPaths := FindGPULibs(nvmlMgmtName, nvmlMgmtPatterns)
|
||||||
|
if len(nvmlLibPaths) > 0 {
|
||||||
|
nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
|
||||||
|
if nvml != nil {
|
||||||
|
slog.Debug("nvidia-ml loaded", "library", libPath)
|
||||||
|
cHandles.nvml = nvml
|
||||||
|
nvmlLibPath = libPath
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
|
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
|
||||||
if len(nvcudaLibPaths) > 0 {
|
if len(nvcudaLibPaths) > 0 {
|
||||||
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
|
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
|
||||||
|
@ -230,6 +262,9 @@ func GetGPUInfo() GpuInfoList {
|
||||||
if cHandles.nvcuda != nil {
|
if cHandles.nvcuda != nil {
|
||||||
C.nvcuda_release(*cHandles.nvcuda)
|
C.nvcuda_release(*cHandles.nvcuda)
|
||||||
}
|
}
|
||||||
|
if cHandles.nvml != nil {
|
||||||
|
C.nvml_release(*cHandles.nvml)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if oHandles != nil {
|
if oHandles != nil {
|
||||||
if oHandles.oneapi != nil {
|
if oHandles.oneapi != nil {
|
||||||
|
@ -365,10 +400,17 @@ func GetGPUInfo() GpuInfoList {
|
||||||
cHandles = initCudaHandles()
|
cHandles = initCudaHandles()
|
||||||
}
|
}
|
||||||
for i, gpu := range cudaGPUs {
|
for i, gpu := range cudaGPUs {
|
||||||
if cHandles.cudart != nil {
|
if cHandles.nvml != nil {
|
||||||
|
C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
|
||||||
|
} else if cHandles.cudart != nil {
|
||||||
C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
|
C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
|
||||||
|
} else if cHandles.nvcuda != nil {
|
||||||
|
C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
|
||||||
|
memInfo.used = memInfo.total - memInfo.free
|
||||||
} else {
|
} else {
|
||||||
C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free)
|
// shouldn't happen
|
||||||
|
slog.Warn("no valid cuda library loaded to refresh vram usage")
|
||||||
|
break
|
||||||
}
|
}
|
||||||
if memInfo.err != nil {
|
if memInfo.err != nil {
|
||||||
slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
||||||
|
@ -379,7 +421,21 @@ func GetGPUInfo() GpuInfoList {
|
||||||
slog.Warn("error looking up nvidia GPU memory")
|
slog.Warn("error looking up nvidia GPU memory")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
slog.Debug("updating cuda free memory", "gpu", gpu.ID, "name", gpu.Name, "before", format.HumanBytes2(gpu.FreeMemory), "now", format.HumanBytes2(uint64(memInfo.free)))
|
slog.Debug("updating cuda memory data",
|
||||||
|
"gpu", gpu.ID,
|
||||||
|
"name", gpu.Name,
|
||||||
|
slog.Group(
|
||||||
|
"before",
|
||||||
|
"total", format.HumanBytes2(gpu.TotalMemory),
|
||||||
|
"free", format.HumanBytes2(gpu.FreeMemory),
|
||||||
|
),
|
||||||
|
slog.Group(
|
||||||
|
"now",
|
||||||
|
"total", format.HumanBytes2(uint64(memInfo.total)),
|
||||||
|
"free", format.HumanBytes2(uint64(memInfo.free)),
|
||||||
|
"used", format.HumanBytes2(uint64(memInfo.used)),
|
||||||
|
),
|
||||||
|
)
|
||||||
cudaGPUs[i].FreeMemory = uint64(memInfo.free)
|
cudaGPUs[i].FreeMemory = uint64(memInfo.free)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -530,6 +586,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
|
||||||
return 0, nil, ""
|
return 0, nil, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
|
||||||
|
var resp C.nvml_init_resp_t
|
||||||
|
resp.ch.verbose = getVerboseState()
|
||||||
|
for _, libPath := range nvmlLibPaths {
|
||||||
|
lib := C.CString(libPath)
|
||||||
|
defer C.free(unsafe.Pointer(lib))
|
||||||
|
C.nvml_init(lib, &resp)
|
||||||
|
if resp.err != nil {
|
||||||
|
slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
|
||||||
|
C.free(unsafe.Pointer(resp.err))
|
||||||
|
} else {
|
||||||
|
return &resp.ch, libPath
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, ""
|
||||||
|
}
|
||||||
|
|
||||||
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
|
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
|
||||||
var resp C.oneapi_init_resp_t
|
var resp C.oneapi_init_resp_t
|
||||||
num_devices := 0
|
num_devices := 0
|
||||||
|
|
|
@ -47,6 +47,7 @@ typedef struct mem_info {
|
||||||
char gpu_name[GPU_NAME_LEN];
|
char gpu_name[GPU_NAME_LEN];
|
||||||
uint64_t total;
|
uint64_t total;
|
||||||
uint64_t free;
|
uint64_t free;
|
||||||
|
uint64_t used;
|
||||||
|
|
||||||
// Compute Capability
|
// Compute Capability
|
||||||
int major;
|
int major;
|
||||||
|
@ -62,6 +63,7 @@ void cpu_check_ram(mem_info_t *resp);
|
||||||
|
|
||||||
#include "gpu_info_cudart.h"
|
#include "gpu_info_cudart.h"
|
||||||
#include "gpu_info_nvcuda.h"
|
#include "gpu_info_nvcuda.h"
|
||||||
|
#include "gpu_info_nvml.h"
|
||||||
#include "gpu_info_oneapi.h"
|
#include "gpu_info_oneapi.h"
|
||||||
|
|
||||||
#endif // __GPU_INFO_H__
|
#endif // __GPU_INFO_H__
|
||||||
|
|
|
@ -166,9 +166,11 @@ void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
|
||||||
|
|
||||||
resp->total = memInfo.total;
|
resp->total = memInfo.total;
|
||||||
resp->free = memInfo.free;
|
resp->free = memInfo.free;
|
||||||
|
resp->used = memInfo.used;
|
||||||
|
|
||||||
LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
|
LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
|
||||||
LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
|
LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
|
||||||
|
LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
|
||||||
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
|
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -197,12 +197,12 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free) {
|
void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free, uint64_t *total) {
|
||||||
CUresult ret;
|
CUresult ret;
|
||||||
CUcontext ctx = NULL;
|
CUcontext ctx = NULL;
|
||||||
CUdevice device = -1;
|
CUdevice device = -1;
|
||||||
*free = 0;
|
*free = 0;
|
||||||
uint64_t total = 0;
|
*total = 0;
|
||||||
|
|
||||||
ret = (*h.cuDeviceGet)(&device, i);
|
ret = (*h.cuDeviceGet)(&device, i);
|
||||||
if (ret != CUDA_SUCCESS) {
|
if (ret != CUDA_SUCCESS) {
|
||||||
|
@ -218,7 +218,7 @@ void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = (*h.cuMemGetInfo_v2)(free, &total);
|
ret = (*h.cuMemGetInfo_v2)(free, total);
|
||||||
if (ret != CUDA_SUCCESS) {
|
if (ret != CUDA_SUCCESS) {
|
||||||
LOG(1, "nvcuda device memory info lookup failure %d", ret);
|
LOG(1, "nvcuda device memory info lookup failure %d", ret);
|
||||||
// Best effort on failure...
|
// Best effort on failure...
|
||||||
|
|
|
@ -68,7 +68,7 @@ typedef struct nvcuda_init_resp {
|
||||||
|
|
||||||
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
|
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
|
||||||
void nvcuda_bootstrap(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
|
void nvcuda_bootstrap(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
|
||||||
void nvcuda_get_free(nvcuda_handle_t ch, int device_id, uint64_t *free);
|
void nvcuda_get_free(nvcuda_handle_t ch, int device_id, uint64_t *free, uint64_t *total);
|
||||||
void nvcuda_release(nvcuda_handle_t ch);
|
void nvcuda_release(nvcuda_handle_t ch);
|
||||||
|
|
||||||
#endif // __GPU_INFO_NVCUDA_H__
|
#endif // __GPU_INFO_NVCUDA_H__
|
||||||
|
|
112
gpu/gpu_info_nvml.c
Normal file
112
gpu/gpu_info_nvml.c
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
#ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "gpu_info_nvml.h"
|
||||||
|
|
||||||
|
// nvml_init loads the NVML shared library at nvml_lib_path, resolves the
// entry points listed in the lookup table into resp->ch, and then calls
// nvmlInit_v2.
//
// On success resp->err is NULL and resp->ch holds the library handle and the
// resolved function pointers. On any failure resp->err is set to a strdup'd
// message (the caller frees it), the library is unloaded if it had been
// opened, and resp->ch.handle is left NULL.
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
  nvmlReturn_t ret;
  resp->err = NULL;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;

  // Symbol name -> destination slot inside resp->ch; NULL-terminated.
  struct lookup {
    char *s;
    void **p;
  } l[] = {
      {"nvmlInit_v2", (void *)&resp->ch.nvmlInit_v2},
      {"nvmlShutdown", (void *)&resp->ch.nvmlShutdown},
      {"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.nvmlDeviceGetHandleByIndex},
      {"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.nvmlDeviceGetMemoryInfo},
      {NULL, NULL},
  };

  resp->ch.handle = LOAD_LIBRARY(nvml_lib_path, RTLD_LAZY);
  if (!resp->ch.handle) {
    char *msg = LOAD_ERR();
    LOG(resp->ch.verbose, "library %s load err: %s\n", nvml_lib_path, msg);
    snprintf(buf, buflen,
             "Unable to load %s library to query for Nvidia GPUs: %s",
             nvml_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  // LOG(resp->ch.verbose, "wiring nvidia management library functions in %s\n", nvml_lib_path);

  for (i = 0; l[i].s != NULL; i++) {
    // TODO once we've squashed the remaining corner cases remove this log
    LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
    // Test the resolved symbol itself; l[i].p is the address of the slot and
    // is never NULL.
    if (!*(l[i].p)) {
      // Fetch the loader error first, in case unloading overwrites it.
      char *msg = LOAD_ERR();
      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
      // Unload before clearing the handle so the library is actually released.
      UNLOAD_LIBRARY(resp->ch.handle);
      resp->ch.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
               msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->ch.nvmlInit_v2)();
  if (ret != NVML_SUCCESS) {
    LOG(resp->ch.verbose, "nvmlInit_v2 err: %d\n", ret);
    UNLOAD_LIBRARY(resp->ch.handle);
    resp->ch.handle = NULL;
    snprintf(buf, buflen, "nvml vram init failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
// nvml_get_free looks up the device at index device_id through NVML and
// copies its memory counters into *free, *total and *used.
//
// All three outputs are zeroed before any NVML call, so when either the
// device-handle lookup or the memory query fails the caller sees zeros in
// every field rather than whatever the variables held before the call.
void nvml_get_free(nvml_handle_t h, int device_id, uint64_t *free, uint64_t *total, uint64_t *used) {
  nvmlDevice_t device;
  nvmlMemory_t memInfo = {0};
  nvmlReturn_t ret;

  *free = 0;
  *total = 0;
  *used = 0;

  ret = (*h.nvmlDeviceGetHandleByIndex)(device_id, &device);
  if (ret != NVML_SUCCESS) {
    LOG(1, "unable to get device handle %d: %d", device_id, ret);
    return;
  }

  ret = (*h.nvmlDeviceGetMemoryInfo)(device, &memInfo);
  if (ret != NVML_SUCCESS) {
    LOG(1, "device memory info lookup failure %d: %d", device_id, ret);
    return;
  }

  *free = memInfo.free;
  *total = memInfo.total;
  *used = memInfo.used;
}
|
||||||
|
|
||||||
|
|
||||||
|
// nvml_release calls nvmlShutdown and then unloads the NVML library.
// A shutdown error is logged but does not prevent the unload.
//
// h is passed by value, so this function cannot clear the caller's copy of
// the handle; the caller must not use its handle again after this returns.
void nvml_release(nvml_handle_t h) {
  LOG(h.verbose, "releasing nvml library\n");
  nvmlReturn_t ret;
  ret = (*h.nvmlShutdown)();
  if (ret != NVML_SUCCESS) {
    LOG(1, "error during nvmlShutdown %d", ret);
  }
  UNLOAD_LIBRARY(h.handle);
}
|
||||||
|
|
||||||
|
#endif // __APPLE__
|
48
gpu/gpu_info_nvml.h
Normal file
48
gpu/gpu_info_nvml.h
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
#ifndef __APPLE__
#ifndef __GPU_INFO_NVML_H__
#define __GPU_INFO_NVML_H__
#include "gpu_info.h"

// Just enough typedef's to dlopen/dlsym for memory information

// Return code of the NVML entry points; only the success value is declared.
typedef enum nvmlReturn_enum {
  NVML_SUCCESS = 0,
  // Other values omitted for now...
} nvmlReturn_t;
typedef void *nvmlDevice_t;  // Opaque is sufficient

// Memory counters filled in by nvmlDeviceGetMemoryInfo.
typedef struct nvmlMemory_st {
  unsigned long long total;
  unsigned long long free;
  unsigned long long used;
} nvmlMemory_t;

typedef enum nvmlBrandType_enum
{
  NVML_BRAND_UNKNOWN = 0,
} nvmlBrandType_t;

// Loaded NVML library plus the function pointers resolved from it by
// nvml_init.
typedef struct nvml_handle {
  void *handle;      // library handle returned by the loader
  uint16_t verbose;  // passed to LOG to gate diagnostic output
  nvmlReturn_t (*nvmlInit_v2)(void);
  nvmlReturn_t (*nvmlShutdown)(void);
  nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(unsigned int, nvmlDevice_t *);
  nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t, nvmlMemory_t *);
} nvml_handle_t;

// Result of nvml_init.
typedef struct nvml_init_resp {
  char *err;  // If err is non-null handle is invalid
  nvml_handle_t ch;
} nvml_init_resp_t;

typedef struct nvml_compute_capability {
  char *err;
  int major;
  int minor;
} nvml_compute_capability_t;

// Loads the NVML library at nvml_lib_path and fills in resp.
void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
// Reports the memory counters of the device at index device_id.
void nvml_get_free(nvml_handle_t ch, int device_id, uint64_t *free, uint64_t *total, uint64_t *used);
// Shuts NVML down and unloads the library.
void nvml_release(nvml_handle_t ch);

#endif  // __GPU_INFO_NVML_H__
#endif  // __APPLE__
|
|
@ -487,8 +487,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||||
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
|
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
|
||||||
finished := make(chan interface{}, 1)
|
finished := make(chan interface{}, 1)
|
||||||
|
|
||||||
// CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
|
// CPU or Metal don't need checking, so no waiting required
|
||||||
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
|
// windows can page VRAM, only cuda currently can report accurate used vram usage
|
||||||
|
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) ||
|
||||||
|
(runtime.GOOS == "windows" && runner.gpus[0].Library != "cuda") {
|
||||||
finished <- struct{}{}
|
finished <- struct{}{}
|
||||||
return finished
|
return finished
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue