calculate overhead based on number of gpu devices (#1875)
This commit is contained in:
parent
e89dc1d54b
commit
c336693f07
8 changed files with 13 additions and 6 deletions
|
@ -110,6 +110,8 @@ func GetGPUInfo() GpuInfo {
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resp.DeviceCount = uint32(memInfo.count)
|
||||||
resp.FreeMemory = uint64(memInfo.free)
|
resp.FreeMemory = uint64(memInfo.free)
|
||||||
resp.TotalMemory = uint64(memInfo.total)
|
resp.TotalMemory = uint64(memInfo.total)
|
||||||
return resp
|
return resp
|
||||||
|
@ -132,7 +134,7 @@ func CheckVRAM() (int64, error) {
|
||||||
gpuInfo := GetGPUInfo()
|
gpuInfo := GetGPUInfo()
|
||||||
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
||||||
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
|
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
|
||||||
overhead := gpuInfo.FreeMemory / 10
|
overhead := gpuInfo.FreeMemory * uint64(gpuInfo.DeviceCount) / 10
|
||||||
if overhead < 384*1024*1024 {
|
if overhead < 384*1024*1024 {
|
||||||
overhead = 384 * 1024 * 1024
|
overhead = 384 * 1024 * 1024
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ func getCPUMem() (memInfo, error) {
|
||||||
return memInfo{
|
return memInfo{
|
||||||
TotalMemory: 0,
|
TotalMemory: 0,
|
||||||
FreeMemory: 0,
|
FreeMemory: 0,
|
||||||
|
DeviceCount: 0,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@ extern "C" {
|
||||||
typedef struct mem_info {
|
typedef struct mem_info {
|
||||||
uint64_t total;
|
uint64_t total;
|
||||||
uint64_t free;
|
uint64_t free;
|
||||||
|
unsigned int count;
|
||||||
char *err; // If non-nil, caller responsible for freeing
|
char *err; // If non-nil, caller responsible for freeing
|
||||||
} mem_info_t;
|
} mem_info_t;
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@ void cpu_check_ram(mem_info_t *resp) {
|
||||||
MEMORYSTATUSEX info;
|
MEMORYSTATUSEX info;
|
||||||
info.dwLength = sizeof(info);
|
info.dwLength = sizeof(info);
|
||||||
if (GlobalMemoryStatusEx(&info) != 0) {
|
if (GlobalMemoryStatusEx(&info) != 0) {
|
||||||
|
resp->count = 1;
|
||||||
resp->total = info.ullTotalPhys;
|
resp->total = info.ullTotalPhys;
|
||||||
resp->free = info.ullAvailPhys;
|
resp->free = info.ullAvailPhys;
|
||||||
} else {
|
} else {
|
||||||
|
@ -26,6 +27,7 @@ void cpu_check_ram(mem_info_t *resp) {
|
||||||
if (sysinfo(&info) != 0) {
|
if (sysinfo(&info) != 0) {
|
||||||
resp->err = strdup(strerror(errno));
|
resp->err = strdup(strerror(errno));
|
||||||
} else {
|
} else {
|
||||||
|
resp->count = 1;
|
||||||
resp->total = info.totalram * info.mem_unit;
|
resp->total = info.totalram * info.mem_unit;
|
||||||
resp->free = info.freeram * info.mem_unit;
|
resp->free = info.freeram * info.mem_unit;
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,8 +94,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int devices;
|
ret = (*h.getCount)(&resp->count);
|
||||||
ret = (*h.getCount)(&devices);
|
|
||||||
if (ret != NVML_SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
snprintf(buf, buflen, "unable to get device count: %d", ret);
|
snprintf(buf, buflen, "unable to get device count: %d", ret);
|
||||||
resp->err = strdup(buf);
|
resp->err = strdup(buf);
|
||||||
|
@ -104,8 +103,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||||
|
|
||||||
resp->total = 0;
|
resp->total = 0;
|
||||||
resp->free = 0;
|
resp->free = 0;
|
||||||
|
for (i = 0; i < resp->count; i++) {
|
||||||
for (i = 0; i < devices; i++) {
|
|
||||||
ret = (*h.getHandle)(i, &device);
|
ret = (*h.getHandle)(i, &device);
|
||||||
if (ret != NVML_SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
|
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
|
||||||
|
|
|
@ -110,6 +110,8 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: set this to the actual number of devices
|
||||||
|
resp->count = 1;
|
||||||
resp->total = totalMem;
|
resp->total = totalMem;
|
||||||
resp->free = totalMem - usedMem;
|
resp->free = totalMem - usedMem;
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -18,6 +18,7 @@ func TestBasicGetGPUInfo(t *testing.T) {
|
||||||
case "linux", "windows":
|
case "linux", "windows":
|
||||||
assert.Greater(t, info.TotalMemory, uint64(0))
|
assert.Greater(t, info.TotalMemory, uint64(0))
|
||||||
assert.Greater(t, info.FreeMemory, uint64(0))
|
assert.Greater(t, info.FreeMemory, uint64(0))
|
||||||
|
assert.Greater(t, info.DeviceCount, uint64(0))
|
||||||
default:
|
default:
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -35,7 +36,6 @@ func TestCPUMemInfo(t *testing.T) {
|
||||||
default:
|
default:
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
|
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
|
||||||
|
|
|
@ -3,6 +3,7 @@ package gpu
|
||||||
type memInfo struct {
|
type memInfo struct {
|
||||||
TotalMemory uint64 `json:"total_memory,omitempty"`
|
TotalMemory uint64 `json:"total_memory,omitempty"`
|
||||||
FreeMemory uint64 `json:"free_memory,omitempty"`
|
FreeMemory uint64 `json:"free_memory,omitempty"`
|
||||||
|
DeviceCount uint32 `json:"device_count,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Beginning of an `ollama info` command
|
// Beginning of an `ollama info` command
|
||||||
|
|
Loading…
Reference in a new issue