calculate overhead based on number of gpu devices (#1875)

Jeffrey Morgan 2024-01-09 15:53:33 -05:00 committed by GitHub
parent e89dc1d54b
commit c336693f07
8 changed files with 13 additions and 6 deletions


@@ -110,6 +110,8 @@ func GetGPUInfo() GpuInfo {
C.free(unsafe.Pointer(memInfo.err))
return resp
}
resp.DeviceCount = uint32(memInfo.count)
resp.FreeMemory = uint64(memInfo.free)
resp.TotalMemory = uint64(memInfo.total)
return resp
@@ -132,7 +134,7 @@ func CheckVRAM() (int64, error) {
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
overhead := gpuInfo.FreeMemory / 10
overhead := gpuInfo.FreeMemory * uint64(gpuInfo.DeviceCount) / 10
if overhead < 384*1024*1024 {
overhead = 384 * 1024 * 1024
}

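The hunk above is the core of the commit: in CheckVRAM, the 10% reservation is now scaled by the number of detected GPU devices, still with a 384 MiB floor. Below is a minimal Go sketch of that rule, assuming FreeMemory is the aggregate free VRAM reported by GetGPUInfo (as the CUDA hunk further down suggests); the helper name and the underflow guard are illustrative and not part of the commit.

package main

import "fmt"

// usableVRAM applies the overhead rule from the hunk above:
// reserve freeMemory * deviceCount / 10 bytes, but never less
// than 384 MiB, and report what remains.
// Hypothetical helper; not the actual CheckVRAM implementation.
func usableVRAM(freeMemory uint64, deviceCount uint32) uint64 {
	overhead := freeMemory * uint64(deviceCount) / 10
	if overhead < 384*1024*1024 {
		overhead = 384 * 1024 * 1024
	}
	// Guard against unsigned underflow when very little VRAM is free
	// (an assumption of this sketch, not logic taken from the diff).
	if overhead > freeMemory {
		return 0
	}
	return freeMemory - overhead
}

func main() {
	// Example: 8 GiB free on a single device reserves ~0.8 GiB,
	// leaving roughly 7.2 GiB usable.
	fmt.Println(usableVRAM(8<<30, 1))
}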

@@ -42,6 +42,7 @@ func getCPUMem() (memInfo, error) {
return memInfo{
TotalMemory: 0,
FreeMemory: 0,
DeviceCount: 0,
}, nil
}


@@ -34,6 +34,7 @@ extern "C" {
typedef struct mem_info {
uint64_t total;
uint64_t free;
unsigned int count;
char *err; // If non-null, caller responsible for freeing
} mem_info_t;


@@ -8,6 +8,7 @@ void cpu_check_ram(mem_info_t *resp) {
MEMORYSTATUSEX info;
info.dwLength = sizeof(info);
if (GlobalMemoryStatusEx(&info) != 0) {
resp->count = 1;
resp->total = info.ullTotalPhys;
resp->free = info.ullAvailPhys;
} else {
@@ -26,6 +27,7 @@ void cpu_check_ram(mem_info_t *resp) {
if (sysinfo(&info) != 0) {
resp->err = strdup(strerror(errno));
} else {
resp->count = 1;
resp->total = info.totalram * info.mem_unit;
resp->free = info.freeram * info.mem_unit;
}


@@ -94,8 +94,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
return;
}
unsigned int devices;
ret = (*h.getCount)(&devices);
ret = (*h.getCount)(&resp->count);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "unable to get device count: %d", ret);
resp->err = strdup(buf);
@@ -104,8 +103,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
resp->total = 0;
resp->free = 0;
for (i = 0; i < devices; i++) {
for (i = 0; i < resp->count; i++) {
ret = (*h.getHandle)(i, &device);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);


@@ -110,6 +110,8 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
return;
}
// TODO: set this to the actual number of devices
resp->count = 1;
resp->total = totalMem;
resp->free = totalMem - usedMem;
return;


@@ -18,6 +18,7 @@ func TestBasicGetGPUInfo(t *testing.T) {
case "linux", "windows":
assert.Greater(t, info.TotalMemory, uint64(0))
assert.Greater(t, info.FreeMemory, uint64(0))
assert.Greater(t, info.DeviceCount, uint32(0))
default:
return
}
@@ -35,7 +36,6 @@ func TestCPUMemInfo(t *testing.T) {
default:
return
}
}
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected


@@ -3,6 +3,7 @@ package gpu
type memInfo struct {
TotalMemory uint64 `json:"total_memory,omitempty"`
FreeMemory uint64 `json:"free_memory,omitempty"`
DeviceCount uint32 `json:"device_count,omitempty"`
}
// Beginning of an `ollama info` command