Fix Windows system memory lookup

This refines the gpu package's error handling and fixes a bug in the
system memory lookup on Windows.
This commit is contained in:
Daniel Hiltgen 2023-12-22 15:43:31 -08:00
parent 5fea4410be
commit a2ad952440
8 changed files with 68 additions and 20 deletions

View file

@ -66,7 +66,7 @@ func GetGPUInfo() GpuInfo {
} }
var memInfo C.mem_info_t var memInfo C.mem_info_t
resp := GpuInfo{"", 0, 0} resp := GpuInfo{}
if gpuHandles.cuda != nil { if gpuHandles.cuda != nil {
C.cuda_check_vram(*gpuHandles.cuda, &memInfo) C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
if memInfo.err != nil { if memInfo.err != nil {
@ -103,6 +103,19 @@ func GetGPUInfo() GpuInfo {
return resp return resp
} }
// getCPUMem asks the native cpu_check_ram helper for the host's memory
// figures and converts them into a memInfo.
//
// On success it returns the total and free values reported by the helper.
// If the helper reports an error, the zero-value memInfo is returned with an
// error carrying the helper's message.
func getCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// The message is copied into a Go string below, so the C string can be
		// released once this function returns.
		defer C.free(unsafe.Pointer(info.err))
		// Use an explicit verb: the message comes from the OS/native layer and
		// may contain '%', which must not be interpreted as a format directive.
		return ret, fmt.Errorf("%s", C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}
func CheckVRAM() (int64, error) { func CheckVRAM() (int64, error) {
gpuInfo := GetGPUInfo() gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") { if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {

View file

@ -18,12 +18,18 @@ func CheckVRAM() (int64, error) {
func GetGPUInfo() GpuInfo { func GetGPUInfo() GpuInfo {
// TODO - Metal vs. x86 macs... // TODO - Metal vs. x86 macs...
mem, _ := getCPUMem()
return GpuInfo{ return GpuInfo{
Library: "default", Library: "default",
memInfo: mem,
}
}
func getCPUMem() (memInfo, error) {
return memInfo{
TotalMemory: 0, TotalMemory: 0,
FreeMemory: 0, FreeMemory: 0,
} }, nil
} }
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int { func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {

View file

@ -9,20 +9,21 @@
#include <dlfcn.h> #include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags) #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym) #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() dlerror() #define LOAD_ERR() strdup(dlerror())
#define UNLOAD_LIBRARY(handle) dlclose(handle) #define UNLOAD_LIBRARY(handle) dlclose(handle)
#else #else
#include <windows.h> #include <windows.h>
#define LOAD_LIBRARY(lib, flags) LoadLibrary(lib) #define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
#define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym) #define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
#define UNLOAD_LIBRARY(handle) FreeLibrary(handle) #define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
#define LOAD_ERR() ({\
// TODO - refactor this with proper error message handling on windows LPSTR messageBuffer = NULL; \
inline static char *LOAD_ERR() { size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \
static char errbuf[8]; NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \
snprintf(errbuf, 8, "0x%lx", GetLastError()); char *resp = strdup(messageBuffer); \
return errbuf; LocalFree(messageBuffer); \
} resp; \
})
#endif #endif

View file

@ -6,11 +6,12 @@
void cpu_check_ram(mem_info_t *resp) { void cpu_check_ram(mem_info_t *resp) {
resp->err = NULL; resp->err = NULL;
MEMORYSTATUSEX info; MEMORYSTATUSEX info;
info.dwLength = sizeof(info);
if (GlobalMemoryStatusEx(&info) != 0) { if (GlobalMemoryStatusEx(&info) != 0) {
resp->total = info.ullTotalPhys; resp->total = info.ullTotalPhys;
resp->free = info.ullAvailPhys; resp->free = info.ullAvailPhys;
} else { } else {
resp->err = strdup(LOAD_ERR()); resp->err = LOAD_ERR();
} }
return; return;
} }

View file

@ -43,9 +43,11 @@ void cuda_init(cuda_init_resp_t *resp) {
if (!resp->ch.handle) { if (!resp->ch.handle) {
// TODO improve error message, as the LOAD_ERR will typically have the // TODO improve error message, as the LOAD_ERR will typically have the
// final path that was checked which might be confusing. // final path that was checked which might be confusing.
char *msg = LOAD_ERR();
snprintf(buf, buflen, snprintf(buf, buflen,
"Unable to load %s library to query for Nvidia GPUs: %s", "Unable to load %s library to query for Nvidia GPUs: %s",
cuda_lib_paths[0], LOAD_ERR()); cuda_lib_paths[0], msg);
free(msg);
resp->err = strdup(buf); resp->err = strdup(buf);
return; return;
} }
@ -55,8 +57,10 @@ void cuda_init(cuda_init_resp_t *resp) {
if (!l[i].p) { if (!l[i].p) {
UNLOAD_LIBRARY(resp->ch.handle); UNLOAD_LIBRARY(resp->ch.handle);
resp->ch.handle = NULL; resp->ch.handle = NULL;
char *msg = LOAD_ERR();
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
LOAD_ERR()); msg);
free(msg);
resp->err = strdup(buf); resp->err = strdup(buf);
return; return;
} }

View file

@ -40,9 +40,11 @@ void rocm_init(rocm_init_resp_t *resp) {
resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY); resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY);
} }
if (!resp->rh.handle) { if (!resp->rh.handle) {
char *msg = LOAD_ERR();
snprintf(buf, buflen, snprintf(buf, buflen,
"Unable to load %s library to query for Radeon GPUs: %s\n", "Unable to load %s library to query for Radeon GPUs: %s\n",
rocm_lib_paths[0], LOAD_ERR()); rocm_lib_paths[0], msg);
free(msg);
resp->err = strdup(buf); resp->err = strdup(buf);
return; return;
} }
@ -51,8 +53,10 @@ void rocm_init(rocm_init_resp_t *resp) {
*l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s); *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
if (!l[i].p) { if (!l[i].p) {
UNLOAD_LIBRARY(resp->rh.handle); UNLOAD_LIBRARY(resp->rh.handle);
char *msg = LOAD_ERR();
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
LOAD_ERR()); msg);
free(msg);
resp->err = strdup(buf); resp->err = strdup(buf);
return; return;
} }

View file

@ -23,4 +23,19 @@ func TestBasicGetGPUInfo(t *testing.T) {
} }
} }
// TestCPUMemInfo checks that getCPUMem succeeds and, on platforms where the
// lookup is implemented (linux and windows), reports non-zero total and free
// memory. The test is skipped on darwin and is a no-op on any other platform.
func TestCPUMemInfo(t *testing.T) {
	mem, err := getCPUMem()
	assert.NoError(t, err)

	goos := runtime.GOOS
	if goos == "darwin" {
		t.Skip("CPU memory not populated on darwin")
	}
	if goos == "linux" || goos == "windows" {
		assert.Greater(t, mem.TotalMemory, uint64(0))
		assert.Greater(t, mem.FreeMemory, uint64(0))
	}
}
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected // TODO - add some logic to figure out card type through other means and actually verify we got back what we expected

View file

@ -1,10 +1,14 @@
package gpu package gpu
// memInfo holds a pair of memory figures: the total amount and the amount
// currently free. Both fields are omitted from JSON output when zero.
// NOTE(review): units are presumably bytes — confirm against the native helpers.
type memInfo struct {
// TotalMemory is the total amount of memory reported.
TotalMemory uint64 `json:"total_memory,omitempty"`
// FreeMemory is the amount of memory reported as currently free.
FreeMemory uint64 `json:"free_memory,omitempty"`
}
// Beginning of an `ollama info` command // Beginning of an `ollama info` command
type GpuInfo struct { type GpuInfo struct {
Library string `json:"library,omitempty"` memInfo
TotalMemory uint64 `json:"total_memory,omitempty"` Library string `json:"library,omitempty"`
FreeMemory uint64 `json:"free_memory,omitempty"`
// TODO add other useful attributes about the card here for discovery information // TODO add other useful attributes about the card here for discovery information
} }