Patch: report total system memory on macOS and skip GPU offload for models
that exceed it. (Text before the first "diff --git" line is commentary.)

Changes:
- gpu/gpu_darwin.go: getCPUMem fills TotalMemory from C.getPhysicalMemory().
- gpu/gpu_info_darwin.h, gpu/gpu_info_darwin.m: add getPhysicalMemory(), which
  returns [[NSProcessInfo processInfo] physicalMemory]; getRecommendedMaxVRAM()
  is only reformatted.
- llm/server.go: when info.Library is "metal" and memoryRequiredTotal is
  greater than info.TotalMemory, set opts.NumGPU = 0 instead of taking the
  "everything fits" branch.

Review notes:
- The first line of gpu_info_darwin.m stays "//go:build darwin". The earlier
  revision of this patch rewrote it to "// go:build darwin"; with the space Go
  no longer treats the line as a build constraint.
- Because of that, the post-image blob id on the gpu_info_darwin.m "index"
  line is stale.
- NOTE(review): hunk indentation, the position of blank context lines, and the
  <Metal/Metal.h>, <stdint.h> and id<MTLDevice> operands were reconstructed
  from a whitespace-collapsed copy of this patch. Confirm against the tree, or
  apply with a whitespace-tolerant option.

diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go
index 824f43bf..39e257e4 100644
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -32,6 +32,7 @@ func CheckVRAM() (uint64, error) {
 		// gpu not supported, this may not be metal
 		return 0, nil
 	}
+
 	return uint64(C.getRecommendedMaxVRAM()), nil
 }
 
@@ -52,7 +53,7 @@ func GetGPUInfo() GpuInfo {
 
 func getCPUMem() (memInfo, error) {
 	return memInfo{
-		TotalMemory: 0,
+		TotalMemory: uint64(C.getPhysicalMemory()),
 		FreeMemory:  0,
 		DeviceCount: 0,
 	}, nil
diff --git a/gpu/gpu_info_darwin.h b/gpu/gpu_info_darwin.h
index 6ba30c0a..3edca237 100644
--- a/gpu/gpu_info_darwin.h
+++ b/gpu/gpu_info_darwin.h
@@ -1,3 +1,4 @@
 #import <Metal/Metal.h>
 #include <stdint.h>
 uint64_t getRecommendedMaxVRAM();
+uint64_t getPhysicalMemory();
diff --git a/gpu/gpu_info_darwin.m b/gpu/gpu_info_darwin.m
index 06d7b69b..a145ac07 100644
--- a/gpu/gpu_info_darwin.m
+++ b/gpu/gpu_info_darwin.m
@@ -1,11 +1,13 @@
 //go:build darwin
 
 #include "gpu_info_darwin.h"
 
-uint64_t getRecommendedMaxVRAM()
-{
-	id<MTLDevice> device = MTLCreateSystemDefaultDevice();
-	uint64_t result = device.recommendedMaxWorkingSetSize;
-	CFRelease(device);
-	return result;
+uint64_t getRecommendedMaxVRAM() {
+  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+  uint64_t result = device.recommendedMaxWorkingSetSize;
+  CFRelease(device);
+  return result;
 }
+uint64_t getPhysicalMemory() {
+  return [[NSProcessInfo processInfo] physicalMemory];
+}
diff --git a/llm/server.go b/llm/server.go
index 4c1f9634..0bbdebce 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -108,7 +108,11 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 
 	memoryLayerOutput := layers["output"].size()
 	memoryRequiredTotal += memoryLayerOutput
-	if memoryAvailable > memoryRequiredTotal {
+
+	if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
+		// disable partial offloading when model is greater than total system memory
+		opts.NumGPU = 0
+	} else if memoryAvailable > memoryRequiredTotal {
 		layerCount = int(ggml.KV().BlockCount()) + 1
 		memoryRequiredPartial = memoryRequiredTotal
 	}