Determine max VRAM on macOS using recommendedMaxWorkingSetSize (#2354)

* read iogpu.wired_limit_mb on macOS

Fix for https://github.com/ollama/ollama/issues/1826

* improved determination of available vram on macOS

read the recommended maximal vram on macOS via Metal API

* Removed macOS-specific logging

* Remove logging from gpu_darwin.go

* release Core Foundation object

fixes a possible memory leak
This commit is contained in:
peanut256 2024-02-26 00:16:45 +01:00 committed by GitHub
parent e95b896790
commit a189810df6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 21 additions and 16 deletions

View file

@ -1,12 +1,14 @@
//go:build darwin
package gpu
/*
#cgo CFLAGS: -x objective-c
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
#include "gpu_info_darwin.h"
*/
import "C"
import (
"runtime"
"github.com/pbnjay/memory"
)
// CheckVRAM returns the recommended maximum GPU working set size in bytes as reported by Metal on macOS, or 0 when the GPU is not supported
@ -15,19 +17,8 @@ func CheckVRAM() (int64, error) {
// gpu not supported, this may not be metal
return 0, nil
}
// on macOS, there's already buffer for available vram (see below) so just return the total
systemMemory := int64(memory.TotalMemory())
// macOS limits how much memory is available to the GPU based on the amount of system memory
// TODO: handle case where iogpu.wired_limit_mb is set to a higher value
if systemMemory <= 36*1024*1024*1024 {
systemMemory = systemMemory * 2 / 3
} else {
systemMemory = systemMemory * 3 / 4
}
return systemMemory, nil
recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
return recommendedMaxVRAM, nil
}
func GetGPUInfo() GpuInfo {

3
gpu/gpu_info_darwin.h Normal file
View file

@ -0,0 +1,3 @@
#import <Metal/Metal.h>
#include <stdint.h>
// getRecommendedMaxVRAM returns the recommendedMaxWorkingSetSize, in bytes,
// of the system default Metal device.
uint64_t getRecommendedMaxVRAM(void);

11
gpu/gpu_info_darwin.m Normal file
View file

@ -0,0 +1,11 @@
//go:build darwin
#include "gpu_info_darwin.h"
// getRecommendedMaxVRAM returns the recommendedMaxWorkingSetSize, in bytes,
// of the system default Metal device. It returns 0 when no Metal device is
// available.
uint64_t getRecommendedMaxVRAM(void)
{
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  if (device == nil) {
    // MTLCreateSystemDefaultDevice yields nil when there is no Metal-capable
    // device; passing NULL to CFRelease would crash, so bail out early.
    return 0;
  }

  uint64_t result = device.recommendedMaxWorkingSetSize;
  // Release the reference obtained from MTLCreateSystemDefaultDevice so the
  // device object is not leaked on every call.
  CFRelease(device);
  return result;
}