diff --git a/gpu/gpu.go b/gpu/gpu.go index b5baff38..8c7f1297 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) { } func CheckVRAM() (int64, error) { + userLimit := os.Getenv("OLLAMA_MAX_VRAM") + if userLimit != "" { + avail, err := strconv.ParseInt(userLimit, 10, 64) + if err != nil { + return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err) + } + slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail)) + return avail, nil + } gpuInfo := GetGPUInfo() if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") { // leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go index 9a418c0b..f2856e29 100644 --- a/gpu/gpu_darwin.go +++ b/gpu/gpu_darwin.go @@ -1,6 +1,7 @@ //go:build darwin package gpu + /* #cgo CFLAGS: -x objective-c #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal @@ -8,11 +9,25 @@ package gpu */ import "C" import ( + "fmt" + "log/slog" + "os" "runtime" + "strconv" ) // CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs func CheckVRAM() (int64, error) { + userLimit := os.Getenv("OLLAMA_MAX_VRAM") + if userLimit != "" { + avail, err := strconv.ParseInt(userLimit, 10, 64) + if err != nil { + return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err) + } + slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail)) + return avail, nil + } + if runtime.GOARCH == "amd64" { // gpu not supported, this may not be metal return 0, nil