From 37708931fb37f228a9bf5c74c4c98ad1b1c9650e Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Tue, 9 Jan 2024 00:05:23 -0500 Subject: [PATCH] update cuda overhead to 20% to fix crashes when switching between models and large context sizes --- gpu/gpu.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/gpu/gpu.go b/gpu/gpu.go index 7706a74b..2d32e9de 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -131,14 +131,8 @@ func getCPUMem() (memInfo, error) { func CheckVRAM() (int64, error) { gpuInfo := GetGPUInfo() if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") { - // leave 15% or 400MiB of VRAM free for overhead - overhead := gpuInfo.FreeMemory * 3 / 20 - minOverhead := uint64(400 * 1024 * 1024) - if overhead < minOverhead { - overhead = minOverhead - } - - return int64(gpuInfo.FreeMemory - overhead), nil + // leave 20% of VRAM free for overhead + return int64(gpuInfo.FreeMemory * 4 / 5), nil } return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation