update cuda overhead to 20% to fix crashes when switching between models and large context sizes
This commit is contained in:
parent
f6cb0a553c
commit
37708931fb
1 changed file with 2 additions and 8 deletions
10
gpu/gpu.go
10
gpu/gpu.go
|
@ -131,14 +131,8 @@ func getCPUMem() (memInfo, error) {
|
||||||
func CheckVRAM() (int64, error) {
|
func CheckVRAM() (int64, error) {
|
||||||
gpuInfo := GetGPUInfo()
|
gpuInfo := GetGPUInfo()
|
||||||
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
||||||
// leave 15% or 400MiB of VRAM free for overhead
|
// leave 20% of VRAM free for overhead
|
||||||
overhead := gpuInfo.FreeMemory * 3 / 20
|
return int64(gpuInfo.FreeMemory * 4 / 5), nil
|
||||||
minOverhead := uint64(400 * 1024 * 1024)
|
|
||||||
if overhead < minOverhead {
|
|
||||||
overhead = minOverhead
|
|
||||||
}
|
|
||||||
|
|
||||||
return int64(gpuInfo.FreeMemory - overhead), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determination
|
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determination
|
||||||
|
|
Loading…
Reference in a new issue