use 10% vram overhead for cuda
This commit is contained in:
parent
58ce2d8273
commit
cb534e6ac2
2 changed files with 6 additions and 4 deletions
|
@ -131,10 +131,11 @@ func getCPUMem() (memInfo, error) {
|
||||||
func CheckVRAM() (int64, error) {
|
func CheckVRAM() (int64, error) {
|
||||||
gpuInfo := GetGPUInfo()
|
gpuInfo := GetGPUInfo()
|
||||||
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
||||||
// allocate 384MiB for llama.cpp overhead (outside of model)
|
// reserve 10% of free VRAM for overhead, or 400MiB if 10% would be less than that
|
||||||
overhead := uint64(384 * 1024 * 1024)
|
overhead := gpuInfo.FreeMemory / 10
|
||||||
if gpuInfo.FreeMemory <= overhead {
|
minOverhead := 400 * 1024 * 1024
|
||||||
return 0, nil
|
if overhead < minOverhead {
|
||||||
|
overhead = minOverhead
|
||||||
}
|
}
|
||||||
|
|
||||||
return int64(gpuInfo.FreeMemory - overhead), nil
|
return int64(gpuInfo.FreeMemory - overhead), nil
|
||||||
|
|
|
@ -117,6 +117,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||||
bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
|
bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
|
||||||
log.Println("bytes per layer:", bytesPerLayer)
|
log.Println("bytes per layer:", bytesPerLayer)
|
||||||
layers := available / bytesPerLayer
|
layers := available / bytesPerLayer
|
||||||
|
log.Println("total required with split:", requiredAlloc+(layers*bytesPerLayer))
|
||||||
if layers < int64(opts.NumGPU) {
|
if layers < int64(opts.NumGPU) {
|
||||||
opts.NumGPU = int(layers)
|
opts.NumGPU = int(layers)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue