gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068)
This commit is contained in:
parent
b9f74ff3d6
commit
f0c454ab57
2 changed files with 12 additions and 1 deletions
|
@@ -10,6 +10,12 @@ package gpu
|
|||
import "C"
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
const (
|
||||
metalMinimumMemory = 512 * format.MebiByte
|
||||
)
|
||||
|
||||
func GetGPUInfo() GpuInfoList {
|
||||
|
@@ -32,7 +38,7 @@ func GetGPUInfo() GpuInfoList {
|
|||
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
|
||||
info.FreeMemory = info.TotalMemory
|
||||
|
||||
info.MinimumMemory = 0
|
||||
info.MinimumMemory = metalMinimumMemory
|
||||
return []GpuInfo{info}
|
||||
}
|
||||
|
||||
|
|
|
@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|||
graphFullOffload *= uint64(len(gpus))
|
||||
graphPartialOffload *= uint64(len(gpus))
|
||||
|
||||
// on metal there's no partial offload overhead
|
||||
if gpus[0].Library == "metal" {
|
||||
graphPartialOffload = graphFullOffload
|
||||
}
|
||||
|
||||
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
||||
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
||||
|
||||
|
|
Loading…
Reference in a new issue