gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068)
This commit is contained in:
parent
b9f74ff3d6
commit
f0c454ab57
2 changed files with 12 additions and 1 deletions
|
@ -10,6 +10,12 @@ package gpu
|
||||||
import "C"
|
import "C"
|
||||||
import (
|
import (
|
||||||
"runtime"
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/format"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
metalMinimumMemory = 512 * format.MebiByte
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetGPUInfo() GpuInfoList {
|
func GetGPUInfo() GpuInfoList {
|
||||||
|
@ -32,7 +38,7 @@ func GetGPUInfo() GpuInfoList {
|
||||||
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
|
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
|
||||||
info.FreeMemory = info.TotalMemory
|
info.FreeMemory = info.TotalMemory
|
||||||
|
|
||||||
info.MinimumMemory = 0
|
info.MinimumMemory = metalMinimumMemory
|
||||||
return []GpuInfo{info}
|
return []GpuInfo{info}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||||
graphFullOffload *= uint64(len(gpus))
|
graphFullOffload *= uint64(len(gpus))
|
||||||
graphPartialOffload *= uint64(len(gpus))
|
graphPartialOffload *= uint64(len(gpus))
|
||||||
|
|
||||||
|
// on metal there's no partial offload overhead
|
||||||
|
if gpus[0].Library == "metal" {
|
||||||
|
graphPartialOffload = graphFullOffload
|
||||||
|
}
|
||||||
|
|
||||||
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
||||||
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue