Merge pull request #3684 from ollama/mxyng/scale-graph

scale graph based on gpu count
This commit is contained in:
Michael Yang 2024-04-16 14:57:09 -07:00 committed by GitHub
commit fb9580df85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 4 additions and 1 deletion

View file

@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0,
-DeviceCount: 0,
+DeviceCount: 1,
}, nil
}

View file

@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload
}
+graphFullOffload *= uint64(info.DeviceCount)
+graphPartialOffload *= uint64(info.DeviceCount)
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload