Merge pull request #3684 from ollama/mxyng/scale-graph

scale graph based on gpu count

Commit fb9580df85
2 changed files with 4 additions and 1 deletion
@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
 	return memInfo{
 		TotalMemory: uint64(C.getPhysicalMemory()),
 		FreeMemory:  0,
-		DeviceCount: 0,
+		DeviceCount: 1,
 	}, nil
 }
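Why DeviceCount goes from 0 to 1 here: the second hunk below multiplies the graph-memory estimates by info.DeviceCount, so a CPU-only path that reported zero devices would zero out those estimates. A minimal sketch of the idea in plain Go, without the cgo call from the real file (totalPhysicalMemory is a hypothetical stand-in for C.getPhysicalMemory, and the memInfo field types are assumptions):

package main

import "fmt"

// memInfo mirrors the struct literal in the hunk above; the field types
// here are assumed for this sketch.
type memInfo struct {
	TotalMemory uint64
	FreeMemory  uint64
	DeviceCount int
}

// totalPhysicalMemory is a hypothetical stand-in for the cgo call
// C.getPhysicalMemory() used in the real code.
func totalPhysicalMemory() uint64 {
	return 16 << 30 // pretend the host has 16 GiB of RAM
}

// getCPUMem reports system memory. DeviceCount is 1 rather than 0 so that
// multiplying graph sizes by the device count later never collapses the
// estimate to zero on a CPU-only host.
func getCPUMem() (memInfo, error) {
	return memInfo{
		TotalMemory: totalPhysicalMemory(),
		FreeMemory:  0,
		DeviceCount: 1,
	}, nil
}

func main() {
	info, _ := getCPUMem()
	fmt.Printf("total=%d GiB devices=%d\n", info.TotalMemory>>30, info.DeviceCount)
}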
@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		graphFullOffload = graphPartialOffload
 	}

+	graphFullOffload *= uint64(info.DeviceCount)
+	graphPartialOffload *= uint64(info.DeviceCount)
+
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	memoryRequiredTotal := memoryMinimum + graphFullOffload

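The intent of the two added lines, per the commit title, is to scale the compute-graph memory estimate with the number of GPUs the work is spread across before it feeds into the totals. A standalone sketch of that arithmetic, assuming a hypothetical helper scaleGraphForGPUs in place of the inline code in NewLlamaServer (deviceCount stands in for info.DeviceCount, and the sample numbers are made up):

package main

import "fmt"

// scaleGraphForGPUs is a hypothetical helper that isolates the arithmetic
// around the added lines; in the real code this happens inline in
// NewLlamaServer. All sizes are in bytes.
func scaleGraphForGPUs(memoryMinimum, graphPartialOffload, graphFullOffload uint64, deviceCount int) (scaledPartialGraph, memoryRequiredTotal uint64) {
	// The two lines added by this commit: scale the graph estimates by
	// the number of GPUs.
	graphFullOffload *= uint64(deviceCount)
	graphPartialOffload *= uint64(deviceCount)

	// memoryRequiredTotal represents the memory required for full GPU
	// offloading (all layers), as in the context line after the addition.
	memoryRequiredTotal = memoryMinimum + graphFullOffload

	return graphPartialOffload, memoryRequiredTotal
}

func main() {
	// Made-up example: 512 MiB minimum, ~1 GiB graph estimates, 2 GPUs.
	partial, total := scaleGraphForGPUs(512<<20, 1<<30, 1<<30, 2)
	fmt.Printf("scaled partial graph = %d MiB, required total = %d MiB\n", partial>>20, total>>20)
}

With a single device the estimates are unchanged, which is why the first hunk makes the CPU path report DeviceCount: 1 instead of 0.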