llm: prevent loading too large models on windows (#5926)
Don't allow loading models that would lead to memory exhaustion (across VRAM, system memory, and disk paging). This check was already applied on Linux and should be applied on Windows as well.
This commit is contained in:
parent
023451ce47
commit
25906d72d1
1 changed file with 3 additions and 2 deletions
@@ -125,8 +125,9 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		}
 	}
 
-	// On linux, over-allocating CPU memory will almost always result in an error
-	if runtime.GOOS == "linux" {
+	// On linux and windows, over-allocating CPU memory will almost always result in an error
+	// Darwin has fully dynamic swap so has no direct concept of free swap space
+	if runtime.GOOS != "darwin" {
 		systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
 		available := systemFreeMemory + systemSwapFreeMemory
 		if systemMemoryRequired > available {
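A minimal, standalone sketch of the guard this diff extends, assuming hypothetical MemoryEstimate and SystemInfo stand-in types for the values the real llm/server.go already has in scope; it only illustrates the comparison the diff changes, not the surrounding server setup.

package main

import (
	"fmt"
	"runtime"
)

// MemoryEstimate is a stand-in for the loader's estimate: TotalSize is the
// model's full footprint, VRAMSize the portion offloaded to the GPU.
type MemoryEstimate struct {
	TotalSize uint64
	VRAMSize  uint64
}

// SystemInfo is a stand-in for host memory figures gathered elsewhere.
type SystemInfo struct {
	FreeMemory     uint64 // free system RAM in bytes
	SwapFreeMemory uint64 // free swap / page file in bytes
}

// checkSystemMemory refuses the load when the CPU-resident part of the model
// would not fit in free RAM plus free swap. Darwin is skipped because its swap
// is fully dynamic, so there is no meaningful "free swap" figure to check.
func checkSystemMemory(estimate MemoryEstimate, sys SystemInfo) error {
	if runtime.GOOS == "darwin" {
		return nil
	}
	systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
	available := sys.FreeMemory + sys.SwapFreeMemory
	if systemMemoryRequired > available {
		return fmt.Errorf("model requires %d bytes of system memory but only %d bytes are available",
			systemMemoryRequired, available)
	}
	return nil
}

func main() {
	estimate := MemoryEstimate{TotalSize: 12 << 30, VRAMSize: 8 << 30} // 12 GiB model, 8 GiB on GPU
	sys := SystemInfo{FreeMemory: 2 << 30, SwapFreeMemory: 1 << 30}    // 2 GiB RAM + 1 GiB swap free
	if err := checkSystemMemory(estimate, sys); err != nil {
		fmt.Println("refusing to load:", err) // 4 GiB needed > 3 GiB available
	}
}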