sched: don't error if paging to disk on Windows and macOS (#5523)

This commit is contained in:
Jeffrey Morgan 2024-07-06 22:01:52 -04:00 committed by GitHub
parent f8241bfba3
commit 0ee87615c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -197,25 +197,36 @@ func (s *Scheduler) processPending(ctx context.Context) {
break break
} }
// Block attempting to load a model larger than system memory + GPU memory
estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts) estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts)
maxSize := systemMem.FreeMemory maxSize := systemMem.FreeMemory
for _, gpu := range gpus {
if gpu.Library == "cpu" { // Add available GPU memory to the total pool
continue // macOS hardware has unified memory so don't double count
} if runtime.GOOS != "darwin" {
if loadedCount == 0 { for _, gpu := range gpus {
// If no other models are loaded, set the limit based on what's available if gpu.Library == "cpu" {
maxSize += gpu.FreeMemory continue
} else { }
// Other models could be unloaded, favor total memory for limit if loadedCount == 0 {
maxSize += gpu.TotalMemory // If no other models are loaded, set the limit based on what's available
maxSize += gpu.FreeMemory
} else {
// Other models could be unloaded, favor total memory for limit
maxSize += gpu.TotalMemory
}
} }
} }
// Block attempting to load a model larger than system memory + GPU memory
if estimate.TotalSize > maxSize { if estimate.TotalSize > maxSize {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize)) slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize))
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
break // Linux will crash if over-allocating memory - return an error to the user.
// TODO (jmorganca): add reasonable upper limits for darwin and windows as well
if runtime.GOOS == "linux" {
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
break
}
} }
// Evaluate if the model will fit in the available system memory, or if we should unload a model first // Evaluate if the model will fit in the available system memory, or if we should unload a model first