From ec231a7923ab213f7bb64f3c2ecbead394a47ef0 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 14 May 2024 09:48:13 -0700
Subject: [PATCH] Remove VRAM convergence check for windows

The APIs we query are optimistic on free space, and windows pages
VRAM, so we don't have to wait to see reported usage recover on unload
---
 server/sched.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index 198f0aca..ceddc526 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"log/slog"
 	"reflect"
+	"runtime"
 	"sort"
 	"strings"
 	"sync"
@@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
 	finished := make(chan interface{}, 1)
 
-	// CPU or Metal don't need checking, so no waiting required
-	if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") {
+	// CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
+	if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
 		finished <- struct{}{}
 		return finished
 	}