Remove VRAM convergence check for windows

The APIs we query are optimistic on free space, and windows pages
VRAM, so we don't have to wait to see reported usage recover on unload
This commit is contained in:
Daniel Hiltgen 2024-05-14 09:48:13 -07:00
parent 7ca71a6b0f
commit ec231a7923

View file

@ -6,6 +6,7 @@ import (
"fmt"
"log/slog"
"reflect"
"runtime"
"sort"
"strings"
"sync"
@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
finished := make(chan interface{}, 1)
// CPU or Metal don't need checking, so no waiting required
if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") {
// CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
finished <- struct{}{}
return finished
}