Remove VRAM convergence check for windows
The APIs we query are optimistic on free space, and windows pages VRAM, so we don't have to wait to see reported usage recover on unload
This commit is contained in:
parent
7ca71a6b0f
commit
ec231a7923
1 changed files with 3 additions and 2 deletions
|
@ -6,6 +6,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"runtime"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||||
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
|
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
|
||||||
finished := make(chan interface{}, 1)
|
finished := make(chan interface{}, 1)
|
||||||
|
|
||||||
// CPU or Metal don't need checking, so no waiting required
|
// CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
|
||||||
if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") {
|
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
|
||||||
finished <- struct{}{}
|
finished <- struct{}{}
|
||||||
return finished
|
return finished
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue