Skip scheduling cancelled requests, always reload unloaded runners (#4189)
This commit is contained in:
parent
aa93423fbf
commit
c9f98622b1
1 changed file with 10 additions and 0 deletions
|
@@ -100,6 +100,12 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||||
return
|
return
|
||||||
case pending := <-s.pendingReqCh:
|
case pending := <-s.pendingReqCh:
|
||||||
// Block other requests until we get this pending request running
|
// Block other requests until we get this pending request running
|
||||||
|
|
||||||
|
if pending.ctx.Err() != nil {
|
||||||
|
slog.Debug("pending request cancelled or timed out, skipping scheduling")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
var runnerToExpire *runnerRef
|
var runnerToExpire *runnerRef
|
||||||
s.loadedMu.Lock()
|
s.loadedMu.Lock()
|
||||||
|
@@ -435,6 +441,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||||
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
|
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if runner.Options == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Don't reload runner if num_gpu=-1 was provided
|
// Don't reload runner if num_gpu=-1 was provided
|
||||||
optsExisting := runner.Options.Runner
|
optsExisting := runner.Options.Runner
|
||||||
optsNew := req.opts.Runner
|
optsNew := req.opts.Runner
|
||||||
|
|
Loading…
Add table
Reference in a new issue