Skip scheduling cancelled requests, always reload unloaded runners (#4189)

This commit is contained in:
Jeffrey Morgan 2024-05-06 14:22:24 -07:00 committed by GitHub
parent aa93423fbf
commit c9f98622b1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -100,6 +100,12 @@ func (s *Scheduler) processPending(ctx context.Context) {
return
case pending := <-s.pendingReqCh:
// Block other requests until we get this pending request running
if pending.ctx.Err() != nil {
slog.Debug("pending request cancelled or timed out, skipping scheduling")
continue
}
for {
var runnerToExpire *runnerRef
s.loadedMu.Lock()
@@ -435,6 +441,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
}
if runner.Options == nil {
return true
}
// Don't reload runner if num_gpu=-1 was provided
optsExisting := runner.Options.Runner
optsNew := req.opts.Runner