unload in critical section (#4187)
This commit is contained in:
parent
840424a2c4
commit
dfa2f32ca0
2 changed files with 7 additions and 10 deletions
|
@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||||
}
|
}
|
||||||
} else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners {
|
} else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners {
|
||||||
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
|
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
|
||||||
runnerToExpire = s.findRunnerToUnload(pending)
|
runnerToExpire = s.findRunnerToUnload()
|
||||||
} else {
|
} else {
|
||||||
// Either no models are loaded or below envconfig.MaxRunners
|
// Either no models are loaded or below envconfig.MaxRunners
|
||||||
// Get a refreshed GPU list
|
// Get a refreshed GPU list
|
||||||
|
@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||||
s.loadFn(pending, ggml, gpus)
|
s.loadFn(pending, ggml, gpus)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
runnerToExpire = s.findRunnerToUnload(pending)
|
runnerToExpire = s.findRunnerToUnload()
|
||||||
}
|
}
|
||||||
|
|
||||||
if runnerToExpire == nil {
|
if runnerToExpire == nil {
|
||||||
|
@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s.loadedMu.Lock()
|
||||||
slog.Debug("got lock to unload", "model", runner.model)
|
slog.Debug("got lock to unload", "model", runner.model)
|
||||||
runner.unload()
|
runner.unload()
|
||||||
s.loadedMu.Lock()
|
|
||||||
delete(s.loaded, runner.model)
|
delete(s.loaded, runner.model)
|
||||||
s.loadedMu.Unlock()
|
s.loadedMu.Unlock()
|
||||||
slog.Debug("runner released", "model", runner.model)
|
slog.Debug("runner released", "model", runner.model)
|
||||||
|
@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
|
||||||
}
|
}
|
||||||
|
|
||||||
// findRunnerToUnload finds a runner to unload to make room for a new model
|
// findRunnerToUnload finds a runner to unload to make room for a new model
|
||||||
func (s *Scheduler) findRunnerToUnload(req *LlmRequest) *runnerRef {
|
func (s *Scheduler) findRunnerToUnload() *runnerRef {
|
||||||
s.loadedMu.Lock()
|
s.loadedMu.Lock()
|
||||||
runnerList := make([]*runnerRef, 0, len(s.loaded))
|
runnerList := make([]*runnerRef, 0, len(s.loaded))
|
||||||
for _, r := range s.loaded {
|
for _, r := range s.loaded {
|
||||||
|
|
|
@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) {
|
||||||
func TestFindRunnerToUnload(t *testing.T) {
|
func TestFindRunnerToUnload(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
req := &LlmRequest{
|
|
||||||
ctx: ctx,
|
|
||||||
opts: api.DefaultOptions(),
|
|
||||||
}
|
|
||||||
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
r1 := &runnerRef{refCount: 1, sessionDuration: 1}
|
||||||
r2 := &runnerRef{sessionDuration: 2}
|
r2 := &runnerRef{sessionDuration: 2}
|
||||||
|
|
||||||
|
@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) {
|
||||||
s.loaded["b"] = r2
|
s.loaded["b"] = r2
|
||||||
s.loadedMu.Unlock()
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
resp := s.findRunnerToUnload(req)
|
resp := s.findRunnerToUnload()
|
||||||
require.Equal(t, r2, resp)
|
require.Equal(t, r2, resp)
|
||||||
r2.refCount = 1
|
r2.refCount = 1
|
||||||
resp = s.findRunnerToUnload(req)
|
resp = s.findRunnerToUnload()
|
||||||
require.Equal(t, r1, resp)
|
require.Equal(t, r1, resp)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue