Fix race in shutdown logic

Ensure the runners are terminated
This commit is contained in:
Daniel Hiltgen 2024-05-09 15:47:02 -07:00
parent 83d6d46e29
commit 3ae2f441e0

View file

@@ -1036,7 +1036,8 @@ func Serve(ln net.Listener) error {
}
ctx, done := context.WithCancel(context.Background())
sched := InitScheduler(ctx)
schedCtx, schedDone := context.WithCancel(ctx)
sched := InitScheduler(schedCtx)
s := &Server{addr: ln.Addr(), sched: sched}
r := s.GenerateRoutes()
@@ -1051,24 +1052,31 @@ func Serve(ln net.Listener) error {
go func() {
<-signals
srvr.Close()
done()
schedDone()
sched.unloadAllRunners()
gpu.Cleanup()
os.Exit(0)
done()
}()
if err := llm.Init(); err != nil {
return fmt.Errorf("unable to initialize llm library %w", err)
}
s.sched.Run(ctx)
s.sched.Run(schedCtx)
// At startup we retrieve GPU information so we can get log messages before loading a model
// This will log warnings to the log in case we have problems with detected GPUs
gpus := gpu.GetGPUInfo()
gpus.LogDetails()
return srvr.Serve(ln)
err = srvr.Serve(ln)
// If server is closed from the signal handler, wait for the ctx to be done
// otherwise error out quickly
if !errors.Is(err, http.ErrServerClosed) {
return err
}
<-ctx.Done()
return err
}
func waitForStream(c *gin.Context, ch chan interface{}) {