Fix race in shutdown logic

Ensure the runners are terminated
This commit is contained in:
Daniel Hiltgen 2024-05-09 15:47:02 -07:00
parent 83d6d46e29
commit 3ae2f441e0

View file

@ -1036,7 +1036,8 @@ func Serve(ln net.Listener) error {
} }
ctx, done := context.WithCancel(context.Background()) ctx, done := context.WithCancel(context.Background())
sched := InitScheduler(ctx) schedCtx, schedDone := context.WithCancel(ctx)
sched := InitScheduler(schedCtx)
s := &Server{addr: ln.Addr(), sched: sched} s := &Server{addr: ln.Addr(), sched: sched}
r := s.GenerateRoutes() r := s.GenerateRoutes()
@ -1051,24 +1052,31 @@ func Serve(ln net.Listener) error {
go func() { go func() {
<-signals <-signals
srvr.Close() srvr.Close()
done() schedDone()
sched.unloadAllRunners() sched.unloadAllRunners()
gpu.Cleanup() gpu.Cleanup()
os.Exit(0) done()
}() }()
if err := llm.Init(); err != nil { if err := llm.Init(); err != nil {
return fmt.Errorf("unable to initialize llm library %w", err) return fmt.Errorf("unable to initialize llm library %w", err)
} }
s.sched.Run(ctx) s.sched.Run(schedCtx)
// At startup we retrieve GPU information so we can get log messages before loading a model // At startup we retrieve GPU information so we can get log messages before loading a model
// This will log warnings to the log in case we have problems with detected GPUs // This will log warnings to the log in case we have problems with detected GPUs
gpus := gpu.GetGPUInfo() gpus := gpu.GetGPUInfo()
gpus.LogDetails() gpus.LogDetails()
return srvr.Serve(ln) err = srvr.Serve(ln)
// If the server was closed by the signal handler, wait for ctx to be done so that
// unloading the runners and GPU cleanup complete before returning; otherwise return the error immediately
if !errors.Is(err, http.ErrServerClosed) {
return err
}
<-ctx.Done()
return err
} }
func waitForStream(c *gin.Context, ch chan interface{}) { func waitForStream(c *gin.Context, ch chan interface{}) {