Fix race in shutdown logic
Ensure the runners are terminated
This commit is contained in:
parent
83d6d46e29
commit
3ae2f441e0
1 changed file with 13 additions and 5 deletions
|
@@ -1036,7 +1036,8 @@ func Serve(ln net.Listener) error {
|
|||
}
|
||||
|
||||
ctx, done := context.WithCancel(context.Background())
|
||||
sched := InitScheduler(ctx)
|
||||
schedCtx, schedDone := context.WithCancel(ctx)
|
||||
sched := InitScheduler(schedCtx)
|
||||
s := &Server{addr: ln.Addr(), sched: sched}
|
||||
r := s.GenerateRoutes()
|
||||
|
||||
|
@@ -1051,24 +1052,31 @@ func Serve(ln net.Listener) error {
|
|||
go func() {
|
||||
<-signals
|
||||
srvr.Close()
|
||||
done()
|
||||
schedDone()
|
||||
sched.unloadAllRunners()
|
||||
gpu.Cleanup()
|
||||
os.Exit(0)
|
||||
done()
|
||||
}()
|
||||
|
||||
if err := llm.Init(); err != nil {
|
||||
return fmt.Errorf("unable to initialize llm library %w", err)
|
||||
}
|
||||
|
||||
s.sched.Run(ctx)
|
||||
s.sched.Run(schedCtx)
|
||||
|
||||
// At startup we retrieve GPU information so we can get log messages before loading a model
|
||||
// This will log warnings to the log in case we have problems with detected GPUs
|
||||
gpus := gpu.GetGPUInfo()
|
||||
gpus.LogDetails()
|
||||
|
||||
return srvr.Serve(ln)
|
||||
err = srvr.Serve(ln)
|
||||
// If server is closed from the signal handler, wait for the ctx to be done
|
||||
// otherwise error out quickly
|
||||
if !errors.Is(err, http.ErrServerClosed) {
|
||||
return err
|
||||
}
|
||||
<-ctx.Done()
|
||||
return err
|
||||
}
|
||||
|
||||
func waitForStream(c *gin.Context, ch chan interface{}) {
|
||||
|
|
Loading…
Reference in a new issue