Fix race in shutdown logic

Ensure the runners are terminated
2024-05-09 15:47:02 -07:00 · 2024-05-09 15:47:02 -07:00 · 3ae2f441e0
commit 3ae2f441e0
parent 83d6d46e29
1 changed files with 13 additions and 5 deletions
--- a/server/routes.go
+++ b/server/routes.go
@ -1036,7 +1036,8 @@ func Serve(ln net.Listener) error {
 	}
 	ctx, done := context.WithCancel(context.Background())
-	sched := InitScheduler(ctx)
+	schedCtx, schedDone := context.WithCancel(ctx)
 	sched := InitScheduler(schedCtx)
 	s := &Server{addr: ln.Addr(), sched: sched}
 	r := s.GenerateRoutes()
@ -1051,24 +1052,31 @@ func Serve(ln net.Listener) error {
 	go func() {
 		<-signals
 		srvr.Close()
-		done()
+		schedDone()
 		sched.unloadAllRunners()
 		gpu.Cleanup()
-		os.Exit(0)
+		done()
 	}()
 	if err := llm.Init(); err != nil {
 		return fmt.Errorf("unable to initialize llm library %w", err)
 	}
-	s.sched.Run(ctx)
+	s.sched.Run(schedCtx)
 	// At startup we retrieve GPU information so we can get log messages before loading a model
 	// This will log warnings to the log in case we have problems with detected GPUs
 	gpus := gpu.GetGPUInfo()
 	gpus.LogDetails()
-	return srvr.Serve(ln)
+	err = srvr.Serve(ln)
 	// If server is closed from the signal handler, wait for the ctx to be done
 	// otherwise error out quickly
 	if !errors.Is(err, http.ErrServerClosed) {
 		return err
 	}
 	<-ctx.Done()
 	return err
 }
 func waitForStream(c *gin.Context, ch chan interface{}) {