diff --git a/.gitignore b/.gitignore index e0362a19..0d826ab6 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ ggml-metal.metal .idea test_data *.crt -llm/build \ No newline at end of file +llm/build +__debug_bin* \ No newline at end of file diff --git a/llm/server.go b/llm/server.go index 231100a0..b41f393d 100644 --- a/llm/server.go +++ b/llm/server.go @@ -300,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr continue } - // reap subprocess when it exits - go func() { - // Exit status managed via getServerStatus - _ = s.cmd.Wait() - }() - // TODO - make sure this is all wired up correctly // if err = s.WaitUntilRunning(); err != nil { // slog.Error("error starting llama server", "server", servers[i], "error", err) @@ -899,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error func (s *llmServer) Close() error { if s.cmd != nil { slog.Debug("stopping llama server") - return s.cmd.Process.Kill() + if err := s.cmd.Process.Kill(); err != nil { + return err + } + + _ = s.cmd.Wait() + + slog.Debug("llama server stopped") } return nil diff --git a/server/sched.go b/server/sched.go index 0a6738a2..61c5e1b3 100644 --- a/server/sched.go +++ b/server/sched.go @@ -250,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) { defer runner.refMu.Unlock() if runner.expireTimer != nil { runner.expireTimer.Stop() + runner.expireTimer = nil } s.expiredCh <- runner }) @@ -296,6 +297,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm runner.refMu.Lock() defer runner.refMu.Unlock() runner.refCount++ + if runner.expireTimer != nil { + runner.expireTimer.Stop() + runner.expireTimer = nil + } runner.sessionDuration = pending.sessionDuration pending.successCh <- runner go func() { @@ -426,6 +431,10 @@ type runnerRef struct { // The refMu must already be held when calling unload func (runner *runnerRef) unload() { + if runner.expireTimer != nil { + runner.expireTimer.Stop() + runner.expireTimer = nil + } if runner.llama != nil { runner.llama.Close() }