Merge pull request #4031 from MarkWard0110/fix/issue-3736

Fix/issue 3736: When runners are closing or expiring, the scheduler gets dirty VRAM size readings.
This commit is contained in:
Daniel Hiltgen 2024-05-01 12:13:26 -07:00 committed by GitHub
commit 4fd064bea6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 18 additions and 8 deletions

3
.gitignore vendored
View file

@ -11,4 +11,5 @@ ggml-metal.metal
.idea .idea
test_data test_data
*.crt *.crt
llm/build llm/build
__debug_bin*

View file

@ -300,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
continue continue
} }
// reap subprocess when it exits
go func() {
// Exit status managed via getServerStatus
_ = s.cmd.Wait()
}()
// TODO - make sure this is all wired up correctly // TODO - make sure this is all wired up correctly
// if err = s.WaitUntilRunning(); err != nil { // if err = s.WaitUntilRunning(); err != nil {
// slog.Error("error starting llama server", "server", servers[i], "error", err) // slog.Error("error starting llama server", "server", servers[i], "error", err)
@ -899,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
func (s *llmServer) Close() error { func (s *llmServer) Close() error {
if s.cmd != nil { if s.cmd != nil {
slog.Debug("stopping llama server") slog.Debug("stopping llama server")
return s.cmd.Process.Kill() if err := s.cmd.Process.Kill(); err != nil {
return err
}
_ = s.cmd.Wait()
slog.Debug("llama server stopped")
} }
return nil return nil

View file

@ -250,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
defer runner.refMu.Unlock() defer runner.refMu.Unlock()
if runner.expireTimer != nil { if runner.expireTimer != nil {
runner.expireTimer.Stop() runner.expireTimer.Stop()
runner.expireTimer = nil
} }
s.expiredCh <- runner s.expiredCh <- runner
}) })
@ -296,6 +297,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
runner.refMu.Lock() runner.refMu.Lock()
defer runner.refMu.Unlock() defer runner.refMu.Unlock()
runner.refCount++ runner.refCount++
if runner.expireTimer != nil {
runner.expireTimer.Stop()
runner.expireTimer = nil
}
runner.sessionDuration = pending.sessionDuration runner.sessionDuration = pending.sessionDuration
pending.successCh <- runner pending.successCh <- runner
go func() { go func() {
@ -426,6 +431,10 @@ type runnerRef struct {
// The refMu must already be held when calling unload // The refMu must already be held when calling unload
func (runner *runnerRef) unload() { func (runner *runnerRef) unload() {
if runner.expireTimer != nil {
runner.expireTimer.Stop()
runner.expireTimer = nil
}
if runner.llama != nil { if runner.llama != nil {
runner.llama.Close() runner.llama.Close()
} }