Merge pull request #4031 from MarkWard0110/fix/issue-3736
Fix/issue 3736: when runners are closing or expiring, the scheduler gets dirty VRAM size readings.
commit 4fd064bea6
3 changed files with 18 additions and 8 deletions
.gitignore (vendored): 3 changes
@@ -11,4 +11,5 @@ ggml-metal.metal
 .idea
 test_data
 *.crt
-llm/build
+llm/build
+__debug_bin*
@@ -300,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
             continue
         }

-        // reap subprocess when it exits
-        go func() {
-            // Exit status managed via getServerStatus
-            _ = s.cmd.Wait()
-        }()
-
         // TODO - make sure this is all wired up correctly
         // if err = s.WaitUntilRunning(); err != nil {
         // 	slog.Error("error starting llama server", "server", servers[i], "error", err)
@@ -899,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
 func (s *llmServer) Close() error {
     if s.cmd != nil {
         slog.Debug("stopping llama server")
-        return s.cmd.Process.Kill()
+        if err := s.cmd.Process.Kill(); err != nil {
+            return err
+        }
+
+        _ = s.cmd.Wait()
+
+        slog.Debug("llama server stopped")
     }

     return nil
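Taken together with the hunk above that drops the detached reaper goroutine from NewLlamaServer, the point of the new Close() body is that Kill() only signals the child process; the process is not actually gone, and its VRAM not reliably released, until it has been waited on. A minimal sketch of the same kill-then-wait pattern follows; the stopRunner helper and the `sleep` command standing in for the runner subprocess are assumptions for illustration, not the ollama sources:

```go
// Sketch only: kill the child, then block until it has been reaped, so a
// caller that measures free VRAM right after this returns does not read a
// value while the old process still holds GPU memory.
package main

import (
	"log/slog"
	"os/exec"
)

func stopRunner(cmd *exec.Cmd) error {
	slog.Debug("stopping llama server")
	if err := cmd.Process.Kill(); err != nil {
		return err
	}
	// Wait returns a non-nil error for a killed process; that is expected
	// here and deliberately ignored, mirroring `_ = s.cmd.Wait()` above.
	_ = cmd.Wait()
	slog.Debug("llama server stopped")
	return nil
}

func main() {
	// "sleep 60" is only a stand-in for the runner subprocess (assumes a
	// Unix-like system with a sleep binary on PATH).
	cmd := exec.Command("sleep", "60")
	if err := cmd.Start(); err != nil {
		slog.Error("start failed", "error", err)
		return
	}
	if err := stopRunner(cmd); err != nil {
		slog.Error("stop failed", "error", err)
	}
}
```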
@@ -250,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
         defer runner.refMu.Unlock()
         if runner.expireTimer != nil {
             runner.expireTimer.Stop()
+            runner.expireTimer = nil
         }
         s.expiredCh <- runner
     })
@@ -296,6 +297,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
     runner.refMu.Lock()
     defer runner.refMu.Unlock()
     runner.refCount++
+    if runner.expireTimer != nil {
+        runner.expireTimer.Stop()
+        runner.expireTimer = nil
+    }
     runner.sessionDuration = pending.sessionDuration
     pending.successCh <- runner
     go func() {
@@ -426,6 +431,10 @@ type runnerRef struct {

 // The refMu must already be held when calling unload
 func (runner *runnerRef) unload() {
+    if runner.expireTimer != nil {
+        runner.expireTimer.Stop()
+        runner.expireTimer = nil
+    }
     if runner.llama != nil {
         runner.llama.Close()
     }
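All three scheduler hunks apply the same discipline to the expiration timer: wherever it is stopped, in the AfterFunc callback, when a loaded runner is reused, and in unload, the field is also cleared to nil so no later path can race against a stale timer. A minimal sketch of that stop-and-nil pattern follows; the names (runnerSketch, scheduleExpiry, cancelExpiry) are hypothetical and not the scheduler's real types:

```go
// Sketch only: an expiration timer guarded by a mutex, stopped and cleared
// both from its own callback and from a cancel path.
package main

import (
	"fmt"
	"sync"
	"time"
)

type runnerSketch struct {
	mu          sync.Mutex
	expireTimer *time.Timer
	expired     chan *runnerSketch
}

// scheduleExpiry arms the timer; the callback clears the field before
// signalling expiry, so other paths can see that nothing is pending.
func (r *runnerSketch) scheduleExpiry(d time.Duration) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.expireTimer = time.AfterFunc(d, func() {
		r.mu.Lock()
		if r.expireTimer != nil {
			r.expireTimer.Stop()
			r.expireTimer = nil // no expiration pending any more
		}
		r.mu.Unlock()
		r.expired <- r
	})
}

// cancelExpiry mirrors the useLoadedRunner and unload hunks: stop and clear
// the timer so a stale callback cannot fire against a runner that is being
// reused or torn down.
func (r *runnerSketch) cancelExpiry() {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.expireTimer != nil {
		r.expireTimer.Stop()
		r.expireTimer = nil
	}
}

func main() {
	r := &runnerSketch{expired: make(chan *runnerSketch, 1)}
	r.scheduleExpiry(50 * time.Millisecond)
	r.cancelExpiry() // reuse path: expiry never fires
	select {
	case <-r.expired:
		fmt.Println("expired")
	case <-time.After(200 * time.Millisecond):
		fmt.Println("expiry cancelled cleanly")
	}
}
```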