From 92c81e8117c5b2b81467798fe0d1187927d9002e Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 28 May 2024 08:56:18 -0700 Subject: [PATCH] Give the final model loading more time On some systems, 1 minute isn't sufficient to finish the load after it hits 100%. This creates 2 distinct timers, although they're both set to the same value for now so we can refine the timeouts further. --- llm/server.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/llm/server.go b/llm/server.go index cf75de90..462f8484 100644 --- a/llm/server.go +++ b/llm/server.go @@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error { func (s *llmServer) WaitUntilRunning(ctx context.Context) error { start := time.Now() - stallDuration := 60 * time.Second - stallTimer := time.Now().Add(stallDuration) // give up if we stall for + stallDuration := 5 * time.Minute // If no progress happens + finalLoadDuration := 5 * time.Minute // After we hit 100%, give the runner more time to come online + stallTimer := time.Now().Add(stallDuration) // give up if we stall slog.Info("waiting for llama runner to start responding") var lastStatus ServerStatus = -1 + fullyLoaded := false for { select { @@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error { if priorProgress != s.loadProgress { slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress)) stallTimer = time.Now().Add(stallDuration) + } else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 { + slog.Debug("model load completed, waiting for server to become available", "status", status.ToString()) + stallTimer = time.Now().Add(finalLoadDuration) + fullyLoaded = true } time.Sleep(time.Millisecond * 250) continue