From 92c81e8117c5b2b81467798fe0d1187927d9002e Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Tue, 28 May 2024 08:56:18 -0700
Subject: [PATCH] Give the final model loading more time

On some systems, 1 minute isn't sufficient to finish the load after it
hits 100%. This creates 2 distinct timers (a stall timer and a final-load
timer); both are set to the same value for now, so that the timeouts can
be refined independently later.
---
 llm/server.go | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/llm/server.go b/llm/server.go
index cf75de90..462f8484 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error {
 
 func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	start := time.Now()
-	stallDuration := 60 * time.Second
-	stallTimer := time.Now().Add(stallDuration) // give up if we stall for
+	stallDuration := 5 * time.Minute            // If no progress happens
+	finalLoadDuration := 5 * time.Minute        // After we hit 100%, give the runner more time to come online
+	stallTimer := time.Now().Add(stallDuration) // give up if we stall
 
 	slog.Info("waiting for llama runner to start responding")
 	var lastStatus ServerStatus = -1
+	fullyLoaded := false
 
 	for {
 		select {
@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 			if priorProgress != s.loadProgress {
 				slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
 				stallTimer = time.Now().Add(stallDuration)
+			} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
+				slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
+				stallTimer = time.Now().Add(finalLoadDuration)
+				fullyLoaded = true
 			}
 			time.Sleep(time.Millisecond * 250)
 			continue