diff --git a/llm/server.go b/llm/server.go
index cf75de90..462f8484 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error {
 
 func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	start := time.Now()
-	stallDuration := 60 * time.Second
-	stallTimer := time.Now().Add(stallDuration) // give up if we stall for
+	stallDuration := 5 * time.Minute            // If no progress happens
+	finalLoadDuration := 5 * time.Minute        // After we hit 100%, give the runner more time to come online
+	stallTimer := time.Now().Add(stallDuration) // give up if we stall
 
 	slog.Info("waiting for llama runner to start responding")
 	var lastStatus ServerStatus = -1
+	fullyLoaded := false
 
 	for {
 		select {
@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 			if priorProgress != s.loadProgress {
 				slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
 				stallTimer = time.Now().Add(stallDuration)
+			} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
+				slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
+				stallTimer = time.Now().Add(finalLoadDuration)
+				fullyLoaded = true
 			}
 			time.Sleep(time.Millisecond * 250)
 			continue