From c5ff443b9f7264d0973dcc2ce671d0ff174cc34f Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 9 Apr 2024 16:35:10 -0700
Subject: [PATCH] Handle very slow model loads

During testing, we're seeing some models take over 3 minutes.
---
 llm/server.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llm/server.go b/llm/server.go
index 0e084d5a..160effe7 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -383,7 +383,8 @@ func (s *LlamaServer) Ping(ctx context.Context) error {
 
 func (s *LlamaServer) waitUntilRunning() error {
 	start := time.Now()
-	expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
+	// TODO we need to wire up a better way to detect hangs during model load and startup of the server
+	expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
 	ticker := time.NewTicker(50 * time.Millisecond)
 	defer ticker.Stop()
 