Give the final model loading more time
On some systems, 1 minute isn't sufficient to finish the load after it hits 100%. This creates 2 distinct timers, although they're both set to the same value for now so we can refine the timeouts further.
This commit is contained in:
parent
9db0996ed4
commit
92c81e8117
1 changed file with 8 additions and 2 deletions
|
@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error {
|
||||||
|
|
||||||
func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
|
func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
stallDuration := 60 * time.Second
|
stallDuration := 5 * time.Minute // If no progress happens
|
||||||
stallTimer := time.Now().Add(stallDuration) // give up if we stall for
|
finalLoadDuration := 5 * time.Minute // After we hit 100%, give the runner more time to come online
|
||||||
|
stallTimer := time.Now().Add(stallDuration) // give up if we stall
|
||||||
|
|
||||||
slog.Info("waiting for llama runner to start responding")
|
slog.Info("waiting for llama runner to start responding")
|
||||||
var lastStatus ServerStatus = -1
|
var lastStatus ServerStatus = -1
|
||||||
|
fullyLoaded := false
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
|
||||||
if priorProgress != s.loadProgress {
|
if priorProgress != s.loadProgress {
|
||||||
slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
|
slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
|
||||||
stallTimer = time.Now().Add(stallDuration)
|
stallTimer = time.Now().Add(stallDuration)
|
||||||
|
} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
|
||||||
|
slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
|
||||||
|
stallTimer = time.Now().Add(finalLoadDuration)
|
||||||
|
fullyLoaded = true
|
||||||
}
|
}
|
||||||
time.Sleep(time.Millisecond * 250)
|
time.Sleep(time.Millisecond * 250)
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Reference in a new issue