Streamlined WaitUntilRunning
This commit is contained in:
parent
9df6c85c3a
commit
bd54b08261
1 changed file with 27 additions and 41 deletions
|
@@ -381,22 +381,17 @@ func (s *LlamaServer) Ping(ctx context.Context) error {
|
||||||
|
|
||||||
func (s *LlamaServer) WaitUntilRunning() error {
|
func (s *LlamaServer) WaitUntilRunning() error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
// TODO we need to wire up a better way to detect hangs during model load and startup of the server
|
|
||||||
expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
|
expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
|
||||||
ticker := time.NewTicker(50 * time.Millisecond)
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
slog.Info("waiting for llama runner to start responding")
|
slog.Info("waiting for llama runner to start responding")
|
||||||
var lastStatus ServerStatus = -1
|
|
||||||
for {
|
for {
|
||||||
select {
|
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||||
case err := <-s.done:
|
defer cancel()
|
||||||
msg := ""
|
status, err := s.getServerStatus(ctx)
|
||||||
if s.status != nil && s.status.LastErrMsg != "" {
|
if err != nil {
|
||||||
msg = s.status.LastErrMsg
|
slog.Debug("server not yet available", "error", err)
|
||||||
}
|
}
|
||||||
return fmt.Errorf("llama runner process has terminated: %v %s", err, msg)
|
|
||||||
case <-ticker.C:
|
|
||||||
if time.Now().After(expiresAt) {
|
if time.Now().After(expiresAt) {
|
||||||
// timeout
|
// timeout
|
||||||
msg := ""
|
msg := ""
|
||||||
|
@@ -412,25 +407,16 @@ func (s *LlamaServer) WaitUntilRunning() error {
|
||||||
}
|
}
|
||||||
return fmt.Errorf("llama runner process no longer running: %d %s", s.cmd.ProcessState.ExitCode(), msg)
|
return fmt.Errorf("llama runner process no longer running: %d %s", s.cmd.ProcessState.ExitCode(), msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
status, err := s.getServerStatus(ctx)
|
|
||||||
if err != nil && lastStatus != status {
|
|
||||||
slog.Debug("server not yet available", "error", err)
|
|
||||||
lastStatus = status
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
switch status {
|
switch status {
|
||||||
case ServerStatusLoadingModel:
|
case ServerStatusLoadingModel:
|
||||||
// TODO - this state never seems to happen with the current server.cpp code (bug?)
|
time.Sleep(time.Millisecond * 250)
|
||||||
// it doesn't respond to the health endpoint until after the model is loaded
|
|
||||||
slog.Debug("loading model")
|
slog.Debug("loading model")
|
||||||
case ServerStatusReady:
|
case ServerStatusReady:
|
||||||
slog.Debug(fmt.Sprintf("llama runner started in %f seconds", time.Since(start).Seconds()))
|
slog.Info(fmt.Sprintf("llama runner started in %0.2f seconds", time.Since(start).Seconds()))
|
||||||
return nil
|
return nil
|
||||||
}
|
default:
|
||||||
|
time.Sleep(time.Millisecond * 250)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue