relay model runner error message to client (#720)

* give direction to user when runner fails
* also relay errors from timeout
* increase timeout to 3 minutes
Bruce MacDonald, 2023-10-12 11:16:37 -04:00 (committed by GitHub)
parent 92578798bb
commit 56497663c8
2 changed files with 42 additions and 15 deletions

@@ -479,18 +479,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
     }

     if err := client.Generate(cancelCtx, &request, fn); err != nil {
-        if strings.Contains(err.Error(), "failed to load model") {
-            // tell the user to check the server log, if it exists locally
-            home, nestedErr := os.UserHomeDir()
-            if nestedErr != nil {
-                // return the original error
-                return err
-            }
-            logPath := filepath.Join(home, ".ollama", "logs", "server.log")
-            if _, nestedErr := os.Stat(logPath); nestedErr == nil {
-                err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath)
-            }
-        } else if strings.Contains(err.Error(), "context canceled") && abort {
+        if strings.Contains(err.Error(), "context canceled") && abort {
             spinner.Finish()
             return nil
         }

@@ -248,6 +248,25 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
     return 1
 }

+// StatusWriter is a writer that captures error messages from the llama runner process
+type StatusWriter struct {
+    ErrCh chan error
+}
+
+func NewStatusWriter() *StatusWriter {
+    return &StatusWriter{
+        ErrCh: make(chan error, 1),
+    }
+}
+
+func (w *StatusWriter) Write(b []byte) (int, error) {
+    if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
+        err := fmt.Errorf("llama runner: %s", after)
+        w.ErrCh <- err
+    }
+    return os.Stderr.Write(b)
+}
+
 func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
     fileInfo, err := os.Stat(model)
     if err != nil {
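
Note: StatusWriter satisfies io.Writer, so it can stand in for os.Stderr while still echoing everything it sees. The standalone sketch below shows how bytes.Cut extracts the message that ends up on ErrCh; the sample stderr line is hypothetical, not from the commit.

package main

import (
    "bytes"
    "fmt"
)

func main() {
    // A hypothetical line the runner might print to stderr.
    line := []byte("llama.cpp: error: failed to allocate buffer")

    // bytes.Cut splits around the first "error:"; after holds the rest
    // of the line, which becomes the message relayed to the client.
    if _, after, ok := bytes.Cut(line, []byte("error:")); ok {
        fmt.Printf("llama runner: %s\n", after)
    }
}
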
@@ -294,6 +313,8 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
         params = append(params, "--numa")
     }

+    var runnerErr error
+
     // start the llama.cpp server with a retry in case the port is already in use
     for _, runner := range runners {
         if _, err := os.Stat(runner.Path); err != nil {
@@ -310,7 +331,8 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
         )
         cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path)))
         cmd.Stdout = os.Stderr
-        cmd.Stderr = os.Stderr
+        statusWriter := NewStatusWriter()
+        cmd.Stderr = statusWriter

         llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}}
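
Note: this substitution works because exec.Cmd.Stderr accepts any io.Writer, not just a file. A minimal standalone sketch of the same wiring; the inspectWriter type and the shell command are illustrative, not part of the commit:

package main

import (
    "log"
    "os"
    "os/exec"
)

// inspectWriter forwards the child's stderr to our own stderr while
// getting a chance to look at each chunk first, the same role
// StatusWriter plays above.
type inspectWriter struct{}

func (inspectWriter) Write(b []byte) (int, error) {
    // inspection of b would happen here
    return os.Stderr.Write(b)
}

func main() {
    cmd := exec.Command("sh", "-c", "echo 'error: boom' 1>&2")
    cmd.Stdout = os.Stderr
    cmd.Stderr = inspectWriter{}
    if err := cmd.Run(); err != nil {
        log.Fatal(err)
    }
}
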
@@ -333,6 +355,17 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
         if err := waitForServer(llm); err != nil {
             log.Printf("error starting llama runner: %v", err)
             llm.Close()
+
+            // default the runnerErr to the error returned by the most recent llama runner process
+            runnerErr = err
+
+            // capture the error directly from the runner process, if any
+            select {
+            case runnerErr = <-statusWriter.ErrCh:
+            default:
+                // the runner process probably timed out
+            }
+
             // try again
             continue
         }
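
Note: the select with a default case is Go's non-blocking channel receive: if StatusWriter captured an error it replaces the generic waitForServer error, otherwise the default branch keeps it. Since ErrCh is buffered with capacity 1, at most one captured error is drained here. A standalone sketch of the pattern:

package main

import (
    "errors"
    "fmt"
)

func main() {
    errCh := make(chan error, 1)
    errCh <- errors.New("llama runner: out of memory") // simulate a captured error

    // Start from the generic failure, then prefer the runner's own
    // message if one was captured.
    runnerErr := errors.New("timed out waiting for llama runner to start")
    select {
    case runnerErr = <-errCh:
    default:
        // nothing captured; keep the timeout error
    }
    fmt.Println(runnerErr) // prints the captured runner error
}
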
@@ -341,12 +374,17 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
         return llm, nil
     }

+    if runnerErr != nil {
+        // this is the error returned from the llama runner process that failed most recently
+        return nil, runnerErr
+    }
+
     return nil, fmt.Errorf("failed to start a llama runner")
 }

 func waitForServer(llm *llama) error {
     start := time.Now()
-    expiresAt := time.Now().Add(2 * time.Minute) // be generous with timeout, large models can take a while to load
+    expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
     ticker := time.NewTicker(200 * time.Millisecond)
     defer ticker.Stop()

@@ -359,7 +397,7 @@ func waitForServer(llm *llama) error {
         case <-ticker.C:
             if time.Now().After(expiresAt) {
                 // timeout
-                return fmt.Errorf("llama runner did not start within allotted time, retrying")
+                return fmt.Errorf("timed out waiting for llama runner to start")
             }

             if err := llm.Ping(context.Background()); err == nil {
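
Note: waitForServer pairs a 200ms ticker with a wall-clock deadline, now raised to 3 minutes, polling the runner until it answers or time runs out. A standalone sketch of that loop shape, with a short deadline for demonstration; ready() is a hypothetical stand-in for llm.Ping:

package main

import (
    "fmt"
    "time"
)

func ready() bool { return false } // stand-in for llm.Ping

func main() {
    expiresAt := time.Now().Add(2 * time.Second) // short deadline for the demo
    ticker := time.NewTicker(200 * time.Millisecond)
    defer ticker.Stop()

    for range ticker.C {
        if time.Now().After(expiresAt) {
            fmt.Println("timed out waiting for server to start")
            return
        }
        if ready() {
            fmt.Println("server is ready")
            return
        }
    }
}
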