relay model runner error message to client (#720)

* give direction to user when runner fails
* also relay errors from timeout
* increase timeout to 3 minutes

parent 92578798bb
commit 56497663c8

2 changed files with 42 additions and 15 deletions
cmd/cmd.go | 13

@@ -479,18 +479,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
 	}
 
 	if err := client.Generate(cancelCtx, &request, fn); err != nil {
-		if strings.Contains(err.Error(), "failed to load model") {
-			// tell the user to check the server log, if it exists locally
-			home, nestedErr := os.UserHomeDir()
-			if nestedErr != nil {
-				// return the original error
-				return err
-			}
-			logPath := filepath.Join(home, ".ollama", "logs", "server.log")
-			if _, nestedErr := os.Stat(logPath); nestedErr == nil {
-				err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath)
-			}
-		} else if strings.Contains(err.Error(), "context canceled") && abort {
+		if strings.Contains(err.Error(), "context canceled") && abort {
			spinner.Finish()
			return nil
		}
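The surviving branch treats a user-initiated cancel as a clean exit rather than a failure; the removed "failed to load model" guidance is superseded by the server relaying the runner's own error message (see llm/llama.go below). A minimal standalone sketch of the client-side pattern, where runGenerate is a hypothetical stand-in for client.Generate and not code from this commit:

package main

import (
	"context"
	"fmt"
	"strings"
)

// runGenerate is a hypothetical stand-in for the client.Generate call in cmd/cmd.go.
func runGenerate(ctx context.Context) error {
	<-ctx.Done()
	return fmt.Errorf("do request: %w", ctx.Err())
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	abort := true // in the real CLI this flag is set by the interrupt handler
	cancel()      // simulate the user cancelling mid-generation

	if err := runGenerate(ctx); err != nil {
		// same check as the hunk above: a user-initiated cancel is a clean exit, not a failure
		if strings.Contains(err.Error(), "context canceled") && abort {
			fmt.Println("aborted by user")
			return
		}
		fmt.Println("error:", err)
	}
}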
llm/llama.go | 44

@@ -248,6 +248,25 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 	return 1
 }
 
+// StatusWriter is a writer that captures error messages from the llama runner process
+type StatusWriter struct {
+	ErrCh chan error
+}
+
+func NewStatusWriter() *StatusWriter {
+	return &StatusWriter{
+		ErrCh: make(chan error, 1),
+	}
+}
+
+func (w *StatusWriter) Write(b []byte) (int, error) {
+	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
+		err := fmt.Errorf("llama runner: %s", after)
+		w.ErrCh <- err
+	}
+	return os.Stderr.Write(b)
+}
+
 func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
 	fileInfo, err := os.Stat(model)
 	if err != nil {
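To see how this writer behaves, here is a self-contained sketch (it repeats the StatusWriter definition from the hunk so it compiles on its own): ordinary output passes through to stderr untouched, while any chunk containing "error:" is also mirrored onto the channel.

package main

import (
	"bytes"
	"fmt"
	"os"
)

// StatusWriter, repeated from the hunk above so this sketch is standalone.
type StatusWriter struct {
	ErrCh chan error
}

func NewStatusWriter() *StatusWriter {
	return &StatusWriter{ErrCh: make(chan error, 1)}
}

func (w *StatusWriter) Write(b []byte) (int, error) {
	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
		w.ErrCh <- fmt.Errorf("llama runner: %s", after)
	}
	return os.Stderr.Write(b)
}

func main() {
	w := NewStatusWriter()

	// ordinary log output passes through to stderr; nothing is captured
	fmt.Fprintln(w, "llama server listening at http://127.0.0.1:8080")

	// a chunk containing "error:" is also surfaced on ErrCh
	// (the captured message keeps the chunk's trailing newline)
	fmt.Fprintln(w, "error: failed to load model")

	select {
	case err := <-w.ErrCh:
		fmt.Println("captured:", err)
	default:
		fmt.Println("nothing captured")
	}
}

Note that ErrCh is buffered with capacity one, so capturing the first error never blocks Write; the select in newLlama further down drains at most one message per failed start.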
@@ -294,6 +313,8 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		params = append(params, "--numa")
 	}
 
+	var runnerErr error
+
 	// start the llama.cpp server with a retry in case the port is already in use
 	for _, runner := range runners {
 		if _, err := os.Stat(runner.Path); err != nil {
@@ -310,7 +331,8 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		)
 		cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path)))
 		cmd.Stdout = os.Stderr
-		cmd.Stderr = os.Stderr
+		statusWriter := NewStatusWriter()
+		cmd.Stderr = statusWriter
 
 		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}}
 
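The one-line swap above is what routes the runner's stderr through the status writer; os/exec copies the child process's stderr into whatever io.Writer is assigned to cmd.Stderr. A standalone sketch of that mechanism, with a hypothetical memWriter in place of StatusWriter and an illustrative shell command:

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// memWriter is a hypothetical io.Writer that remembers everything written to it.
type memWriter struct{ sb strings.Builder }

func (m *memWriter) Write(b []byte) (int, error) { return m.sb.Write(b) }

func main() {
	w := &memWriter{}
	// illustrative command: print one line on stderr
	cmd := exec.Command("sh", "-c", "echo 'error: model file not found' 1>&2")
	cmd.Stderr = w // any io.Writer works here, which is what the hunk relies on
	if err := cmd.Run(); err != nil {
		fmt.Println("run:", err)
	}
	fmt.Printf("captured from the child's stderr: %q\n", w.sb.String())
}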
@@ -333,6 +355,17 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		if err := waitForServer(llm); err != nil {
 			log.Printf("error starting llama runner: %v", err)
 			llm.Close()
+
+			// default the runnerErr to the error returned by the most recent llama runner process
+			runnerErr = err
+
+			// capture the error directly from the runner process, if any
+			select {
+			case runnerErr = <-statusWriter.ErrCh:
+			default:
+				// the runner process probably timed out
+			}
+
 			// try again
 			continue
 		}
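The select with a default branch is Go's non-blocking channel receive: if the runner wrote something to ErrCh before dying, that more specific message wins; otherwise control falls through immediately and the generic error from waitForServer stands. The pattern in isolation:

package main

import (
	"errors"
	"fmt"
)

func main() {
	errCh := make(chan error, 1)
	err := errors.New("timed out waiting for llama runner to start") // generic fallback

	// comment out the next line to see the fallback path taken instead
	errCh <- errors.New("llama runner: failed to load model")

	// non-blocking receive: prefer a specific runner error if one is waiting
	select {
	case err = <-errCh:
	default:
		// nothing on the channel; keep the generic error
	}
	fmt.Println(err) // llama runner: failed to load model
}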
@@ -341,12 +374,17 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		return llm, nil
 	}
 
+	if runnerErr != nil {
+		// this is the error returned from the llama runner process that failed most recently
+		return nil, runnerErr
+	}
+
 	return nil, fmt.Errorf("failed to start a llama runner")
 }
 
 func waitForServer(llm *llama) error {
 	start := time.Now()
-	expiresAt := time.Now().Add(2 * time.Minute) // be generous with timeout, large models can take a while to load
+	expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
 	ticker := time.NewTicker(200 * time.Millisecond)
 	defer ticker.Stop()
 
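The timeout bump from 2 to 3 minutes lives inside a ticker-driven poll loop. A sketch of that loop's shape, under the assumption that a ready callback stands in for llm.Ping (the real function selects over more than the ticker, as the case <-ticker.C: in the next hunk shows):

package main

import (
	"fmt"
	"time"
)

// pollUntil sketches the shape of waitForServer: probe on every tick until
// ready reports true or the deadline passes. ready is a hypothetical stand-in
// for llm.Ping.
func pollUntil(interval, timeout time.Duration, ready func() bool) error {
	expiresAt := time.Now().Add(timeout)
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for range ticker.C {
		if time.Now().After(expiresAt) {
			return fmt.Errorf("timed out waiting for llama runner to start")
		}
		if ready() {
			return nil // the probe answered; the server is ready
		}
	}
	return nil // unreachable: the ticker channel never closes
}

func main() {
	start := time.Now()
	err := pollUntil(200*time.Millisecond, 3*time.Minute, func() bool {
		return time.Since(start) > time.Second // pretend the server takes ~1s to come up
	})
	fmt.Printf("waited %s, err: %v\n", time.Since(start).Round(time.Millisecond), err)
}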
@@ -359,7 +397,7 @@ func waitForServer(llm *llama) error {
 		case <-ticker.C:
 			if time.Now().After(expiresAt) {
 				// timeout
-				return fmt.Errorf("llama runner did not start within allotted time, retrying")
+				return fmt.Errorf("timed out waiting for llama runner to start")
 			}
 
 			if err := llm.Ping(context.Background()); err == nil {