prevent waiting on exited command (#752)

* prevent waiting on exited command
* close llama runner once
This commit is contained in:
parent f2ba1311aa
commit 77295f716e

1 changed file with 37 additions and 27 deletions
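In short: exec.Cmd.Wait may only be called once per process, but the startup path and Close() previously raced to observe the runner's exit. The commit moves the single Wait() call into a monitor goroutine that records the exit error and closes an exit channel exactly once (guarded by sync.Once); waitForServer and Close then watch that channel instead of calling Wait themselves.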
llm/llama.go
@@ -20,6 +20,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/jmorganca/ollama/api"
@@ -180,6 +181,9 @@ type Running struct {
 	Port   int
 	Cmd    *exec.Cmd
 	Cancel context.CancelFunc
+	exitOnce sync.Once
+	exitCh   chan error // channel to receive the exit status of the subprocess
+	exitErr  error      // error returned by the subprocess
 }
 
 type llama struct {
@@ -308,7 +312,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		cmd.Stdout = os.Stderr
 		cmd.Stderr = os.Stderr
 
-		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
+		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}}
 
 		log.Print("starting llama runner")
 		if err := llm.Cmd.Start(); err != nil {
@@ -316,14 +320,14 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 			continue
 		}
 
-		// monitor the command, it is blocking, so if it exits we need to capture that
+		// monitor the llama runner process and signal when it exits
 		go func() {
-			err := llm.Cmd.Wait() // this will block until the command exits
-			if err != nil {
-				log.Printf("llama runner exited with error: %v", err)
-			} else {
-				log.Printf("llama runner exited")
-			}
+			err := llm.Cmd.Wait()
+			llm.exitErr = err
+			// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
+			llm.exitOnce.Do(func() {
+				close(llm.exitCh)
+			})
 		}()
 
 		if err := waitForServer(llm); err != nil {
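The shape of this monitor is reusable on its own. Below is a minimal, self-contained sketch of the same pattern; monitoredCmd, start, and done are illustrative names invented here, not part of the diff:

package main

import (
	"log"
	"os/exec"
	"sync"
)

// monitoredCmd lets any number of goroutines observe a subprocess exit,
// even though exec.Cmd.Wait may only be called once.
type monitoredCmd struct {
	cmd      *exec.Cmd
	exitOnce sync.Once
	done     chan struct{} // closed exactly once when the process exits
	exitErr  error         // written before done is closed
}

func start(name string, args ...string) (*monitoredCmd, error) {
	m := &monitoredCmd{cmd: exec.Command(name, args...), done: make(chan struct{})}
	if err := m.cmd.Start(); err != nil {
		return nil, err
	}
	go func() {
		m.exitErr = m.cmd.Wait() // the one and only Wait call
		m.exitOnce.Do(func() { close(m.done) })
	}()
	return m, nil
}

func main() {
	m, err := start("sleep", "1")
	if err != nil {
		log.Fatal(err)
	}
	<-m.done // closing the channel unblocks every receiver at once
	log.Printf("subprocess exited, err=%v", m.exitErr)
}

Strictly, a bare close would suffice here since only the monitor goroutine ever closes the channel; the sync.Once in the commit makes the close idempotent if another code path ever needs to signal exit as well.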
@@ -341,29 +345,30 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 }
 
 func waitForServer(llm *llama) error {
-	// wait for the server to start responding
 	start := time.Now()
 	expiresAt := time.Now().Add(2 * time.Minute) // be generous with timeout, large models can take a while to load
 	ticker := time.NewTicker(200 * time.Millisecond)
+	defer ticker.Stop()
 
 	log.Print("waiting for llama runner to start responding")
-	for range ticker.C {
-		if time.Now().After(expiresAt) {
-			return fmt.Errorf("llama runner did not start within alloted time, retrying")
-		}
-
-		// check if the server process has terminated
-		if llm.Cmd.ProcessState != nil && llm.Cmd.ProcessState.Exited() {
-			return fmt.Errorf("llama runner process has terminated")
-		}
-
-		if err := llm.Ping(context.Background()); err == nil {
-			break
+	for {
+		select {
+		case <-llm.exitCh:
+			// failed to start subprocess
+			return fmt.Errorf("llama runner process has terminated")
+		case <-ticker.C:
+			if time.Now().After(expiresAt) {
+				// timeout
+				return fmt.Errorf("llama runner did not start within allotted time, retrying")
+			}
+
+			if err := llm.Ping(context.Background()); err == nil {
+				// success
+				log.Printf("llama runner started in %f seconds", time.Since(start).Seconds())
+				return nil
+			}
 		}
 	}
-
-	log.Printf("llama runner started in %f seconds", time.Since(start).Seconds())
-	return nil
 }
 
 func (llm *llama) Close() {
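The new loop races the readiness poll against the exit channel, so a crashed runner fails fast instead of waiting out the two-minute deadline or polling Cmd.ProcessState (which is only populated after Wait returns, and so raced with the monitor goroutine). Note also the added defer ticker.Stop(): the old for range ticker.C loop leaked the ticker on every return. A stripped-down sketch of the same select shape, with ready as a hypothetical stand-in for llm.Ping:

import (
	"fmt"
	"time"
)

// waitUntilReady polls ready until it succeeds, the subprocess exits,
// or the deadline passes; exited is the channel the monitor goroutine closes.
func waitUntilReady(exited <-chan struct{}, ready func() error) error {
	expiresAt := time.Now().Add(2 * time.Minute)
	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-exited:
			return fmt.Errorf("subprocess terminated before becoming ready")
		case <-ticker.C:
			if time.Now().After(expiresAt) {
				return fmt.Errorf("subprocess did not become ready in time")
			}
			if err := ready(); err == nil {
				return nil // ready
			}
		}
	}
}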
@@ -371,8 +376,13 @@ func (llm *llama) Close() {
 	llm.Cancel()
 
 	// wait for the command to exit to prevent race conditions with the next run
-	if err := llm.Cmd.Wait(); err != nil {
-		log.Printf("llama runner exited: %v", err)
-	}
+	<-llm.exitCh
+	err := llm.exitErr
+
+	if err != nil {
+		log.Printf("llama runner stopped with error: %v", err)
+	} else {
+		log.Print("llama runner stopped successfully")
+	}
 }
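Close now blocks on the exit channel rather than calling Wait a second time, which would fail with "exec: Wait was already called" since the monitor goroutine owns the only Wait call. Because a receive from a closed channel returns immediately, this works whether the runner died long ago or is only now being torn down by llm.Cancel(). Continuing the illustrative monitoredCmd sketch from above (add "context" to its imports):

// stop mirrors the new Close: signal termination, wait for the monitor
// goroutine to observe the exit, then report the recorded error.
func (m *monitoredCmd) stop(cancel context.CancelFunc) {
	cancel()  // ask the subprocess to terminate
	<-m.done  // returns immediately if the process has already exited
	if m.exitErr != nil {
		log.Printf("runner stopped with error: %v", m.exitErr)
	} else {
		log.Print("runner stopped successfully")
	}
}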