prevent waiting on exited command (#752)

* prevent waiting on exited command * close llama runner once
2023-10-11 12:32:13 -04:00 · 2023-10-11 12:32:13 -04:00 · 77295f716e
commit 77295f716e
parent f2ba1311aa
1 changed files with 37 additions and 27 deletions
--- a/llm/llama.go
+++ b/llm/llama.go
@ -20,6 +20,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
 	"sync"
 	"time"
 	"github.com/jmorganca/ollama/api"
@ -177,9 +178,12 @@ type llamaHyperparameters struct {
 }
 type Running struct {
-	Port   int
+	Port     int
-	Cmd    *exec.Cmd
+	Cmd      *exec.Cmd
-	Cancel context.CancelFunc
+	Cancel   context.CancelFunc
 	exitOnce sync.Once
 	exitCh   chan error // channel to receive the exit status of the subprocess
 	exitErr  error      // error returned by the subprocess
 }
 type llama struct {
@ -308,7 +312,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		cmd.Stdout = os.Stderr
 		cmd.Stderr = os.Stderr
-		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
+		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}}
 		log.Print("starting llama runner")
 		if err := llm.Cmd.Start(); err != nil {
@ -316,14 +320,14 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 			continue
 		}
-		// monitor the command, it is blocking, so if it exits we need to capture that
+		// monitor the llama runner process and signal when it exits
 		go func() {
-			err := llm.Cmd.Wait() // this will block until the command exits
+			err := llm.Cmd.Wait()
-			if err != nil {
+			llm.exitErr = err
-				log.Printf("llama runner exited with error: %v", err)
+			// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
-			} else {
+			llm.exitOnce.Do(func() {
-				log.Printf("llama runner exited")
+				close(llm.exitCh)
-			}
+			})
 		}()
 		if err := waitForServer(llm); err != nil {
@ -341,29 +345,30 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 }
 func waitForServer(llm *llama) error {
 	// wait for the server to start responding
 	start := time.Now()
 	expiresAt := time.Now().Add(2 * time.Minute) // be generous with timeout, large models can take a while to load
 	ticker := time.NewTicker(200 * time.Millisecond)
 	defer ticker.Stop()
 	log.Print("waiting for llama runner to start responding")
-	for range ticker.C {
+	for {
-		if time.Now().After(expiresAt) {
+		select {
-			return fmt.Errorf("llama runner did not start within alloted time, retrying")
+		case <-llm.exitCh:
-		}
+			// failed to start subprocess
 		// check if the server process has terminated
 		if llm.Cmd.ProcessState != nil && llm.Cmd.ProcessState.Exited() {
 			return fmt.Errorf("llama runner process has terminated")
-		}
+		case <-ticker.C:
 			if time.Now().After(expiresAt) {
 				// timeout
 				return fmt.Errorf("llama runner did not start within allotted time, retrying")
 			}
-		if err := llm.Ping(context.Background()); err == nil {
+			if err := llm.Ping(context.Background()); err == nil {
-			break
+				// success
 				log.Printf("llama runner started in %f seconds", time.Since(start).Seconds())
 				return nil
 			}
 		}
 	}
 	log.Printf("llama runner started in %f seconds", time.Since(start).Seconds())
 	return nil
 }
 func (llm *llama) Close() {
@ -371,8 +376,13 @@ func (llm *llama) Close() {
 	llm.Cancel()
 	// wait for the command to exit to prevent race conditions with the next run
-	if err := llm.Cmd.Wait(); err != nil {
+	<-llm.exitCh
-		log.Printf("llama runner exited: %v", err)
+	err := llm.exitErr
 	if err != nil {
 		log.Printf("llama runner stopped with error: %v", err)
 	} else {
 		log.Print("llama runner stopped successfully")
 	}
 }