Restart server on failure when running Windows app (#3985)

* app: restart server on failure

* fix linter

* address comments

* refactor log directory creation to be where logs are written

* check all log dir creation errors
This commit is contained in:
Jeffrey Morgan 2024-04-29 10:07:52 -04:00 committed by GitHub
parent 7aa08a77ca
commit 95ead8ffba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -43,37 +43,36 @@ func getCLIFullPath(command string) string {
return command return command
} }
func SpawnServer(ctx context.Context, command string) (chan int, error) { func start(ctx context.Context, command string) (*exec.Cmd, error) {
done := make(chan int)
logDir := filepath.Dir(ServerLogFile)
_, err := os.Stat(logDir)
if errors.Is(err, os.ErrNotExist) {
if err := os.MkdirAll(logDir, 0o755); err != nil {
return done, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
}
}
cmd := getCmd(ctx, getCLIFullPath(command)) cmd := getCmd(ctx, getCLIFullPath(command))
// send stdout and stderr to a file
stdout, err := cmd.StdoutPipe() stdout, err := cmd.StdoutPipe()
if err != nil { if err != nil {
return done, fmt.Errorf("failed to spawn server stdout pipe %s", err) return nil, fmt.Errorf("failed to spawn server stdout pipe: %w", err)
} }
stderr, err := cmd.StderrPipe() stderr, err := cmd.StderrPipe()
if err != nil { if err != nil {
return done, fmt.Errorf("failed to spawn server stderr pipe %s", err) return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
}
stdin, err := cmd.StdinPipe()
if err != nil {
return done, fmt.Errorf("failed to spawn server stdin pipe %s", err)
} }
// TODO - rotation // TODO - rotation
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
if err != nil { if err != nil {
return done, fmt.Errorf("failed to create server log %w", err) return nil, fmt.Errorf("failed to create server log: %w", err)
} }
logDir := filepath.Dir(ServerLogFile)
_, err = os.Stat(logDir)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
}
if err := os.MkdirAll(logDir, 0o755); err != nil {
return nil, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
}
}
go func() { go func() {
defer logFile.Close() defer logFile.Close()
io.Copy(logFile, stdout) //nolint:errcheck io.Copy(logFile, stdout) //nolint:errcheck
@@ -117,19 +116,33 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
// run the command and wait for it to finish // run the command and wait for it to finish
if err := cmd.Start(); err != nil { if err := cmd.Start(); err != nil {
return done, fmt.Errorf("failed to start server %w", err) return nil, fmt.Errorf("failed to start server %w", err)
} }
if cmd.Process != nil { if cmd.Process != nil {
slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid)) slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid))
} }
slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile)) slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile))
return cmd, nil
}
func SpawnServer(ctx context.Context, command string) (chan int, error) {
done := make(chan int)
go func() { go func() {
// Keep the server running unless we're shutting down the app // Keep the server running unless we're shutting down the app
crashCount := 0 crashCount := 0
for { for {
slog.Info("starting server...")
cmd, err := start(ctx, command)
if err != nil {
crashCount++
slog.Error(fmt.Sprintf("failed to start server %s", err))
time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
continue
}
cmd.Wait() //nolint:errcheck cmd.Wait() //nolint:errcheck
stdin.Close()
var code int var code int
if cmd.ProcessState != nil { if cmd.ProcessState != nil {
code = cmd.ProcessState.ExitCode() code = cmd.ProcessState.ExitCode()
@@ -143,15 +156,12 @@ func SpawnServer(ctx context.Context, command string) (chan int, error) {
default: default:
crashCount++ crashCount++
slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code)) slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code))
time.Sleep(500 * time.Millisecond) time.Sleep(500 * time.Millisecond * time.Duration(crashCount))
if err := cmd.Start(); err != nil { break
slog.Error(fmt.Sprintf("failed to restart server %s", err))
// Keep trying, but back off if we keep failing
time.Sleep(time.Duration(crashCount) * time.Second)
}
} }
} }
}() }()
return done, nil return done, nil
} }