ollama/llm/status.go
Daniel Hiltgen 58d95cc9bd Switch back to subprocessing for llama.cpp
This should resolve a number of memory leak and stability defects by allowing
us to isolate llama.cpp in a separate process, shut it down when idle, and
gracefully restart it if it has problems. This also serves as a first step
toward running multiple copies to support multiple models concurrently.
2024-04-01 16:48:18 -07:00

package llm

import (
	"bytes"
	"os"
)

// StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct {
	LastErrMsg string
	out        *os.File
}

func NewStatusWriter(out *os.File) *StatusWriter {
	return &StatusWriter{
		out: out,
	}
}

// TODO - regex matching to detect errors like
// libcublasLt.so.11: cannot open shared object file: No such file or directory
var errorPrefixes = []string{
	"error:",
	"CUDA error",
	"cudaMalloc failed",
	"\"ERR\"",
}

// Write scans b for the known error prefixes, records the last match found
// in LastErrMsg, and passes the bytes through unchanged to the underlying file.
func (w *StatusWriter) Write(b []byte) (int, error) {
	var errMsg string
	for _, prefix := range errorPrefixes {
		if _, after, ok := bytes.Cut(b, []byte(prefix)); ok {
			errMsg = prefix + string(bytes.TrimSpace(after))
		}
	}

	if errMsg != "" {
		w.LastErrMsg = errMsg
	}

	return w.out.Write(b)
}
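
// Usage sketch (illustrative only, not part of this file in the upstream
// repository): a caller would attach a StatusWriter to the llama runner
// subprocess so its stderr is mirrored to the parent while the last error
// line is retained for diagnostics. The exec.Cmd wiring and the binary path
// below are assumptions for demonstration; the actual hookup lives elsewhere
// in this package.
//
//	cmd := exec.Command("/path/to/llama-runner") // hypothetical binary path
//	status := NewStatusWriter(os.Stderr)
//	cmd.Stderr = status
//	if err := cmd.Run(); err != nil {
//		log.Printf("runner exited: %v, last error: %s", err, status.LastErrMsg)
//	}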