diff --git a/llm/llama.go b/llm/llama.go
index 26a0d588..ec067194 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -6,8 +6,12 @@ import (
 	_ "embed"
 	"errors"
 	"fmt"
+	"io"
+	"io/fs"
+	"log"
 	"os"
 	"os/exec"
+	"path/filepath"
 	"sync"
 	"time"
 
@@ -116,6 +120,7 @@ type ImageData struct {
 var (
 	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
 	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
+	payloadMissing   = errors.New("expected payload not included in this build of ollama")
 )
 
 // StatusWriter is a writer that captures error messages from the llama runner process
@@ -202,3 +207,53 @@ type EmbeddingRequest struct {
 type EmbeddingResponse struct {
 	Embedding []float64 `json:"embedding"`
 }
+
+// extractLib copies the embedded payload matching glob out of libEmbed and
+// into workDir, creating workDir when needed. A payload file already present
+// in workDir is left untouched. It returns payloadMissing when the glob is
+// malformed or matches nothing in this build.
+func extractLib(workDir, glob string) error {
+	files, err := fs.Glob(libEmbed, glob)
+	if err != nil || len(files) == 0 {
+		return payloadMissing
+	}
+
+	if len(files) != 1 {
+		// Shouldn't happen, but just use the first one we find
+		log.Printf("WARNING: multiple payloads detected - using %s", files[0])
+	}
+
+	srcFile, err := libEmbed.Open(files[0])
+	if err != nil {
+		return fmt.Errorf("read payload %s: %w", files[0], err)
+	}
+	defer srcFile.Close()
+	if err := os.MkdirAll(workDir, 0o755); err != nil {
+		return fmt.Errorf("create payload temp dir %s: %w", workDir, err)
+	}
+
+	destPath := filepath.Join(workDir, filepath.Base(files[0]))
+
+	_, err = os.Stat(destPath)
+	switch {
+	case errors.Is(err, os.ErrNotExist):
+		destFile, err := os.OpenFile(destPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+		if err != nil {
+			return fmt.Errorf("write payload %s: %w", files[0], err)
+		}
+		if _, err := io.Copy(destFile, srcFile); err != nil {
+			// Best-effort cleanup: a truncated file left behind would pass the
+			// Stat check above on the next run and never be repaired.
+			_ = destFile.Close()
+			_ = os.Remove(destPath)
+			return fmt.Errorf("copy payload %s: %w", files[0], err)
+		}
+		if err := destFile.Close(); err != nil {
+			_ = os.Remove(destPath)
+			return fmt.Errorf("close payload %s: %w", files[0], err)
+		}
+	case err != nil:
+		return fmt.Errorf("stat payload %s: %w", files[0], err)
+	}
+	return nil
+}
diff --git a/llm/shim_darwin.go b/llm/shim_darwin.go
index adf02108..3cefe4c5 100644
--- a/llm/shim_darwin.go
+++ b/llm/shim_darwin.go
@@ -1,18 +1,38 @@
 package llm
 
 import (
+	"embed"
+	"errors"
 	"fmt"
+	"log"
+	"os"
 
 	"github.com/jmorganca/ollama/api"
 )
 
-// no-op stubs for mac
+//go:embed llama.cpp/gguf/build/*/bin/ggml-metal.metal
+var libEmbed embed.FS
 
 func newRocmShimExtServer(model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
 	// should never happen...
 	return nil, fmt.Errorf("ROCM GPUs not supported on Mac")
 }
 
-func nativeInit(workDir string) error {
+// nativeInit extracts the embedded ggml-metal.metal payload into workdir and
+// sets GGML_METAL_PATH_RESOURCES to that directory. A build without the
+// payload is logged and tolerated rather than treated as an error.
+func nativeInit(workdir string) error {
+	err := extractLib(workdir, "llama.cpp/gguf/build/*/bin/ggml-metal.metal")
+	if err != nil {
+		if errors.Is(err, payloadMissing) {
+			// TODO perhaps consider this a hard failure on arm macs?
+			log.Printf("ggml-metal.metal payload missing")
+			return nil
+		}
+		return err
+	}
+	if err := os.Setenv("GGML_METAL_PATH_RESOURCES", workdir); err != nil {
+		return fmt.Errorf("set GGML_METAL_PATH_RESOURCES: %w", err)
+	}
 	return nil
 }
diff --git a/llm/shim_ext_server.go b/llm/shim_ext_server.go
index 7505adaa..fa841d49 100644
--- a/llm/shim_ext_server.go
+++ b/llm/shim_ext_server.go
@@ -14,7 +14,6 @@ import (
 	"embed"
 	"errors"
 	"fmt"
-	"io"
 	"io/fs"
 	"log"
 	"os"
@@ -109,13 +108,14 @@ func (llm *shimExtServer) Close() {
 }
 
 func nativeInit(workdir string) error {
-	err := extractLib(workdir)
+	err := extractLib(workdir, "llama.cpp/gguf/build/*/lib/*rocm_server*")
 	if err != nil {
-		if err == RocmShimMissing {
-			log.Printf("%s", err)
+		if errors.Is(err, payloadMissing) {
+			log.Printf("%s", RocmShimMissing)
 			return nil
 		}
 		return err
 	}
+	ShimPresent = true
 
 	// Verify we have permissions - either running as root, or we have group access to the driver
@@ -168,44 +168,3 @@ func nativeInit(workdir string) error {
 	}
 	return nil
 }
-
-func extractLib(workDir string) error {
-	files, err := fs.Glob(libEmbed, "llama.cpp/gguf/build/*/lib/*rocm_server*")
-	if err != nil || len(files) == 0 {
-		// this is expected, ollama may be compiled without shim library packed in
-		return RocmShimMissing
-	}
-
-	if len(files) != 1 {
-		// Shouldn't happen, but just use the first one we find
-		log.Printf("WARNING: multiple rocm libraries detected - using %s", files[0])
-	}
-
-	srcFile, err := libEmbed.Open(files[0])
-	if err != nil {
-		return fmt.Errorf("read ROCm shim %s: %v", files[0], err)
-	}
-	defer srcFile.Close()
-	if err := os.MkdirAll(workDir, 0o755); err != nil {
-		return fmt.Errorf("create ROCm shim temp dir %s: %v", workDir, err)
-	}
-
-	destFile := filepath.Join(workDir, filepath.Base(files[0]))
-
-	_, err = os.Stat(destFile)
-	switch {
-	case errors.Is(err, os.ErrNotExist):
-		destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-		if err != nil {
-			return fmt.Errorf("write ROCm shim %s: %v", files[0], err)
-		}
-		defer destFile.Close()
-		if _, err := io.Copy(destFile, srcFile); err != nil {
-			return fmt.Errorf("copy ROCm shim %s: %v", files[0], err)
-		}
-	case err != nil:
-		return fmt.Errorf("stat ROCm shim %s: %v", files[0], err)
-	}
-	ShimPresent = true
-	return nil
-}