b2799f111b
We update the PATH on windows to get the CLI mapped, but this has an unintended side effect of causing other apps that may use our bundled DLLs to get terminated when we upgrade.
231 lines
5.6 KiB
Go
231 lines
5.6 KiB
Go
package llm
|
|
|
|
import (
|
|
"compress/gzip"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/fs"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"slices"
|
|
"strings"
|
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
"github.com/ollama/ollama/gpu"
|
|
)
|
|
|
|
// errPayloadMissing is returned by extractFiles when globbing the embedded
// payloads fails or matches no files.
var errPayloadMissing = errors.New("expected payloads not included in this build of ollama")
|
|
|
|
func Init() error {
|
|
payloadsDir, err := gpu.PayloadsDir()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if runtime.GOOS != "windows" {
|
|
slog.Info("extracting embedded files", "dir", payloadsDir)
|
|
binGlob := "build/*/*/*/bin/*"
|
|
|
|
// extract server libraries
|
|
err = extractFiles(payloadsDir, binGlob)
|
|
if err != nil {
|
|
return fmt.Errorf("extract binaries: %v", err)
|
|
}
|
|
}
|
|
|
|
var variants []string
|
|
for v := range availableServers() {
|
|
variants = append(variants, v)
|
|
}
|
|
slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
|
|
slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
|
|
|
|
return nil
|
|
}
|
|
|
|
// binary names may contain an optional variant separated by '_'
|
|
// For example, "ollama_rocm_v6" and "ollama_rocm_v5" or "ollama_cpu" and "ollama_cpu_avx2"
|
|
// Any library without a variant is the lowest common denominator
|
|
func availableServers() map[string]string {
|
|
payloadsDir, err := gpu.PayloadsDir()
|
|
if err != nil {
|
|
slog.Error("payload lookup error", "error", err)
|
|
return nil
|
|
}
|
|
|
|
// glob payloadsDir for files that start with ollama_
|
|
pattern := filepath.Join(payloadsDir, "*", "ollama_*")
|
|
|
|
files, err := filepath.Glob(pattern)
|
|
if err != nil {
|
|
slog.Debug("could not glob", "pattern", pattern, "error", err)
|
|
return nil
|
|
}
|
|
|
|
servers := make(map[string]string)
|
|
for _, file := range files {
|
|
slog.Debug("availableServers : found", "file", file)
|
|
servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
|
|
}
|
|
|
|
return servers
|
|
}
|
|
|
|
// serversForGpu returns a list of compatible servers give the provided GPU
|
|
// info, ordered by performance. assumes Init() has been called
|
|
// TODO - switch to metadata based mapping
|
|
func serversForGpu(info gpu.GpuInfo) []string {
|
|
// glob workDir for files that start with ollama_
|
|
availableServers := availableServers()
|
|
requested := info.Library
|
|
if info.Variant != gpu.CPUCapabilityNone {
|
|
requested += "_" + info.Variant.String()
|
|
}
|
|
|
|
servers := []string{}
|
|
|
|
// exact match first
|
|
for a := range availableServers {
|
|
if a == requested {
|
|
servers = []string{a}
|
|
|
|
if a == "metal" {
|
|
return servers
|
|
}
|
|
|
|
break
|
|
}
|
|
}
|
|
|
|
alt := []string{}
|
|
|
|
// Then for GPUs load alternates and sort the list for consistent load ordering
|
|
if info.Library != "cpu" {
|
|
for a := range availableServers {
|
|
if info.Library == strings.Split(a, "_")[0] && a != requested {
|
|
alt = append(alt, a)
|
|
}
|
|
}
|
|
|
|
slices.Sort(alt)
|
|
servers = append(servers, alt...)
|
|
}
|
|
|
|
// Load up the best CPU variant if not primary requested
|
|
if info.Library != "cpu" {
|
|
variant := gpu.GetCPUCapability()
|
|
// If no variant, then we fall back to default
|
|
// If we have a variant, try that if we find an exact match
|
|
// Attempting to run the wrong CPU instructions will panic the
|
|
// process
|
|
if variant != gpu.CPUCapabilityNone {
|
|
for cmp := range availableServers {
|
|
if cmp == "cpu_"+variant.String() {
|
|
servers = append(servers, cmp)
|
|
break
|
|
}
|
|
}
|
|
} else {
|
|
servers = append(servers, "cpu")
|
|
}
|
|
}
|
|
|
|
if len(servers) == 0 {
|
|
servers = []string{"cpu"}
|
|
}
|
|
|
|
return servers
|
|
}
|
|
|
|
// Return the optimal server for this CPU architecture
|
|
func serverForCpu() string {
|
|
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
|
|
return "metal"
|
|
}
|
|
variant := gpu.GetCPUCapability()
|
|
availableServers := availableServers()
|
|
if variant != gpu.CPUCapabilityNone {
|
|
for cmp := range availableServers {
|
|
if cmp == "cpu_"+variant.String() {
|
|
return cmp
|
|
}
|
|
}
|
|
}
|
|
return "cpu"
|
|
}
|
|
|
|
// extract extracts the embedded files to the target directory
|
|
func extractFiles(targetDir string, glob string) error {
|
|
files, err := fs.Glob(libEmbed, glob)
|
|
if err != nil || len(files) == 0 {
|
|
return errPayloadMissing
|
|
}
|
|
|
|
if err := os.MkdirAll(targetDir, 0o755); err != nil {
|
|
return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
|
|
}
|
|
|
|
g := new(errgroup.Group)
|
|
|
|
// build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE
|
|
for _, file := range files {
|
|
filename := file
|
|
|
|
variant := filepath.Base(filepath.Dir(filepath.Dir(filename)))
|
|
|
|
slog.Debug("extracting", "variant", variant, "file", filename)
|
|
|
|
g.Go(func() error {
|
|
srcf, err := libEmbed.Open(filename)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer srcf.Close()
|
|
|
|
src := io.Reader(srcf)
|
|
if strings.HasSuffix(filename, ".gz") {
|
|
src, err = gzip.NewReader(src)
|
|
if err != nil {
|
|
return fmt.Errorf("decompress payload %s: %v", filename, err)
|
|
}
|
|
filename = strings.TrimSuffix(filename, ".gz")
|
|
}
|
|
|
|
variantDir := filepath.Join(targetDir, variant)
|
|
if err := os.MkdirAll(variantDir, 0o755); err != nil {
|
|
return fmt.Errorf("extractFiles could not mkdir %s: %v", variantDir, err)
|
|
}
|
|
|
|
base := filepath.Base(filename)
|
|
destFilename := filepath.Join(variantDir, base)
|
|
|
|
_, err = os.Stat(destFilename)
|
|
switch {
|
|
case errors.Is(err, os.ErrNotExist):
|
|
destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
|
if err != nil {
|
|
return fmt.Errorf("write payload %s: %v", filename, err)
|
|
}
|
|
defer destFile.Close()
|
|
if _, err := io.Copy(destFile, src); err != nil {
|
|
return fmt.Errorf("copy payload %s: %v", filename, err)
|
|
}
|
|
case err != nil:
|
|
return fmt.Errorf("stat payload %s: %v", filename, err)
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
err = g.Wait()
|
|
if err != nil {
|
|
// If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
|
|
gpu.Cleanup()
|
|
return err
|
|
}
|
|
return nil
|
|
}
|