// Package runners locates the runner payloads on disk, or extracts them from
// an embedded filesystem, and selects which runners to use for a given GPU or
// CPU.
package runners

import (
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"slices"
	"strconv"
	"strings"
	"sync"
	"syscall"

	"golang.org/x/sync/errgroup"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/gpu"
)

const (
	// binGlob matches embedded payloads laid out as $OS/$GOARCH/$RUNNER/$FILE.
	binGlob = "*/*/*/*"
)

var (
	// lock serializes Refresh and Cleanup.
	lock sync.Mutex
	// runnersDir is the resolved runners directory; empty until the first
	// successful Refresh.
	runnersDir = ""
)

// Refresh returns the location where runners are stored.
// If runners are payloads, this will either extract them
// or refresh them if any have disappeared due to tmp cleaners.
func Refresh(payloadFS fs.FS) (string, error) {
	lock.Lock()
	defer lock.Unlock()
	var err error

	// Wire up extra logging on our first load
	if runnersDir == "" {
		defer func() {
			var runners []string
			for v := range GetAvailableServers(runnersDir) {
				runners = append(runners, v)
			}
			// Map iteration order is random; sort so the log line is stable.
			slices.Sort(runners)
			slog.Info("Dynamic LLM libraries", "runners", runners)
			slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
		}()
	}

	if hasPayloads(payloadFS) {
		if runnersDir == "" {
			runnersDir, err = extractRunners(payloadFS)
		} else {
			err = refreshRunners(payloadFS, runnersDir)
		}
	} else if runnersDir == "" {
		runnersDir, err = locateRunners()
	}

	return runnersDir, err
}

// Cleanup removes the temporary directory holding extracted payloads. It does
// nothing when payloadFS carries no payloads or nothing has been extracted yet.
func Cleanup(payloadFS fs.FS) {
	lock.Lock()
	defer lock.Unlock()
	if hasPayloads(payloadFS) && runnersDir != "" {
		// We want to fully clean up the tmpdir parent of the payloads dir
		tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
		slog.Debug("cleaning up", "dir", tmpDir)
		if err := os.RemoveAll(tmpDir); err != nil {
			slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
		}
	}
}

// locateRunners searches a fixed set of directories relative to the
// executable and the current working directory for "lib/ollama/runners" and
// returns the first one that exists.
func locateRunners() (string, error) {
	exe, err := os.Executable()
	if err != nil {
		return "", err
	}

	cwd, err := os.Getwd()
	if err != nil {
		return "", err
	}

	// Try a few variations to improve developer experience when building from
	// source in the local tree
	platform := runtime.GOOS + "-" + runtime.GOARCH
	var paths []string
	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
		paths = append(paths,
			root,
			filepath.Join(root, platform),
			filepath.Join(root, "dist", platform),
		)
	}

	for _, path := range paths {
		candidate := filepath.Join(path, "lib", "ollama", "runners")
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
}

// hasPayloads returns true if we're carrying nested payloads for the runners.
// A filesystem whose only match is a "placeholder" entry counts as empty.
func hasPayloads(payloadFS fs.FS) bool {
	files, err := fs.Glob(payloadFS, binGlob)
	if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
		return false
	}
	return true
}

// extractRunners creates a fresh temporary directory, records our pid in it,
// and extracts the payloads into its "runners" subdirectory. The runners
// directory path is returned even when extraction fails.
func extractRunners(payloadFS fs.FS) (string, error) {
	cleanupTmpDirs()
	tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
	if err != nil {
		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
	}
	// Track our pid so we can clean up orphaned tmpdirs
	n := filepath.Join(tmpDir, "ollama.pid")
	if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
		slog.Warn("failed to write pid file", "file", n, "error", err)
	}
	// We create a distinct subdirectory for payloads within the tmpdir
	// This will typically look like /tmp/ollama3208993108/runners on linux
	rDir := filepath.Join(tmpDir, "runners")

	slog.Info("extracting embedded files", "dir", rDir)
	return rDir, refreshRunners(payloadFS, rDir)
}

// refreshRunners extracts any payloads that are missing from rDir.
func refreshRunners(payloadFS fs.FS, rDir string) error {
	// extract or refresh server libraries
	if err := extractFiles(payloadFS, rDir, binGlob); err != nil {
		return fmt.Errorf("extract binaries: %w", err)
	}
	return nil
}

// extractFiles extracts the embedded files matching glob to the target
// directory, one goroutine per file. Files that already exist at the
// destination are left untouched. If any file fails, the whole target
// directory is removed and the extraction error is returned.
func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
	files, err := fs.Glob(payloadFS, glob)
	if err != nil || len(files) == 0 {
		// Should not happen
		return fmt.Errorf("extractFiles called without payload present")
	}

	if err := os.MkdirAll(targetDir, 0o755); err != nil {
		return fmt.Errorf("extractFiles could not mkdir %s: %w", targetDir, err)
	}

	g := new(errgroup.Group)

	// $OS/$GOARCH/$RUNNER/$FILE
	for _, file := range files {
		filename := file

		runner := filepath.Base(filepath.Dir(filename))

		slog.Debug("extracting", "runner", runner, "payload", filename)

		g.Go(func() error {
			return extractFile(payloadFS, targetDir, runner, filename)
		})
	}

	if err := g.Wait(); err != nil {
		slog.Error("failed to extract files", "error", err)
		// If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted.
		// The cleanup error gets its own name so it cannot shadow the
		// extraction error we must report to the caller.
		if rmErr := os.RemoveAll(targetDir); rmErr != nil {
			slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", rmErr)
		}
		return err
	}
	return nil
}

// extractFile extracts one payload into targetDir/runner, transparently
// decompressing it when its name ends in ".gz". An already existing
// destination file is left as is.
func extractFile(payloadFS fs.FS, targetDir, runner, filename string) error {
	srcf, err := payloadFS.Open(filename)
	if err != nil {
		return err
	}
	defer srcf.Close()

	src := io.Reader(srcf)
	if strings.HasSuffix(filename, ".gz") {
		zr, err := gzip.NewReader(srcf)
		if err != nil {
			return fmt.Errorf("decompress payload %s: %w", filename, err)
		}
		defer zr.Close()
		src = zr

		filename = strings.TrimSuffix(filename, ".gz")
	}

	runnerDir := filepath.Join(targetDir, runner)
	if err := os.MkdirAll(runnerDir, 0o755); err != nil {
		return fmt.Errorf("extractFiles could not mkdir %s: %w", runnerDir, err)
	}

	destFilename := filepath.Join(runnerDir, filepath.Base(filename))

	_, err = os.Stat(destFilename)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return writePayload(destFilename, filename, src)
	case err != nil:
		return fmt.Errorf("stat payload %s: %w", filename, err)
	}
	return nil
}

// writePayload streams src into a new executable file at destFilename.
// payload is the source name and is only used in error messages.
func writePayload(destFilename, payload string, src io.Reader) error {
	destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
	if err != nil {
		return fmt.Errorf("write payload %s: %w", payload, err)
	}

	if _, err := io.Copy(destFile, src); err != nil {
		// Best effort: the copy error is what matters. Drop the partial file
		// so a later refresh does not mistake it for a complete payload.
		_ = destFile.Close()
		_ = os.Remove(destFilename)
		return fmt.Errorf("copy payload %s: %w", payload, err)
	}

	// A failed close can mean the data never reached disk, so report it.
	if err := destFile.Close(); err != nil {
		return fmt.Errorf("close payload %s: %w", payload, err)
	}
	return nil
}

// cleanupTmpDirs makes a best effort to clean up prior tmpdirs. A directory
// is removed only when its ollama.pid names a process that is no longer
// running.
func cleanupTmpDirs() {
	tmpDir := envconfig.TmpDir()
	if tmpDir == "" {
		tmpDir = os.TempDir()
	}
	matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
	if err != nil {
		return
	}

	for _, match := range matches {
		raw, err := os.ReadFile(match)
		if errors.Is(err, os.ErrNotExist) {
			slog.Debug("not a ollama runtime directory, skipping", "path", match)
			continue
		} else if err != nil {
			slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
			continue
		}

		// Tolerate surrounding whitespace (e.g. a trailing newline) in the pid file.
		pid, err := strconv.Atoi(strings.TrimSpace(string(raw)))
		if err != nil {
			slog.Warn("invalid pid, skipping", "path", match, "error", err)
			continue
		}

		// Signal 0 probes the process; anything other than ErrProcessDone
		// is treated as "still running".
		p, err := os.FindProcess(pid)
		if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
			slog.Warn("process still running, skipping", "pid", pid, "path", match)
			continue
		}

		if err := os.Remove(match); err != nil {
			slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
		}

		dir := filepath.Dir(match)
		runners := filepath.Join(dir, "runners")
		if err := os.RemoveAll(runners); err != nil {
			slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
		}

		if err := os.Remove(dir); err != nil {
			slog.Warn("could not cleanup stale tmpdir", "path", dir, "error", err)
		}
	}
}

// GetAvailableServers returns a map from runner name to runner directory for
// every subdirectory of payloadsDir that contains a file starting with
// "ollama_". It returns nil when payloadsDir is empty or the glob fails.
//
// Directory names are the name of the runner and may contain an optional
// variant prefixed with '_' as the separator. For example, "cuda_v11" and
// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
// lowest common denominator.
func GetAvailableServers(payloadsDir string) map[string]string {
	if payloadsDir == "" {
		slog.Error("empty runner dir")
		return nil
	}

	// glob payloadsDir for files that start with ollama_
	pattern := filepath.Join(payloadsDir, "*", "ollama_*")

	files, err := filepath.Glob(pattern)
	if err != nil {
		slog.Debug("could not glob", "pattern", pattern, "error", err)
		return nil
	}

	servers := make(map[string]string)
	for _, file := range files {
		slog.Debug("availableServers : found", "file", file)
		dir := filepath.Dir(file)
		servers[filepath.Base(dir)] = dir
	}

	return servers
}

// ServersForGpu returns a list of compatible servers given the provided GPU
// info: the exact library/variant match first, then the other variants of the
// same library in sorted order, then a CPU runner as fallback.
// It reads the package-level runners directory, so Refresh should have been
// called first.
// TODO - switch to metadata based mapping
func ServersForGpu(info gpu.GpuInfo) []string {
	availableServers := GetAvailableServers(runnersDir)
	requested := info.Library
	if info.Variant != gpu.CPUCapabilityNone.String() {
		requested += "_" + info.Variant
	}

	servers := []string{}

	// exact match first
	if _, ok := availableServers[requested]; ok {
		servers = []string{requested}
		if requested == "metal" {
			return servers
		}
	}

	// Then for GPUs load alternates and sort the list for consistent load ordering
	if info.Library != "cpu" {
		var alt []string
		for a := range availableServers {
			lib, _, _ := strings.Cut(a, "_")
			if lib == info.Library && a != requested {
				alt = append(alt, a)
			}
		}
		slices.Sort(alt)
		servers = append(servers, alt...)
	}

	if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
		// Load up the best CPU variant if not primary requested
		if info.Library != "cpu" {
			variant := gpu.GetCPUCapability()
			// If no variant, then we fall back to default
			// If we have a variant, try that if we find an exact match
			// Attempting to run the wrong CPU instructions will panic the
			// process
			if variant != gpu.CPUCapabilityNone {
				name := "cpu_" + variant.String()
				if _, ok := availableServers[name]; ok {
					servers = append(servers, name)
				}
			} else {
				servers = append(servers, "cpu")
			}
		}

		if len(servers) == 0 {
			servers = []string{"cpu"}
		}
	}

	return servers
}

// ServerForCpu returns the optimal server for this CPU architecture: "metal"
// on darwin/arm64, otherwise the runner matching the detected CPU capability
// when one is available, and "cpu" as the fallback.
func ServerForCpu() string {
	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
		return "metal"
	}
	variant := gpu.GetCPUCapability()
	availableServers := GetAvailableServers(runnersDir)
	if variant != gpu.CPUCapabilityNone {
		name := "cpu_" + variant.String()
		if _, ok := availableServers[name]; ok {
			return name
		}
	}
	return "cpu"
}