package runners

import (
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"slices"
	"strconv"
	"strings"
	"sync"
	"syscall"

	"golang.org/x/sync/errgroup"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/gpu"
)

const (
	binGlob = "*/*/*/*"
)

var (
	lock       sync.Mutex
	runnersDir = ""
)

// Refresh returns the location where runners are stored.
// If the runners are embedded payloads, this will either extract them
// or refresh them if any have disappeared due to tmp cleaners.
func Refresh(payloadFS fs.FS) (string, error) {
	lock.Lock()
	defer lock.Unlock()
	var err error

	// Wire up extra logging on our first load
	if runnersDir == "" {
		defer func() {
			var runners []string
			for v := range GetAvailableServers(runnersDir) {
				runners = append(runners, v)
			}
			slog.Info("Dynamic LLM libraries", "runners", runners)
			slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
		}()
	}

	if hasPayloads(payloadFS) {
		if runnersDir == "" {
			runnersDir, err = extractRunners(payloadFS)
		} else {
			err = refreshRunners(payloadFS, runnersDir)
		}
	} else if runnersDir == "" {
		runnersDir, err = locateRunners()
	}

	return runnersDir, err
}

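// A minimal usage sketch, assuming a caller that carries the payloads in an
// embed.FS whose root matches the $OS/$GOARCH/$RUNNER/$FILE layout expected
// by binGlob (the embed pattern and names below are illustrative, not taken
// from this package):
//
//	//go:embed linux/*/*/*
//	var payloadFS embed.FS
//
//	dir, err := runners.Refresh(payloadFS)
//	if err != nil {
//		log.Fatal(err)
//	}
//	slog.Info("runners ready", "dir", dir)
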
// Cleanup removes the tmpdir created for extracted payloads, if any.
func Cleanup(payloadFS fs.FS) {
	lock.Lock()
	defer lock.Unlock()
	if hasPayloads(payloadFS) && runnersDir != "" {
		// We want to fully clean up the tmpdir parent of the payloads dir
		tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
		slog.Debug("cleaning up", "dir", tmpDir)
		err := os.RemoveAll(tmpDir)
		if err != nil {
			slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
		}
	}
}

func locateRunners() (string, error) {
	exe, err := os.Executable()
	if err != nil {
		return "", err
	}

	cwd, err := os.Getwd()
	if err != nil {
		return "", err
	}

	var paths []string
	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
		paths = append(paths,
			root,
			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
		)
	}

	// Try a few variations to improve developer experience when building from source in the local tree
	for _, path := range paths {
		candidate := filepath.Join(path, "lib", "ollama", "runners")
		if _, err := os.Stat(candidate); err == nil {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
}

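// As an example of the search order: for a packaged linux/amd64 install the
// first hit is typically <dir of ollama binary>/lib/ollama/runners, while a
// source build is usually found under <cwd>/dist/linux-amd64/lib/ollama/runners.
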
// hasPayloads returns true if we're carrying nested payloads for the runners
func hasPayloads(payloadFS fs.FS) bool {
	files, err := fs.Glob(payloadFS, binGlob)
	if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
		return false
	}
	return true
}

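// binGlob ("*/*/*/*") matches the $OS/$GOARCH/$RUNNER/$FILE payload layout,
// so a populated FS contains entries such as (file name illustrative):
//
//	linux/amd64/cuda_v12/ollama_llama_server.gz
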
func extractRunners(payloadFS fs.FS) (string, error) {
	cleanupTmpDirs()
	tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
	if err != nil {
		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
	}
	// Track our pid so we can clean up orphaned tmpdirs
	n := filepath.Join(tmpDir, "ollama.pid")
	if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
		slog.Warn("failed to write pid file", "file", n, "error", err)
	}
	// We create a distinct subdirectory for payloads within the tmpdir
	// This will typically look like /tmp/ollama3208993108/runners on linux
	rDir := filepath.Join(tmpDir, "runners")

	slog.Info("extracting embedded files", "dir", rDir)
	return rDir, refreshRunners(payloadFS, rDir)
}

func refreshRunners(payloadFS fs.FS, rDir string) error {
	// extract or refresh server libraries
	err := extractFiles(payloadFS, rDir, binGlob)
	if err != nil {
		return fmt.Errorf("extract binaries: %v", err)
	}
	return nil
}

// extractFiles extracts the embedded files to the target directory
func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
	files, err := fs.Glob(payloadFS, glob)
	if err != nil || len(files) == 0 {
		// Should not happen
		return fmt.Errorf("extractFiles called without payload present")
	}

	if err := os.MkdirAll(targetDir, 0o755); err != nil {
		return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
	}

	g := new(errgroup.Group)

	// $OS/$GOARCH/$RUNNER/$FILE
	for _, file := range files {
		filename := file

		runner := filepath.Base(filepath.Dir(filename))

		slog.Debug("extracting", "runner", runner, "payload", filename)

		g.Go(func() error {
			srcf, err := payloadFS.Open(filename)
			if err != nil {
				return err
			}
			defer srcf.Close()

			src := io.Reader(srcf)
			if strings.HasSuffix(filename, ".gz") {
				src, err = gzip.NewReader(src)
				if err != nil {
					return fmt.Errorf("decompress payload %s: %v", filename, err)
				}
				filename = strings.TrimSuffix(filename, ".gz")
			}

			runnerDir := filepath.Join(targetDir, runner)
			if err := os.MkdirAll(runnerDir, 0o755); err != nil {
				return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
			}

			base := filepath.Base(filename)
			destFilename := filepath.Join(runnerDir, base)

			_, err = os.Stat(destFilename)
			switch {
			case errors.Is(err, os.ErrNotExist):
				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
				if err != nil {
					return fmt.Errorf("write payload %s: %v", filename, err)
				}
				defer destFile.Close()
				if _, err := io.Copy(destFile, src); err != nil {
					return fmt.Errorf("copy payload %s: %v", filename, err)
				}
			case err != nil:
				return fmt.Errorf("stat payload %s: %v", filename, err)
			}
			return nil
		})
	}

	err = g.Wait()
	if err != nil {
		slog.Error("failed to extract files", "error", err)
		// If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted.
		// Use a separate variable so the extraction error isn't shadowed and lost.
		if cleanupErr := os.RemoveAll(targetDir); cleanupErr != nil {
			slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", cleanupErr)
		}
		return err
	}
	return nil
}

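// For example, a payload entry linux/amd64/cuda_v12/ollama_llama_server.gz
// (name illustrative) is decompressed and written as an executable to
// <targetDir>/cuda_v12/ollama_llama_server; files that already exist are
// left untouched.
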
// cleanupTmpDirs makes a best effort to clean up prior tmpdirs
func cleanupTmpDirs() {
	tmpDir := envconfig.TmpDir()
	if tmpDir == "" {
		tmpDir = os.TempDir()
	}
	matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
	if err != nil {
		return
	}

	for _, match := range matches {
		raw, err := os.ReadFile(match)
		if errors.Is(err, os.ErrNotExist) {
			slog.Debug("not an ollama runtime directory, skipping", "path", match)
			continue
		} else if err != nil {
			slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
			continue
		}

		pid, err := strconv.Atoi(string(raw))
		if err != nil {
			slog.Warn("invalid pid, skipping", "path", match, "error", err)
			continue
		}

		p, err := os.FindProcess(pid)
		if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
			slog.Warn("process still running, skipping", "pid", pid, "path", match)
			continue
		}

		if err := os.Remove(match); err != nil {
			slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
		}

		runners := filepath.Join(filepath.Dir(match), "runners")
		if err := os.RemoveAll(runners); err != nil {
			slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
		}

		if err := os.Remove(filepath.Dir(match)); err != nil {
			slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
		}
	}
}

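// The layout being cleaned is the one extractRunners creates, e.g. on linux:
//
//	/tmp/ollama<random>/ollama.pid  pid of the owning ollama process
//	/tmp/ollama<random>/runners/    extracted payloads
//
// A directory is only removed once its recorded pid no longer responds to
// signal 0.
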
// GetAvailableServers returns the available runners keyed by name. Directory
// names are the name of the runner and may contain an optional variant
// separated by '_', for example "cuda_v11" and "cuda_v12", or "cpu" and
// "cpu_avx2". Any library without a variant is the lowest common denominator.
func GetAvailableServers(payloadsDir string) map[string]string {
	if payloadsDir == "" {
		slog.Error("empty runner dir")
		return nil
	}

	// glob payloadsDir for files that start with ollama_
	pattern := filepath.Join(payloadsDir, "*", "ollama_*")

	files, err := filepath.Glob(pattern)
	if err != nil {
		slog.Debug("could not glob", "pattern", pattern, "error", err)
		return nil
	}

	servers := make(map[string]string)
	for _, file := range files {
		slog.Debug("availableServers : found", "file", file)
		servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
	}

	return servers
}

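// For a typical extracted payload dir the result looks like (paths illustrative):
//
//	{
//		"cpu":      "/tmp/ollama123/runners/cpu",
//		"cpu_avx2": "/tmp/ollama123/runners/cpu_avx2",
//		"cuda_v12": "/tmp/ollama123/runners/cuda_v12",
//	}
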
// ServersForGpu returns a list of compatible servers, given the provided GPU
// info, ordered by performance. It assumes Init() has been called.
// TODO - switch to metadata based mapping
func ServersForGpu(info gpu.GpuInfo) []string {
	// glob workDir for files that start with ollama_
	availableServers := GetAvailableServers(runnersDir)
	requested := info.Library
	if info.Variant != gpu.CPUCapabilityNone.String() {
		requested += "_" + info.Variant
	}

	servers := []string{}

	// exact match first
	for a := range availableServers {
		if a == requested {
			servers = []string{a}

			if a == "metal" {
				return servers
			}

			break
		}
	}

	alt := []string{}

	// Then for GPUs load alternates and sort the list for consistent load ordering
	if info.Library != "cpu" {
		for a := range availableServers {
			if info.Library == strings.Split(a, "_")[0] && a != requested {
				alt = append(alt, a)
			}
		}

		slices.Sort(alt)
		servers = append(servers, alt...)
	}

	if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
		// Load up the best CPU variant if not primary requested
		if info.Library != "cpu" {
			variant := gpu.GetCPUCapability()
			// If no variant, then we fall back to default
			// If we have a variant, try that if we find an exact match
			// Attempting to run the wrong CPU instructions will panic the
			// process
			if variant != gpu.CPUCapabilityNone {
				for cmp := range availableServers {
					if cmp == "cpu_"+variant.String() {
						servers = append(servers, cmp)
						break
					}
				}
			} else {
				servers = append(servers, "cpu")
			}
		}

		if len(servers) == 0 {
			servers = []string{"cpu"}
		}
	}

	return servers
}

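// As an illustration: for a CUDA v11 GPU on an AVX2-capable linux/amd64 host
// with runners cuda_v11, cuda_v12, cpu and cpu_avx2 available, this returns
// ["cuda_v11", "cuda_v12", "cpu_avx2"]: the exact match first, sorted
// same-library alternates next, then the best matching CPU fallback.
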
// ServerForCpu returns the optimal server for this CPU architecture
func ServerForCpu() string {
	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
		return "metal"
	}
	variant := gpu.GetCPUCapability()
	availableServers := GetAvailableServers(runnersDir)
	if variant != gpu.CPUCapabilityNone {
		for cmp := range availableServers {
			if cmp == "cpu_"+variant.String() {
				return cmp
			}
		}
	}
	return "cpu"
}