From 4a5c9b80350d5f430451568177da16fbe7baefe1 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 8 Mar 2024 09:45:55 -0800 Subject: [PATCH] Finish unwinding idempotent payload logic The recent ROCm change partially removed idempotent payloads, but the ggml-metal.metal file for mac was still idempotent. This finishes switching to always extract the payloads, and now that idempotency is gone, the version directory is no longer useful. --- docs/linux.md | 5 +++++ gpu/amd_linux.go | 34 ++++++++++++------------------ gpu/amd_windows.go | 24 +++++++-------------- gpu/assets.go | 49 +++++++++++++++++++++++-------------------- llm/payload_common.go | 47 ++++++++++++++++++----------------------- server/routes.go | 1 + 6 files changed, 72 insertions(+), 88 deletions(-) diff --git a/docs/linux.md b/docs/linux.md index c7014ece..0ef4a30f 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -72,6 +72,11 @@ Verify that the drivers are installed by running the following command, which sh nvidia-smi ``` +### Install ROCm (optional - for Radeon GPUs) +[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) + +Make sure to install ROCm v6 + ### Start Ollama Start Ollama using `systemd`: diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go index c775b71d..25f9128f 100644 --- a/gpu/amd_linux.go +++ b/gpu/amd_linux.go @@ -11,14 +11,11 @@ import ( "slices" "strconv" "strings" - - "github.com/jmorganca/ollama/version" ) // Discovery logic for AMD/ROCm GPUs const ( - curlMsg = "curl -fsSL https://github.com/ollama/ollama/releases/download/v%s/rocm-amd64-deps.tgz | tar -zxf - -C %s" DriverVersionFile = "/sys/module/amdgpu/version" AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/" GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties" @@ -278,22 +275,22 @@ func setupLink(source, target string) error { func AMDValidateLibDir() (string, error) { // We rely on the rpath compiled into our library to find rocm // so we establish a 
symlink to wherever we find it on the system - // to $AssetsDir/rocm + // to $PayloadsDir/rocm + payloadsDir, err := PayloadsDir() + if err != nil { + return "", err + } // If we already have a rocm dependency wired, nothing more to do - assetsDir, err := AssetsDir() - if err != nil { - return "", fmt.Errorf("unable to lookup lib dir: %w", err) - } - // Versioned directory - rocmTargetDir := filepath.Join(assetsDir, "rocm") + rocmTargetDir := filepath.Join(payloadsDir, "rocm") if rocmLibUsable(rocmTargetDir) { return rocmTargetDir, nil } - // Parent dir (unversioned) - commonRocmDir := filepath.Join(filepath.Dir(assetsDir), "rocm") - if rocmLibUsable(commonRocmDir) { - return rocmTargetDir, setupLink(commonRocmDir, rocmTargetDir) + + // Well known ollama installer path + installedRocmDir := "/usr/share/ollama/lib/rocm" + if rocmLibUsable(installedRocmDir) { + return rocmTargetDir, setupLink(installedRocmDir, rocmTargetDir) } // Prefer explicit HIP env var @@ -322,14 +319,9 @@ func AMDValidateLibDir() (string, error) { if rocmLibUsable("/opt/rocm/lib") { return rocmTargetDir, setupLink("/opt/rocm/lib", rocmTargetDir) } - err = os.MkdirAll(rocmTargetDir, 0755) - if err != nil { - return "", fmt.Errorf("failed to create empty rocm dir %s %w", rocmTargetDir, err) - } - // If we still haven't found a usable rocm, the user will have to download it on their own - slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or run the following") - slog.Warn(fmt.Sprintf(curlMsg, version.Version, rocmTargetDir)) + // If we still haven't found a usable rocm, the user will have to install it on their own + slog.Warn("amdgpu detected, but no compatible rocm library found. 
Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install") return "", fmt.Errorf("no suitable rocm found, falling back to CPU") } diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index 5a965482..be1be567 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -140,7 +140,7 @@ func AMDValidateLibDir() (string, error) { // $LibDir/rocm, we instead rely on setting PATH to point // to the location of the ROCm library - // Installer payload location + // Installer payload location if we're running the installed binary exe, err := os.Executable() if err == nil { rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") @@ -150,13 +150,12 @@ func AMDValidateLibDir() (string, error) { } } - // If we already have a rocm dependency wired, nothing more to do - libDir, err := AssetsDir() - if err != nil { - return "", fmt.Errorf("unable to lookup lib dir: %w", err) - } - rocmTargetDir := filepath.Join(libDir, "rocm") + // Installer payload (if we're running from some other location) + localAppData := os.Getenv("LOCALAPPDATA") + appDir := filepath.Join(localAppData, "Programs", "Ollama") + rocmTargetDir := filepath.Join(appDir, "rocm") if rocmLibUsable(rocmTargetDir) { + slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) return rocmTargetDir, nil } @@ -175,16 +174,7 @@ func AMDValidateLibDir() (string, error) { return RocmStandardLocation, nil } - // Installer payload (if we're running from some other location) - localAppData := os.Getenv("LOCALAPPDATA") - appDir := filepath.Join(localAppData, "Programs", "Ollama") - rocmTargetDir = filepath.Join(appDir, "rocm") - if rocmLibUsable(rocmTargetDir) { - slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) - return rocmTargetDir, nil - } - // Should not happen on windows since we include it in the installer, but stand-alone binary might hit this - slog.Warn("amdgpu detected, but no compatible rocm library found. 
Please install ROCm v6") + slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm") return "", fmt.Errorf("no suitable rocm found, falling back to CPU") } diff --git a/gpu/assets.go b/gpu/assets.go index 41d0046a..b904d9ac 100644 --- a/gpu/assets.go +++ b/gpu/assets.go @@ -7,34 +7,37 @@ import ( "path/filepath" "runtime" "strings" - - "github.com/jmorganca/ollama/version" + "sync" ) -func AssetsDir() (string, error) { - home, err := os.UserHomeDir() - if err != nil { - return "", err +var ( + lock sync.Mutex + payloadsDir = "" +) + +func PayloadsDir() (string, error) { + lock.Lock() + defer lock.Unlock() + if payloadsDir == "" { + tmpDir, err := os.MkdirTemp("", "ollama") + if err != nil { + return "", fmt.Errorf("failed to generate tmp dir: %w", err) + } + payloadsDir = tmpDir } - baseDir := filepath.Join(home, ".ollama", "assets") - libDirs, err := os.ReadDir(baseDir) - if err == nil { - for _, d := range libDirs { - if d.Name() == version.Version { - continue - } - // Special case the rocm dependencies, which are handled by the installer - if d.Name() == "rocm" { - continue - } - slog.Debug("stale lib detected, cleaning up " + d.Name()) - err = os.RemoveAll(filepath.Join(baseDir, d.Name())) - if err != nil { - slog.Warn(fmt.Sprintf("unable to clean up stale library %s: %s", filepath.Join(baseDir, d.Name()), err)) - } + return payloadsDir, nil +} + +func Cleanup() { + lock.Lock() + defer lock.Unlock() + if payloadsDir != "" { + slog.Debug("cleaning up payloads dir " + payloadsDir) + err := os.RemoveAll(payloadsDir) + if err != nil { + slog.Warn(fmt.Sprintf("failed to cleanup tmp dir: %s", err)) } } - return filepath.Join(baseDir, version.Version), nil } func UpdatePath(dir string) { diff --git a/llm/payload_common.go b/llm/payload_common.go index ff38b63f..500d0582 100644 --- a/llm/payload_common.go +++ b/llm/payload_common.go @@ -104,13 +104,14 @@ func rocmDynLibPresent() bool { } func nativeInit() error { - slog.Info("Extracting 
dynamic libraries...") - assetsDir, err := gpu.AssetsDir() + payloadsDir, err := gpu.PayloadsDir() if err != nil { return err } + slog.Info(fmt.Sprintf("Extracting dynamic libraries to %s ...", payloadsDir)) + if runtime.GOOS == "darwin" { - err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal") + err := extractPayloadFiles(payloadsDir, "llama.cpp/ggml-metal.metal") if err != nil { if err == payloadMissing { // TODO perhaps consider this a hard failure on arm macs? @@ -119,10 +120,10 @@ func nativeInit() error { } return err } - os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir) + os.Setenv("GGML_METAL_PATH_RESOURCES", payloadsDir) } - libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*") + libs, err := extractDynamicLibs(payloadsDir, "llama.cpp/build/*/*/*/lib/*") if err != nil { if err == payloadMissing { slog.Info(fmt.Sprintf("%s", payloadMissing)) @@ -153,7 +154,7 @@ func nativeInit() error { return nil } -func extractDynamicLibs(assetsDir, glob string) ([]string, error) { +func extractDynamicLibs(payloadsDir, glob string) ([]string, error) { files, err := fs.Glob(libEmbed, glob) if err != nil || len(files) == 0 { return nil, payloadMissing @@ -172,14 +173,14 @@ func extractDynamicLibs(assetsDir, glob string) ([]string, error) { g.Go(func() error { // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY // Include the variant in the path to avoid conflicts between multiple server libs - targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3]) + targetDir := filepath.Join(payloadsDir, pathComps[pathComponentCount-3]) srcFile, err := libEmbed.Open(file) if err != nil { return fmt.Errorf("read payload %s: %v", file, err) } defer srcFile.Close() if err := os.MkdirAll(targetDir, 0o755); err != nil { - return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err) + return fmt.Errorf("create payload lib dir %s: %v", payloadsDir, err) } src := io.Reader(srcFile) filename := file @@ -210,7 +211,7 @@ func 
extractDynamicLibs(assetsDir, glob string) ([]string, error) { return libs, g.Wait() } -func extractPayloadFiles(assetsDir, glob string) error { +func extractPayloadFiles(payloadsDir, glob string) error { files, err := fs.Glob(libEmbed, glob) if err != nil || len(files) == 0 { return payloadMissing @@ -222,8 +223,8 @@ func extractPayloadFiles(assetsDir, glob string) error { return fmt.Errorf("read payload %s: %v", file, err) } defer srcFile.Close() - if err := os.MkdirAll(assetsDir, 0o755); err != nil { - return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err) + if err := os.MkdirAll(payloadsDir, 0o755); err != nil { + return fmt.Errorf("create payload lib dir %s: %v", payloadsDir, err) } src := io.Reader(srcFile) filename := file @@ -235,22 +236,14 @@ func extractPayloadFiles(assetsDir, glob string) error { filename = strings.TrimSuffix(filename, ".gz") } - destFile := filepath.Join(assetsDir, filepath.Base(filename)) - _, err = os.Stat(destFile) - switch { - case errors.Is(err, os.ErrNotExist): - destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return fmt.Errorf("write payload %s: %v", file, err) - } - defer destFp.Close() - if _, err := io.Copy(destFp, src); err != nil { - return fmt.Errorf("copy payload %s: %v", file, err) - } - case err != nil: - return fmt.Errorf("stat payload %s: %v", file, err) - case err == nil: - slog.Debug("payload already exists: " + destFile) + destFile := filepath.Join(payloadsDir, filepath.Base(filename)) + destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) + if err != nil { + return fmt.Errorf("write payload %s: %v", file, err) + } + defer destFp.Close() + if _, err := io.Copy(destFp, src); err != nil { + return fmt.Errorf("copy payload %s: %v", file, err) } } return nil diff --git a/server/routes.go b/server/routes.go index 4d70358c..d99c858c 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1092,6 +1092,7 @@ func Serve(ln 
net.Listener) error { if loaded.runner != nil { loaded.runner.Close() } + gpu.Cleanup() os.Exit(0) }()