d7c94e0ca6
* Better support for AMD multi-GPU. This resolves a number of problems related to AMD multi-GPU setups on Linux. The numeric IDs used by ROCm are not the same as the numeric IDs exposed in sysfs, although the ordering is consistent. We have to count up from the first valid gfx (major/minor/patch with non-zero values) we find, starting at zero. There are 3 different env vars for selecting GPUs, and only ROCR_VISIBLE_DEVICES supports UUID-based identification, so we should favor that one and try to use UUIDs if detected, to avoid potential ordering bugs with numeric IDs. * ROCR_VISIBLE_DEVICES only works on Linux. Use the numeric-ID-only HIP_VISIBLE_DEVICES on Windows.
88 lines
2.1 KiB
Go
88 lines
2.1 KiB
Go
//go:build linux || windows
|
|
|
|
package discover
|
|
|
|
import (
|
|
"errors"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
|
|
"github.com/ollama/ollama/envconfig"
|
|
)
|
|
|
|
// Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
|
|
func rocmLibUsable(libDir string) bool {
|
|
slog.Debug("evaluating potential rocm lib dir " + libDir)
|
|
for _, g := range ROCmLibGlobs {
|
|
res, _ := filepath.Glob(filepath.Join(libDir, g))
|
|
if len(res) == 0 {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// GetSupportedGFX returns the gfx target names found under libDir.
//
// It globs for files named "TensileLibrary_lazy_gfx*.dat" inside
// libDir/rocblas/library and, for each match, strips the
// "TensileLibrary_lazy_" prefix and the ".dat" suffix from the file name,
// leaving values such as "gfx1030". When nothing matches, the returned slice
// is nil and the error is nil. A glob error is returned unchanged.
func GetSupportedGFX(libDir string) ([]string, error) {
	pattern := filepath.Join(libDir, "rocblas", "library", "TensileLibrary_lazy_gfx*.dat")
	matches, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}

	var targets []string
	for _, match := range matches {
		name := filepath.Base(match)
		name = strings.TrimPrefix(name, "TensileLibrary_lazy_")
		name = strings.TrimSuffix(name, ".dat")
		targets = append(targets, name)
	}
	return targets, nil
}
|
|
|
|
func commonAMDValidateLibDir() (string, error) {
|
|
// Favor our bundled version
|
|
|
|
// Installer payload location if we're running the installed binary
|
|
exe, err := os.Executable()
|
|
if err == nil {
|
|
rocmTargetDir := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
|
|
if rocmLibUsable(rocmTargetDir) {
|
|
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
|
|
return rocmTargetDir, nil
|
|
}
|
|
}
|
|
|
|
// Prefer explicit HIP env var
|
|
hipPath := os.Getenv("HIP_PATH")
|
|
if hipPath != "" {
|
|
hipLibDir := filepath.Join(hipPath, "bin")
|
|
if rocmLibUsable(hipLibDir) {
|
|
slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
|
|
return hipLibDir, nil
|
|
}
|
|
}
|
|
|
|
// Scan the LD_LIBRARY_PATH or PATH
|
|
pathEnv := "LD_LIBRARY_PATH"
|
|
if runtime.GOOS == "windows" {
|
|
pathEnv = "PATH"
|
|
}
|
|
|
|
paths := os.Getenv(pathEnv)
|
|
for _, path := range filepath.SplitList(paths) {
|
|
d, err := filepath.Abs(path)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if rocmLibUsable(d) {
|
|
return d, nil
|
|
}
|
|
}
|
|
|
|
// Well known location(s)
|
|
for _, path := range RocmStandardLocations {
|
|
if rocmLibUsable(path) {
|
|
return path, nil
|
|
}
|
|
}
|
|
|
|
return "", errors.New("no suitable rocm found, falling back to CPU")
|
|
}
|