diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 66e7180d..28cdf09b 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -11,12 +11,12 @@ jobs:
generate:
strategy:
matrix:
- os: [ubuntu-latest, macos-latest, windows-latest]
+ os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64, arm64]
exclude:
- os: ubuntu-latest
arch: arm64
- - os: windows-latest
+ - os: windows-2019
arch: arm64
runs-on: ${{ matrix.os }}
env:
@@ -28,7 +28,18 @@ jobs:
go-version: '1.22'
cache: true
- run: go get ./...
+ - run: |
+ $gopath=(get-command go).source | split-path -parent
+ & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+ cd $env:GITHUB_WORKSPACE
+ $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+ $env:PATH="$gopath;$env:PATH"
+ go generate -x ./...
+ if: ${{ startsWith(matrix.os, 'windows-') }}
+ name: "Windows Go Generate"
- run: go generate -x ./...
+ if: ${{ ! startsWith(matrix.os, 'windows-') }}
+ name: "Unix Go Generate"
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -66,7 +77,6 @@ jobs:
strategy:
matrix:
rocm-version:
- - '5.7.1'
- '6.0'
runs-on: linux
container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
@@ -95,12 +105,12 @@ jobs:
lint:
strategy:
matrix:
- os: [ubuntu-latest, macos-latest, windows-latest]
+ os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64, arm64]
exclude:
- os: ubuntu-latest
arch: arm64
- - os: windows-latest
+ - os: windows-2019
arch: arm64
- os: macos-latest
arch: amd64
@@ -134,12 +144,12 @@ jobs:
needs: generate
strategy:
matrix:
- os: [ubuntu-latest, macos-latest, windows-latest]
+ os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64]
exclude:
- os: ubuntu-latest
arch: arm64
- - os: windows-latest
+ - os: windows-2019
arch: arm64
runs-on: ${{ matrix.os }}
env:
diff --git a/Dockerfile b/Dockerfile
index f996bba0..741b6b08 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,7 @@
ARG GOLANG_VERSION=1.22.1
ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION=11.3.1
+ARG ROCM_VERSION=6.0
# Copy the minimal context we need to run the generate scripts
FROM scratch AS llm-code
@@ -28,7 +29,7 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
ARG CGO_CFLAGS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -39,18 +40,14 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
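+# Bundle the ROCm runtime libraries recorded in deps.txt (plus the rocblas
+# kernel library) into dist/deps/rocm-amd64-deps.tgz so stand-alone installs
+# can fetch them separately from the release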
+RUN mkdir /tmp/scratch && \
+ for dep in $(cat /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \
+ cp ${dep} /tmp/scratch/ || exit 1 ; \
+ done && \
+ (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
+ mkdir -p /go/src/github.com/jmorganca/ollama/dist/deps/ && \
+ (cd /tmp/scratch/ && tar czvf /go/src/github.com/jmorganca/ollama/dist/deps/rocm-amd64-deps.tgz . )
-FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
-ARG CMAKE_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV LIBRARY_PATH /opt/amdgpu/lib64
-COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
-WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
-ARG CGO_CFLAGS
-ARG AMDGPU_TARGETS
-RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION
@@ -91,8 +88,8 @@ COPY . .
COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/deps/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN go build .
@@ -117,7 +114,7 @@ RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-arm64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete as runtime-rocm
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
EXPOSE 11434
diff --git a/app/ollama.iss b/app/ollama.iss
index 473a85b3..df61ac4c 100644
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -91,6 +91,14 @@ Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windeps\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
+; Assumes v5.7, may need adjustments for v6
+#if GetEnv("HIP_PATH") != ""
+ Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
+ Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
+ ; amdhip64.dll dependency comes from the driver and must be installed already
+ Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
+#endif
+
[Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
diff --git a/docs/development.md b/docs/development.md
index 33110e01..993aed9e 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -116,7 +116,7 @@ Note: The windows build for Ollama is still under development.
Install required tools:
-- MSVC toolchain - C/C++ and cmake as minimal requirements
+- MSVC toolchain - C/C++ and cmake as minimal requirements - you must build from a "Developer Shell" so the MSVC environment variables are set
- go version 1.22 or higher
- MinGW (pick one variant) with GCC.
-
@@ -132,6 +132,6 @@ go build .
#### Windows CUDA (NVIDIA)
-In addition to the common Windows development tools described above, install:
+In addition to the common Windows development tools described above, install CUDA **AFTER** you install MSVC.
- [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
diff --git a/docs/linux.md b/docs/linux.md
index 29110b05..c7014ece 100644
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -10,6 +10,14 @@ Install Ollama running this one-liner:
curl -fsSL https://ollama.com/install.sh | sh
```
+## AMD Radeon GPU support
+
+While AMD has contributed the `amdgpu` driver upstream to the official Linux
+kernel source, the in-tree version is older and may not support all ROCm
+features. We recommend you install the latest driver from
+https://www.amd.com/en/support/linux-drivers for the best support of your
+Radeon GPU.
+
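+You can confirm the `amdgpu` kernel driver is loaded by checking sysfs (the
+version file may be missing on some older drivers):
+
+```sh
+cat /sys/module/amdgpu/version
+```
+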
## Manual install
### Download the `ollama` binary
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index a5fb301f..1f2a6b1f 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -67,6 +67,43 @@ You can see what features your CPU has with the following.
cat /proc/cpuinfo| grep flags | head -1
```
+## AMD Radeon GPU Support
+
+Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
+some cases you can force the system to try a similar, supported GPU type. For
+example, the Radeon RX 5400 is `gfx1034` (also known as 10.3.4); however, ROCm
+does not support this patch level, and the closest supported target is
+`gfx1030`. You can use the environment variable `HSA_OVERRIDE_GFX_VERSION` with
+`x.y.z` syntax. So, to force the system to run on the RX 5400, you would set
+`HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the server.
+
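+For example, if Ollama is managed by systemd (the default `ollama.service`
+unit created by the Linux install script), one way to set the override is via
+a service drop-in:
+
+```sh
+sudo systemctl edit ollama.service
+# add under the [Service] section:
+#   Environment="HSA_OVERRIDE_GFX_VERSION=10.3.0"
+sudo systemctl restart ollama
+```
+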
+At this time, the known supported GPU types are the following (this list may
+change from release to release):
+- gfx900
+- gfx906
+- gfx908
+- gfx90a
+- gfx940
+- gfx941
+- gfx942
+- gfx1030
+- gfx1100
+- gfx1101
+- gfx1102
+
+The override will not work for every unsupported GPU. Reach out on [Discord](https://discord.gg/ollama)
+or file an [issue](https://github.com/ollama/ollama/issues) for additional help.
+
+
+## Installing older versions on Linux
+
+If you run into problems on Linux and want to install an older version, you can
+tell the install script which version to install.
+
+```sh
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.27" sh
+```
+
## Known issues
* N/A
\ No newline at end of file
diff --git a/docs/windows.md b/docs/windows.md
index 875a3dc0..49d579c9 100644
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -4,7 +4,7 @@ Welcome to the Ollama Windows preview.
No more WSL required!
-Ollama now runs as a native Windows application, including NVIDIA GPU support.
+Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
After installing Ollama Windows Preview, Ollama will run in the background and
the `ollama` command line is available in `cmd`, `powershell` or your favorite
terminal application. As usual the Ollama [api](./api.md) will be served on
@@ -21,6 +21,7 @@ Logs will often be helpful in dianosing the problem (see
* Windows 10 or newer, Home or Pro
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
+* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
## API Access
diff --git a/gpu/amd.go b/gpu/amd.go
deleted file mode 100644
index c21b7741..00000000
--- a/gpu/amd.go
+++ /dev/null
@@ -1,101 +0,0 @@
-package gpu
-
-import (
- "bufio"
- "errors"
- "fmt"
- "io"
- "log/slog"
- "os"
- "path/filepath"
- "strconv"
- "strings"
-)
-
-// TODO - windows vs. non-windows vs darwin
-
-// Discovery logic for AMD/ROCm GPUs
-
-const (
- DriverVersionFile = "/sys/module/amdgpu/version"
- GPUPropertiesFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/properties"
- // TODO probably break these down per GPU to make the logic simpler
- GPUTotalMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/properties" // size_in_bytes line
- GPUUsedMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/used_memory"
-)
-
-func AMDDetected() bool {
- // Some driver versions (older?) don't have a version file, so just lookup the parent dir
- sysfsDir := filepath.Dir(DriverVersionFile)
- _, err := os.Stat(sysfsDir)
- if errors.Is(err, os.ErrNotExist) {
- slog.Debug("amd driver not detected " + sysfsDir)
- return false
- } else if err != nil {
- slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err))
- return false
- }
- return true
-}
-
-func AMDDriverVersion() (string, error) {
- _, err := os.Stat(DriverVersionFile)
- if err != nil {
- return "", fmt.Errorf("amdgpu file stat error: %s %w", DriverVersionFile, err)
- }
- fp, err := os.Open(DriverVersionFile)
- if err != nil {
- return "", err
- }
- defer fp.Close()
- verString, err := io.ReadAll(fp)
- if err != nil {
- return "", err
- }
- return strings.TrimSpace(string(verString)), nil
-}
-
-func AMDGFXVersions() []Version {
- res := []Version{}
- matches, _ := filepath.Glob(GPUPropertiesFileGlob)
- for _, match := range matches {
- fp, err := os.Open(match)
- if err != nil {
- slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
- continue
- }
- defer fp.Close()
-
- scanner := bufio.NewScanner(fp)
- // optionally, resize scanner's capacity for lines over 64K, see next example
- for scanner.Scan() {
- line := strings.TrimSpace(scanner.Text())
- if strings.HasPrefix(line, "gfx_target_version") {
- ver := strings.Fields(line)
- if len(ver) != 2 || len(ver[1]) < 5 {
- slog.Debug("malformed " + line)
- continue
- }
- l := len(ver[1])
- patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
- minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
- major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
- if err1 != nil || err2 != nil || err3 != nil {
- slog.Debug("malformed int " + line)
- continue
- }
-
- res = append(res, Version{
- Major: uint(major),
- Minor: uint(minor),
- Patch: uint(patch),
- })
- }
- }
- }
- return res
-}
-
-func (v Version) ToGFXString() string {
- return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
-}
diff --git a/gpu/amd_common.go b/gpu/amd_common.go
new file mode 100644
index 00000000..deb931ff
--- /dev/null
+++ b/gpu/amd_common.go
@@ -0,0 +1,58 @@
+//go:build linux || windows
+
+package gpu
+
+import (
+ "fmt"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+)
+
+// Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
+func rocmLibUsable(libDir string) bool {
+ slog.Debug("evaluating potential rocm lib dir " + libDir)
+ for _, g := range ROCmLibGlobs {
+ res, _ := filepath.Glob(filepath.Join(libDir, g))
+ if len(res) == 0 {
+ return false
+ }
+ }
+ return true
+}
+
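+// GetSupportedGFX reports which GFX targets the rocblas in libDir supports by
+// listing its Tensile library files, e.g. "TensileLibrary_lazy_gfx1030.dat"
+// maps to "gfx1030".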
+func GetSupportedGFX(libDir string) ([]string, error) {
+ var ret []string
+ files, err := filepath.Glob(filepath.Join(libDir, "rocblas", "library", "TensileLibrary_lazy_gfx*.dat"))
+ if err != nil {
+ return nil, err
+ }
+ for _, file := range files {
+ ret = append(ret, strings.TrimSuffix(strings.TrimPrefix(filepath.Base(file), "TensileLibrary_lazy_"), ".dat"))
+ }
+ return ret, nil
+}
+
+func amdSetVisibleDevices(ids []int, skip map[int]interface{}) {
+ // Set the visible devices if not already set
+ // TODO - does sort order matter?
+ devices := []string{}
+ for i := range ids {
+ slog.Debug(fmt.Sprintf("i=%d", i))
+ if _, skipped := skip[i]; skipped {
+ slog.Debug("skipped")
+ continue
+ }
+ devices = append(devices, strconv.Itoa(i))
+ }
+ slog.Debug(fmt.Sprintf("devices=%v", devices))
+
+ val := strings.Join(devices, ",")
+ err := os.Setenv("HIP_VISIBLE_DEVICES", val)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to set env: %s", err))
+ }
+ slog.Debug("HIP_VISIBLE_DEVICES=" + val)
+}
diff --git a/gpu/amd_hip_windows.go b/gpu/amd_hip_windows.go
new file mode 100644
index 00000000..14a6c7d6
--- /dev/null
+++ b/gpu/amd_hip_windows.go
@@ -0,0 +1,141 @@
+package gpu
+
+import (
+ "fmt"
+ "log/slog"
+ "strconv"
+ "syscall"
+ "unsafe"
+
+ "golang.org/x/sys/windows"
+)
+
+const (
+ hipSuccess = 0
+ hipErrorNoDevice = 100
+)
+
+type hipDevicePropMinimal struct {
+ Name [256]byte
+ unused1 [140]byte
+ GcnArchName [256]byte // gfx####
+ iGPU int // Doesn't seem to actually report correctly
+ unused2 [128]byte
+}
+
+// Wrap the amdhip64.dll library for GPU discovery
+type HipLib struct {
+ dll windows.Handle
+ hipGetDeviceCount uintptr
+ hipGetDeviceProperties uintptr
+ hipMemGetInfo uintptr
+ hipSetDevice uintptr
+ hipDriverGetVersion uintptr
+}
+
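+// NewHipLib loads amdhip64.dll and resolves the entry points needed for GPU
+// discovery. A typical usage sketch (mirroring AMDGetGPUInfo in amd_windows.go):
+//
+//	hl, err := NewHipLib()
+//	if err != nil {
+//		return
+//	}
+//	defer hl.Release()
+//	count := hl.HipGetDeviceCount()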
+func NewHipLib() (*HipLib, error) {
+ h, err := windows.LoadLibrary("amdhip64.dll")
+ if err != nil {
+ return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err)
+ }
+ hl := &HipLib{}
+ hl.dll = h
+ hl.hipGetDeviceCount, err = windows.GetProcAddress(hl.dll, "hipGetDeviceCount")
+ if err != nil {
+ return nil, err
+ }
+ hl.hipGetDeviceProperties, err = windows.GetProcAddress(hl.dll, "hipGetDeviceProperties")
+ if err != nil {
+ return nil, err
+ }
+ hl.hipMemGetInfo, err = windows.GetProcAddress(hl.dll, "hipMemGetInfo")
+ if err != nil {
+ return nil, err
+ }
+ hl.hipSetDevice, err = windows.GetProcAddress(hl.dll, "hipSetDevice")
+ if err != nil {
+ return nil, err
+ }
+ hl.hipDriverGetVersion, err = windows.GetProcAddress(hl.dll, "hipDriverGetVersion")
+ if err != nil {
+ return nil, err
+ }
+ return hl, nil
+}
+
+// The hip library only evaluates the HIP_VISIBLE_DEVICES variable at startup
+// so we have to unload/reset the library after we do our initial discovery
+// to make sure our updates to that variable are processed by llama.cpp
+func (hl *HipLib) Release() {
+ err := windows.FreeLibrary(hl.dll)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err))
+ }
+ hl.dll = 0
+}
+
+func (hl *HipLib) AMDDriverVersion() (string, error) {
+ if hl.dll == 0 {
+ return "", fmt.Errorf("dll has been unloaded")
+ }
+ var version int
+ status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
+ if status != hipSuccess {
+ return "", fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
+ }
+ return strconv.Itoa(version), nil
+}
+
+func (hl *HipLib) HipGetDeviceCount() int {
+ if hl.dll == 0 {
+ slog.Error("dll has been unloaded")
+ return 0
+ }
+ var count int
+ status, _, err := syscall.SyscallN(hl.hipGetDeviceCount, uintptr(unsafe.Pointer(&count)))
+ if status == hipErrorNoDevice {
+ slog.Info("AMD ROCm reports no devices found")
+ return 0
+ }
+ if status != hipSuccess {
+ slog.Warn(fmt.Sprintf("failed call to hipGetDeviceCount: %d %s", status, err))
+ }
+ return count
+}
+
+func (hl *HipLib) HipSetDevice(device int) error {
+ if hl.dll == 0 {
+ return fmt.Errorf("dll has been unloaded")
+ }
+ status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
+ if status != hipSuccess {
+ return fmt.Errorf("failed call to hipSetDevice: %d %s", status, err)
+ }
+ return nil
+}
+
+func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
+ if hl.dll == 0 {
+ return nil, fmt.Errorf("dll has been unloaded")
+ }
+ var props hipDevicePropMinimal
+ status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
+ if status != hipSuccess {
+ return nil, fmt.Errorf("failed call to hipGetDeviceProperties: %d %s", status, err)
+ }
+ return &props, nil
+}
+
+// free, total, err
+func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
+ if hl.dll == 0 {
+ return 0, 0, fmt.Errorf("dll has been unloaded")
+ }
+ var totalMemory uint64
+ var freeMemory uint64
+ status, _, err := syscall.SyscallN(hl.hipMemGetInfo, uintptr(unsafe.Pointer(&freeMemory)), uintptr(unsafe.Pointer(&totalMemory)))
+ if status != hipSuccess {
+ return 0, 0, fmt.Errorf("failed call to hipMemGetInfo: %d %s", status, err)
+ }
+ return freeMemory, totalMemory, nil
+}
diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go
new file mode 100644
index 00000000..c775b71d
--- /dev/null
+++ b/gpu/amd_linux.go
@@ -0,0 +1,411 @@
+package gpu
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "slices"
+ "strconv"
+ "strings"
+
+ "github.com/jmorganca/ollama/version"
+)
+
+// Discovery logic for AMD/ROCm GPUs
+
+const (
+ curlMsg = "curl -fsSL https://github.com/ollama/ollama/releases/download/v%s/rocm-amd64-deps.tgz | tar -zxf - -C %s"
+ DriverVersionFile = "/sys/module/amdgpu/version"
+ AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/"
+ GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"
+
+ // Prefix with the node dir
+ GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
+ GPUUsedMemoryFileGlob = "mem_banks/*/used_memory"
+ RocmStandardLocation = "/opt/rocm/lib"
+)
+
+var (
+ // Used to validate if the given ROCm lib is usable
+ ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
+)
+
+// Gather GPU information from the amdgpu driver if any supported GPUs are detected
+// HIP_VISIBLE_DEVICES will be set if we detect a mix of unsupported and supported devices
+// and the user hasn't already set this variable
+func AMDGetGPUInfo(resp *GpuInfo) {
+ // TODO - DRY this out with windows
+ if !AMDDetected() {
+ return
+ }
+ skip := map[int]interface{}{}
+
+ // Opportunistic logging of driver version to aid in troubleshooting
+ ver, err := AMDDriverVersion()
+ if err == nil {
+ slog.Info("AMD Driver: " + ver)
+ } else {
+ // TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
+ slog.Warn(fmt.Sprintf("ollama recommends installing the latest AMD driver from https://www.amd.com/en/support/linux-drivers: %s", err))
+ }
+
+ // If the user has specified exactly which GPUs to use, look up their memory
+ visibleDevices := os.Getenv("HIP_VISIBLE_DEVICES")
+ if visibleDevices != "" {
+ ids := []int{}
+ for _, idStr := range strings.Split(visibleDevices, ",") {
+ id, err := strconv.Atoi(idStr)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("malformed HIP_VISIBLE_DEVICES=%s %s", visibleDevices, err))
+ } else {
+ ids = append(ids, id)
+ }
+ }
+ amdProcMemLookup(resp, nil, ids)
+ return
+ }
+
+ // Gather GFX version information from all detected cards
+ gfx := AMDGFXVersions()
+ verStrings := []string{}
+ for i, v := range gfx {
+ verStrings = append(verStrings, v.ToGFXString())
+ if v.Major == 0 {
+ // Silently skip CPUs
+ skip[i] = struct{}{}
+ continue
+ }
+ if v.Major < 9 {
+ // TODO consider this a build-time setting if we can support 8xx family GPUs
+ slog.Warn(fmt.Sprintf("amdgpu [%d] too old %s", i, v.ToGFXString()))
+ skip[i] = struct{}{}
+ }
+ }
+ slog.Info(fmt.Sprintf("detected amdgpu versions %v", verStrings))
+
+ // Abort if all GPUs are skipped
+ if len(skip) >= len(gfx) {
+ slog.Info("all detected amdgpus are skipped, falling back to CPU")
+ return
+ }
+
+ // If we got this far, then we have at least 1 GPU that's a ROCm candidate, so make sure we have a lib
+ libDir, err := AMDValidateLibDir()
+ if err != nil {
+ slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err))
+ return
+ }
+
+ gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+ if gfxOverride == "" {
+ supported, err := GetSupportedGFX(libDir)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
+ return
+ }
+ slog.Debug(fmt.Sprintf("rocm supported GPU types %v", supported))
+
+ for i, v := range gfx {
+ if !slices.Contains[[]string, string](supported, v.ToGFXString()) {
+ slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, v.ToGFXString(), libDir, supported))
+ // TODO - consider discrete markdown just for ROCM troubleshooting?
+ slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
+ skip[i] = struct{}{}
+ } else {
+ slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, v.ToGFXString()))
+ }
+ }
+ } else {
+ slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
+ }
+
+ if len(skip) >= len(gfx) {
+ slog.Info("all detected amdgpus are skipped, falling back to CPU")
+ return
+ }
+
+ ids := make([]int, len(gfx))
+ i := 0
+ for k := range gfx {
+ ids[i] = k
+ i++
+ }
+ amdProcMemLookup(resp, skip, ids)
+ if resp.memInfo.DeviceCount == 0 {
+ return
+ }
+ if len(skip) > 0 {
+ amdSetVisibleDevices(ids, skip)
+ }
+}
+
+// Walk the sysfs nodes for the available GPUs and gather information from them
+// skipping over any devices in the skip map
+func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
+ resp.memInfo.DeviceCount = 0
+ resp.memInfo.TotalMemory = 0
+ resp.memInfo.FreeMemory = 0
+ if len(ids) == 0 {
+ slog.Debug("discovering all amdgpu devices")
+ entries, err := os.ReadDir(AMDNodesSysfsDir)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to read amdgpu sysfs %s - %s", AMDNodesSysfsDir, err))
+ return
+ }
+ for _, node := range entries {
+ if !node.IsDir() {
+ continue
+ }
+ id, err := strconv.Atoi(node.Name())
+ if err != nil {
+ slog.Warn("malformed amdgpu sysfs node id " + node.Name())
+ continue
+ }
+ ids = append(ids, id)
+ }
+ }
+ slog.Debug(fmt.Sprintf("discovering amdgpu devices %v", ids))
+
+ for _, id := range ids {
+ if _, skipped := skip[id]; skipped {
+ continue
+ }
+ totalMemory := uint64(0)
+ usedMemory := uint64(0)
+ propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUTotalMemoryFileGlob)
+ propFiles, err := filepath.Glob(propGlob)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("error looking up total GPU memory: %s %s", propGlob, err))
+ }
+ // 1 or more memory banks - sum the values of all of them
+ for _, propFile := range propFiles {
+ fp, err := os.Open(propFile)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", propFile, err))
+ continue
+ }
+ defer fp.Close()
+ scanner := bufio.NewScanner(fp)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if strings.HasPrefix(line, "size_in_bytes") {
+ ver := strings.Fields(line)
+ if len(ver) != 2 {
+ slog.Warn("malformed " + line)
+ continue
+ }
+ bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64)
+ if err != nil {
+ slog.Warn("malformed int " + line)
+ continue
+ }
+ totalMemory += bankSizeInBytes
+ }
+ }
+ }
+ if totalMemory == 0 {
+ continue
+ }
+ usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUUsedMemoryFileGlob)
+ usedFiles, err := filepath.Glob(usedGlob)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("error looking up used GPU memory: %s %s", usedGlob, err))
+ continue
+ }
+ for _, usedFile := range usedFiles {
+ fp, err := os.Open(usedFile)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", usedFile, err))
+ continue
+ }
+ defer fp.Close()
+ data, err := io.ReadAll(fp)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to read sysfs node file %s: %s", usedFile, err))
+ continue
+ }
+ used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("malformed used memory %s: %s", string(data), err))
+ continue
+ }
+ usedMemory += used
+ }
+ slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %d", id, totalMemory))
+ slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory %d", id, (totalMemory - usedMemory)))
+ resp.memInfo.DeviceCount++
+ resp.memInfo.TotalMemory += totalMemory
+ resp.memInfo.FreeMemory += (totalMemory - usedMemory)
+ }
+ if resp.memInfo.DeviceCount > 0 {
+ resp.Library = "rocm"
+ }
+}
+
+// Quick check for AMD driver so we can skip amdgpu discovery if not present
+func AMDDetected() bool {
+ // Some driver versions (older?) don't have a version file, so just lookup the parent dir
+ sysfsDir := filepath.Dir(DriverVersionFile)
+ _, err := os.Stat(sysfsDir)
+ if errors.Is(err, os.ErrNotExist) {
+ slog.Debug("amdgpu driver not detected " + sysfsDir)
+ return false
+ } else if err != nil {
+ slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err))
+ return false
+ }
+ return true
+}
+
+func setupLink(source, target string) error {
+ if err := os.RemoveAll(target); err != nil {
+ return fmt.Errorf("failed to remove old rocm directory %s %w", target, err)
+ }
+ if err := os.Symlink(source, target); err != nil {
+ return fmt.Errorf("failed to create link %s => %s %w", source, target, err)
+ }
+ slog.Debug(fmt.Sprintf("host rocm linked %s => %s", source, target))
+ return nil
+}
+
+// Ensure the AMD rocm lib dir is wired up
+// Prefer to use host installed ROCm, as long as it meets our minimum requirements
+// failing that, tell the user how to download it on their own
+func AMDValidateLibDir() (string, error) {
+ // We rely on the rpath compiled into our library to find rocm
+ // so we establish a symlink to wherever we find it on the system
+ // to $AssetsDir/rocm
+
+ // If we already have a rocm dependency wired, nothing more to do
+ assetsDir, err := AssetsDir()
+ if err != nil {
+ return "", fmt.Errorf("unable to lookup lib dir: %w", err)
+ }
+ // Versioned directory
+ rocmTargetDir := filepath.Join(assetsDir, "rocm")
+ if rocmLibUsable(rocmTargetDir) {
+ return rocmTargetDir, nil
+ }
+ // Parent dir (unversioned)
+ commonRocmDir := filepath.Join(filepath.Dir(assetsDir), "rocm")
+ if rocmLibUsable(commonRocmDir) {
+ return rocmTargetDir, setupLink(commonRocmDir, rocmTargetDir)
+ }
+
+ // Prefer explicit HIP env var
+ hipPath := os.Getenv("HIP_PATH")
+ if hipPath != "" {
+ hipLibDir := filepath.Join(hipPath, "lib")
+ if rocmLibUsable(hipLibDir) {
+ slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
+ return rocmTargetDir, setupLink(hipLibDir, rocmTargetDir)
+ }
+ }
+
+ // Scan the library path for potential matches
+ ldPaths := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
+ for _, ldPath := range ldPaths {
+ d, err := filepath.Abs(ldPath)
+ if err != nil {
+ continue
+ }
+ if rocmLibUsable(d) {
+ return rocmTargetDir, setupLink(d, rocmTargetDir)
+ }
+ }
+
+ // Well known location(s)
+ if rocmLibUsable("/opt/rocm/lib") {
+ return rocmTargetDir, setupLink("/opt/rocm/lib", rocmTargetDir)
+ }
+ err = os.MkdirAll(rocmTargetDir, 0755)
+ if err != nil {
+ return "", fmt.Errorf("failed to create empty rocm dir %s %w", rocmTargetDir, err)
+ }
+
+ // If we still haven't found a usable rocm, the user will have to download it on their own
+ slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or run the following")
+ slog.Warn(fmt.Sprintf(curlMsg, version.Version, rocmTargetDir))
+ return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+}
+
+func AMDDriverVersion() (string, error) {
+ _, err := os.Stat(DriverVersionFile)
+ if err != nil {
+ return "", fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
+ }
+ fp, err := os.Open(DriverVersionFile)
+ if err != nil {
+ return "", err
+ }
+ defer fp.Close()
+ verString, err := io.ReadAll(fp)
+ if err != nil {
+ return "", err
+ }
+ return strings.TrimSpace(string(verString)), nil
+}
+
+func AMDGFXVersions() map[int]Version {
+ res := map[int]Version{}
+ matches, _ := filepath.Glob(GPUPropertiesFileGlob)
+ for _, match := range matches {
+ fp, err := os.Open(match)
+ if err != nil {
+ slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
+ continue
+ }
+ defer fp.Close()
+ i, err := strconv.Atoi(filepath.Base(filepath.Dir(match)))
+ if err != nil {
+ slog.Debug(fmt.Sprintf("failed to parse node ID %s", err))
+ continue
+ }
+
+ scanner := bufio.NewScanner(fp)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if strings.HasPrefix(line, "gfx_target_version") {
+ ver := strings.Fields(line)
+ if len(ver) != 2 || len(ver[1]) < 5 {
+
+ if ver[1] == "0" {
+ // Silently skip the CPU
+ continue
+ } else {
+ slog.Debug("malformed " + line)
+ }
+ res[i] = Version{
+ Major: 0,
+ Minor: 0,
+ Patch: 0,
+ }
+ continue
+ }
+ l := len(ver[1])
+ patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
+ minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
+ major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
+ if err1 != nil || err2 != nil || err3 != nil {
+ slog.Debug("malformed int " + line)
+ continue
+ }
+
+ res[i] = Version{
+ Major: uint(major),
+ Minor: uint(minor),
+ Patch: uint(patch),
+ }
+ }
+ }
+ }
+ return res
+}
+
+func (v Version) ToGFXString() string {
+ return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
+}
diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go
new file mode 100644
index 00000000..5a965482
--- /dev/null
+++ b/gpu/amd_windows.go
@@ -0,0 +1,190 @@
+package gpu
+
+import (
+ "bytes"
+ "fmt"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "slices"
+ "strings"
+)
+
+const (
+ RocmStandardLocation = "C:\\Program Files\\AMD\\ROCm\\5.7\\bin" // TODO glob?
+
+ // TODO We're looking for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true
+ iGPUName = "AMD Radeon(TM) Graphics"
+)
+
+var (
+ // Used to validate if the given ROCm lib is usable
+ ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here...
+)
+
+func AMDGetGPUInfo(resp *GpuInfo) {
+ hl, err := NewHipLib()
+ if err != nil {
+ slog.Debug(err.Error())
+ return
+ }
+ defer hl.Release()
+ skip := map[int]interface{}{}
+ ids := []int{}
+ resp.memInfo.DeviceCount = 0
+ resp.memInfo.TotalMemory = 0
+ resp.memInfo.FreeMemory = 0
+
+ ver, err := hl.AMDDriverVersion()
+ if err == nil {
+ slog.Info("AMD Driver: " + ver)
+ } else {
+ // For now this is benign, but we may eventually need to fail compatibility checks
+ slog.Debug(fmt.Sprintf("error looking up amd driver version: %s", err))
+ }
+
+ // Note: the HIP library automatically handles HIP_VISIBLE_DEVICES
+ count := hl.HipGetDeviceCount()
+ if count == 0 {
+ return
+ }
+ libDir, err := AMDValidateLibDir()
+ if err != nil {
+ slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err))
+ return
+ }
+
+ var supported []string
+ gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+ if gfxOverride == "" {
+ supported, err = GetSupportedGFX(libDir)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
+ return
+ }
+ } else {
+ slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
+ }
+
+ slog.Info(fmt.Sprintf("detected %d hip devices", count))
+ for i := 0; i < count; i++ {
+ ids = append(ids, i)
+ err = hl.HipSetDevice(i)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("[%d] %s", i, err))
+ skip[i] = struct{}{}
+ continue
+ }
+
+ props, err := hl.HipGetDeviceProperties(i)
+ if err != nil {
+ slog.Warn(fmt.Sprintf("[%d] %s", i, err))
+ skip[i] = struct{}{}
+ continue
+ }
+ n := bytes.IndexByte(props.Name[:], 0)
+ name := string(props.Name[:n])
+ slog.Info(fmt.Sprintf("[%d] Name: %s", i, name))
+ n = bytes.IndexByte(props.GcnArchName[:], 0)
+ gfx := string(props.GcnArchName[:n])
+ slog.Info(fmt.Sprintf("[%d] GcnArchName: %s", i, gfx))
+ //slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
+ // TODO Why isn't props.iGPU accurate!?
+ if strings.EqualFold(name, iGPUName) {
+ slog.Info(fmt.Sprintf("iGPU detected [%d] skipping", i))
+ skip[i] = struct{}{}
+ continue
+ }
+ if gfxOverride == "" {
+ if !slices.Contains[[]string, string](supported, gfx) {
+ slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, gfx, libDir, supported))
+ // TODO - consider discrete markdown just for ROCM troubleshooting?
+ slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
+ skip[i] = struct{}{}
+ continue
+ } else {
+ slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, gfx))
+ }
+ }
+
+ totalMemory, freeMemory, err := hl.HipMemGetInfo()
+ if err != nil {
+ slog.Warn(fmt.Sprintf("[%d] %s", i, err))
+ continue
+ }
+
+ // TODO according to docs, freeMem may lie on windows!
+ slog.Info(fmt.Sprintf("[%d] Total Mem: %d", i, totalMemory))
+ slog.Info(fmt.Sprintf("[%d] Free Mem: %d", i, freeMemory))
+ resp.memInfo.DeviceCount++
+ resp.memInfo.TotalMemory += totalMemory
+ resp.memInfo.FreeMemory += freeMemory
+ }
+ if resp.memInfo.DeviceCount > 0 {
+ resp.Library = "rocm"
+ }
+ // Abort if all GPUs are skipped
+ if len(skip) >= count {
+ slog.Info("all detected amdgpus are skipped, falling back to CPU")
+ return
+ }
+ if len(skip) > 0 {
+ amdSetVisibleDevices(ids, skip)
+ }
+ UpdatePath(libDir)
+}
+
+func AMDValidateLibDir() (string, error) {
+ // On windows non-admins typically can't create links
+ // so instead of trying to rely on rpath and a link in
+ // $LibDir/rocm, we instead rely on setting PATH to point
+ // to the location of the ROCm library
+
+ // Installer payload location
+ exe, err := os.Executable()
+ if err == nil {
+ rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
+ if rocmLibUsable(rocmTargetDir) {
+ slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
+ return rocmTargetDir, nil
+ }
+ }
+
+ // If we already have a rocm dependency wired, nothing more to do
+ libDir, err := AssetsDir()
+ if err != nil {
+ return "", fmt.Errorf("unable to lookup lib dir: %w", err)
+ }
+ rocmTargetDir := filepath.Join(libDir, "rocm")
+ if rocmLibUsable(rocmTargetDir) {
+ return rocmTargetDir, nil
+ }
+
+ // Prefer explicit HIP env var
+ hipPath := os.Getenv("HIP_PATH")
+ if hipPath != "" {
+ hipLibDir := filepath.Join(hipPath, "bin")
+ if rocmLibUsable(hipLibDir) {
+ slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
+ return hipLibDir, nil
+ }
+ }
+
+ // Well known location(s)
+ if rocmLibUsable(RocmStandardLocation) {
+ return RocmStandardLocation, nil
+ }
+
+ // Installer payload (if we're running from some other location)
+ localAppData := os.Getenv("LOCALAPPDATA")
+ appDir := filepath.Join(localAppData, "Programs", "Ollama")
+ rocmTargetDir = filepath.Join(appDir, "rocm")
+ if rocmLibUsable(rocmTargetDir) {
+ slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
+ return rocmTargetDir, nil
+ }
+
+ // Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
+ slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm v6")
+ return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+}
diff --git a/gpu/assets.go b/gpu/assets.go
new file mode 100644
index 00000000..41d0046a
--- /dev/null
+++ b/gpu/assets.go
@@ -0,0 +1,60 @@
+package gpu
+
+import (
+ "fmt"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strings"
+
+ "github.com/jmorganca/ollama/version"
+)
+
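+// AssetsDir returns the versioned payload directory
+// ($HOME/.ollama/assets/<version>), removing stale directories left behind by
+// prior versions, except the installer-managed "rocm" dependencies.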
+func AssetsDir() (string, error) {
+ home, err := os.UserHomeDir()
+ if err != nil {
+ return "", err
+ }
+ baseDir := filepath.Join(home, ".ollama", "assets")
+ libDirs, err := os.ReadDir(baseDir)
+ if err == nil {
+ for _, d := range libDirs {
+ if d.Name() == version.Version {
+ continue
+ }
+ // Special case the rocm dependencies, which are handled by the installer
+ if d.Name() == "rocm" {
+ continue
+ }
+ slog.Debug("stale lib detected, cleaning up " + d.Name())
+ err = os.RemoveAll(filepath.Join(baseDir, d.Name()))
+ if err != nil {
+ slog.Warn(fmt.Sprintf("unable to clean up stale library %s: %s", filepath.Join(baseDir, d.Name()), err))
+ }
+ }
+ }
+ return filepath.Join(baseDir, version.Version), nil
+}
+
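+// UpdatePath prepends dir to the process PATH on Windows (a no-op if dir is
+// already present) so freshly extracted DLLs are found first; linux and
+// darwin rely on rpath instead.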
+func UpdatePath(dir string) {
+ if runtime.GOOS == "windows" {
+ tmpDir := filepath.Dir(dir)
+ pathComponents := strings.Split(os.Getenv("PATH"), ";")
+ i := 0
+ for _, comp := range pathComponents {
+ if strings.EqualFold(comp, dir) {
+ return
+ }
+ // Remove any other prior paths to our temp dir
+ if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
+ pathComponents[i] = comp
+ i++
+ }
+ }
+ newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
+ slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
+ os.Setenv("PATH", newPath)
+ }
+ // linux and darwin rely on rpath
+}
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 8c7f1297..e0c18e26 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -24,7 +24,6 @@ import (
type handles struct {
cuda *C.cuda_handle_t
- rocm *C.rocm_handle_t
}
var gpuMutex sync.Mutex
@@ -54,39 +53,23 @@ var CudaWindowsGlobs = []string{
"c:\\Windows\\System32\\nvml.dll",
}
-var RocmLinuxGlobs = []string{
- "/opt/rocm*/lib*/librocm_smi64.so*",
-}
-
-var RocmWindowsGlobs = []string{
- "c:\\Windows\\System32\\rocm_smi64.dll",
-}
-
// Note: gpuMutex must already be held
func initGPUHandles() {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
- gpuHandles = &handles{nil, nil}
+ gpuHandles = &handles{nil}
var cudaMgmtName string
var cudaMgmtPatterns []string
- var rocmMgmtName string
- var rocmMgmtPatterns []string
switch runtime.GOOS {
case "windows":
cudaMgmtName = "nvml.dll"
cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs))
copy(cudaMgmtPatterns, CudaWindowsGlobs)
- rocmMgmtName = "rocm_smi64.dll"
- rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs))
- copy(rocmMgmtPatterns, RocmWindowsGlobs)
case "linux":
cudaMgmtName = "libnvidia-ml.so"
cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs))
copy(cudaMgmtPatterns, CudaLinuxGlobs)
- rocmMgmtName = "librocm_smi64.so"
- rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs))
- copy(rocmMgmtPatterns, RocmLinuxGlobs)
default:
return
}
@@ -101,16 +84,6 @@ func initGPUHandles() {
return
}
}
-
- rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns)
- if len(rocmLibPaths) > 0 {
- rocm := LoadROCMMgmt(rocmLibPaths)
- if rocm != nil {
- slog.Info("Radeon GPU detected")
- gpuHandles.rocm = rocm
- return
- }
- }
}
func GetGPUInfo() GpuInfo {
@@ -149,66 +122,10 @@ func GetGPUInfo() GpuInfo {
slog.Info(fmt.Sprintf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
}
}
- } else if AMDDetected() && gpuHandles.rocm != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") {
- ver, err := AMDDriverVersion()
- if err == nil {
- slog.Info("AMD Driver: " + ver)
- } else {
- // For now this is benign, but we may eventually need to fail compatibility checks
- slog.Debug("error looking up amd driver version: %s", err)
- }
- gfx := AMDGFXVersions()
- tooOld := false
- for _, v := range gfx {
- if v.Major < 9 {
- slog.Info("AMD GPU too old, falling back to CPU " + v.ToGFXString())
- tooOld = true
- break
- }
-
- // TODO - remap gfx strings for unsupporetd minor/patch versions to supported for the same major
- // e.g. gfx1034 works if we map it to gfx1030 at runtime
-
- }
- if !tooOld {
- // TODO - this algo can be shifted over to use sysfs instead of the rocm info library...
- C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
- if memInfo.err != nil {
- slog.Info(fmt.Sprintf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err)))
- C.free(unsafe.Pointer(memInfo.err))
- } else if memInfo.igpu_index >= 0 && memInfo.count == 1 {
- // Only one GPU detected and it appears to be an integrated GPU - skip it
- slog.Info("ROCm unsupported integrated GPU detected")
- } else if memInfo.count > 0 {
- if memInfo.igpu_index >= 0 {
- // We have multiple GPUs reported, and one of them is an integrated GPU
- // so we have to set the env var to bypass it
- // If the user has specified their own ROCR_VISIBLE_DEVICES, don't clobber it
- val := os.Getenv("ROCR_VISIBLE_DEVICES")
- if val == "" {
- devices := []string{}
- for i := 0; i < int(memInfo.count); i++ {
- if i == int(memInfo.igpu_index) {
- continue
- }
- devices = append(devices, strconv.Itoa(i))
- }
- val = strings.Join(devices, ",")
- os.Setenv("ROCR_VISIBLE_DEVICES", val)
- }
- slog.Info(fmt.Sprintf("ROCm integrated GPU detected - ROCR_VISIBLE_DEVICES=%s", val))
- }
- resp.Library = "rocm"
- var version C.rocm_version_resp_t
- C.rocm_get_version(*gpuHandles.rocm, &version)
- verString := C.GoString(version.str)
- if version.status == 0 {
- resp.Variant = "v" + verString
- } else {
- slog.Info(fmt.Sprintf("failed to look up ROCm version: %s", verString))
- }
- C.free(unsafe.Pointer(version.str))
- }
+ } else {
+ AMDGetGPUInfo(&resp)
+ if resp.Library != "" {
+ return resp
}
}
if resp.Library == "" {
@@ -338,23 +255,6 @@ func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
return nil
}
-func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
- var resp C.rocm_init_resp_t
- resp.rh.verbose = getVerboseState()
- for _, libPath := range rocmLibPaths {
- lib := C.CString(libPath)
- defer C.free(unsafe.Pointer(lib))
- C.rocm_init(lib, &resp)
- if resp.err != nil {
- slog.Info(fmt.Sprintf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err)))
- C.free(unsafe.Pointer(resp.err))
- } else {
- return &resp.rh
- }
- }
- return nil
-}
-
func getVerboseState() C.uint16_t {
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
return C.uint16_t(1)
diff --git a/gpu/gpu_info.h b/gpu/gpu_info.h
index e52d2066..8186a3f0 100644
--- a/gpu/gpu_info.h
+++ b/gpu/gpu_info.h
@@ -53,7 +53,6 @@ void cpu_check_ram(mem_info_t *resp);
#endif
#include "gpu_info_cuda.h"
-#include "gpu_info_rocm.h"
#endif // __GPU_INFO_H__
#endif // __APPLE__
\ No newline at end of file
diff --git a/gpu/gpu_info_cuda.c b/gpu/gpu_info_cuda.c
index a64b4587..509bf5c6 100644
--- a/gpu/gpu_info_cuda.c
+++ b/gpu/gpu_info_cuda.c
@@ -124,31 +124,31 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
// When in verbose mode, report more information about
// the card we discover, but don't fail on error
ret = (*h.nvmlDeviceGetName)(device, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
+ if (ret != NVML_SUCCESS) {
LOG(h.verbose, "nvmlDeviceGetName failed: %d\n", ret);
} else {
LOG(h.verbose, "[%d] CUDA device name: %s\n", i, buf);
}
ret = (*h.nvmlDeviceGetBoardPartNumber)(device, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
+ if (ret != NVML_SUCCESS) {
LOG(h.verbose, "nvmlDeviceGetBoardPartNumber failed: %d\n", ret);
} else {
LOG(h.verbose, "[%d] CUDA part number: %s\n", i, buf);
}
ret = (*h.nvmlDeviceGetSerial)(device, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
+ if (ret != NVML_SUCCESS) {
LOG(h.verbose, "nvmlDeviceGetSerial failed: %d\n", ret);
} else {
LOG(h.verbose, "[%d] CUDA S/N: %s\n", i, buf);
}
ret = (*h.nvmlDeviceGetVbiosVersion)(device, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
+ if (ret != NVML_SUCCESS) {
LOG(h.verbose, "nvmlDeviceGetVbiosVersion failed: %d\n", ret);
} else {
LOG(h.verbose, "[%d] CUDA vbios version: %s\n", i, buf);
}
ret = (*h.nvmlDeviceGetBrand)(device, &brand);
- if (ret != RSMI_STATUS_SUCCESS) {
+ if (ret != NVML_SUCCESS) {
LOG(h.verbose, "nvmlDeviceGetBrand failed: %d\n", ret);
} else {
LOG(h.verbose, "[%d] CUDA brand: %d\n", i, brand);
diff --git a/gpu/gpu_info_rocm.c b/gpu/gpu_info_rocm.c
deleted file mode 100644
index 7ac88611..00000000
--- a/gpu/gpu_info_rocm.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#ifndef __APPLE__
-
-#include "gpu_info_rocm.h"
-
-#include <string.h>
-
-void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
- rsmi_status_t ret;
- resp->err = NULL;
- const int buflen = 256;
- char buf[buflen + 1];
- int i;
- struct lookup {
- char *s;
- void **p;
- } l[] = {
- {"rsmi_init", (void *)&resp->rh.rsmi_init},
- {"rsmi_shut_down", (void *)&resp->rh.rsmi_shut_down},
- {"rsmi_dev_memory_total_get", (void *)&resp->rh.rsmi_dev_memory_total_get},
- {"rsmi_dev_memory_usage_get", (void *)&resp->rh.rsmi_dev_memory_usage_get},
- {"rsmi_version_get", (void *)&resp->rh.rsmi_version_get},
- {"rsmi_num_monitor_devices", (void*)&resp->rh.rsmi_num_monitor_devices},
- {"rsmi_dev_id_get", (void*)&resp->rh.rsmi_dev_id_get},
- {"rsmi_dev_name_get", (void *)&resp->rh.rsmi_dev_name_get},
- {"rsmi_dev_brand_get", (void *)&resp->rh.rsmi_dev_brand_get},
- {"rsmi_dev_vendor_name_get", (void *)&resp->rh.rsmi_dev_vendor_name_get},
- {"rsmi_dev_vram_vendor_get", (void *)&resp->rh.rsmi_dev_vram_vendor_get},
- {"rsmi_dev_serial_number_get", (void *)&resp->rh.rsmi_dev_serial_number_get},
- {"rsmi_dev_subsystem_name_get", (void *)&resp->rh.rsmi_dev_subsystem_name_get},
- {"rsmi_dev_vbios_version_get", (void *)&resp->rh.rsmi_dev_vbios_version_get},
- {NULL, NULL},
- };
-
- resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY);
- if (!resp->rh.handle) {
- char *msg = LOAD_ERR();
- snprintf(buf, buflen,
- "Unable to load %s library to query for Radeon GPUs: %s\n",
- rocm_lib_path, msg);
- free(msg);
- resp->err = strdup(buf);
- return;
- }
-
- // TODO once we've squashed the remaining corner cases remove this log
- LOG(resp->rh.verbose, "wiring rocm management library functions in %s\n", rocm_lib_path);
-
- for (i = 0; l[i].s != NULL; i++) {
- // TODO once we've squashed the remaining corner cases remove this log
- LOG(resp->rh.verbose, "dlsym: %s\n", l[i].s);
-
- *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
- if (!l[i].p) {
- resp->rh.handle = NULL;
- char *msg = LOAD_ERR();
- LOG(resp->rh.verbose, "dlerr: %s\n", msg);
- UNLOAD_LIBRARY(resp->rh.handle);
- snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
- msg);
- free(msg);
- resp->err = strdup(buf);
- return;
- }
- }
-
- ret = (*resp->rh.rsmi_init)(0);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(resp->rh.verbose, "rsmi_init err: %d\n", ret);
- UNLOAD_LIBRARY(resp->rh.handle);
- resp->rh.handle = NULL;
- snprintf(buf, buflen, "rocm vram init failure: %d", ret);
- resp->err = strdup(buf);
- }
-
- return;
-}
-
-void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
- resp->err = NULL;
- resp->igpu_index = -1;
- uint64_t totalMem = 0;
- uint64_t usedMem = 0;
- rsmi_status_t ret;
- const int buflen = 256;
- char buf[buflen + 1];
- int i;
-
- if (h.handle == NULL) {
- resp->err = strdup("rocm handle not initialized");
- return;
- }
-
- ret = (*h.rsmi_num_monitor_devices)(&resp->count);
- if (ret != RSMI_STATUS_SUCCESS) {
- snprintf(buf, buflen, "unable to get device count: %d", ret);
- resp->err = strdup(buf);
- return;
- }
- LOG(h.verbose, "discovered %d ROCm GPU Devices\n", resp->count);
-
- resp->total = 0;
- resp->free = 0;
- for (i = 0; i < resp->count; i++) {
- if (h.verbose) {
- // When in verbose mode, report more information about
- // the card we discover, but don't fail on error
- ret = (*h.rsmi_dev_name_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_name_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm device name: %s\n", i, buf);
- }
- ret = (*h.rsmi_dev_brand_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_brand_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm brand: %s\n", i, buf);
- }
- ret = (*h.rsmi_dev_vendor_name_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_vendor_name_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm vendor: %s\n", i, buf);
- }
- ret = (*h.rsmi_dev_vram_vendor_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_vram_vendor_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm VRAM vendor: %s\n", i, buf);
- }
- ret = (*h.rsmi_dev_serial_number_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_serial_number_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm S/N: %s\n", i, buf);
- }
- ret = (*h.rsmi_dev_subsystem_name_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_subsystem_name_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm subsystem name: %s\n", i, buf);
- }
- ret = (*h.rsmi_dev_vbios_version_get)(i, buf, buflen);
- if (ret != RSMI_STATUS_SUCCESS) {
- LOG(h.verbose, "rsmi_dev_vbios_version_get failed: %d\n", ret);
- } else {
- LOG(h.verbose, "[%d] ROCm vbios version: %s\n", i, buf);
- }
- }
-
- // Get total memory - used memory for available memory
- ret = (*h.rsmi_dev_memory_total_get)(i, RSMI_MEM_TYPE_VRAM, &totalMem);
- if (ret != RSMI_STATUS_SUCCESS) {
- snprintf(buf, buflen, "rocm total mem lookup failure: %d", ret);
- resp->err = strdup(buf);
- return;
- }
- ret = (*h.rsmi_dev_memory_usage_get)(i, RSMI_MEM_TYPE_VRAM, &usedMem);
- if (ret != RSMI_STATUS_SUCCESS) {
- snprintf(buf, buflen, "rocm usage mem lookup failure: %d", ret);
- resp->err = strdup(buf);
- return;
- }
- LOG(h.verbose, "[%d] ROCm totalMem %ld\n", i, totalMem);
- LOG(h.verbose, "[%d] ROCm usedMem %ld\n", i, usedMem);
- if (totalMem < 1024 * 1024 * 1024) {
- // Do not add up integrated GPU memory capacity, it's a bogus 512M, and actually uses system memory
- LOG(h.verbose, "[%d] ROCm integrated GPU\n", i);
- resp->igpu_index = i;
- } else {
- resp->total += totalMem;
- resp->free += totalMem - usedMem;
- }
- }
-}
-
-void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) {
- const int buflen = 256;
- char buf[buflen + 1];
- if (h.handle == NULL) {
- resp->str = strdup("rocm handle not initialized");
- resp->status = 1;
- return;
- }
- rsmi_version_t ver;
- rsmi_status_t ret;
- ret = h.rsmi_version_get(&ver);
- if (ret != RSMI_STATUS_SUCCESS) {
- snprintf(buf, buflen, "unexpected response on version lookup %d", ret);
- resp->status = 1;
- } else {
- snprintf(buf, buflen, "%d", ver.major);
- resp->status = 0;
- }
- resp->str = strdup(buf);
-}
-
-#endif // __APPLE__
diff --git a/gpu/gpu_info_rocm.h b/gpu/gpu_info_rocm.h
deleted file mode 100644
index 0a8d50c0..00000000
--- a/gpu/gpu_info_rocm.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef __APPLE__
-#ifndef __GPU_INFO_ROCM_H__
-#define __GPU_INFO_ROCM_H__
-#include "gpu_info.h"
-
-// Just enough typedef's to dlopen/dlsym for memory information
-typedef enum rsmi_status_return {
- RSMI_STATUS_SUCCESS = 0,
- // Other values omitted for now...
-} rsmi_status_t;
-
-typedef enum rsmi_memory_type {
- RSMI_MEM_TYPE_VRAM = 0,
- RSMI_MEM_TYPE_VIS_VRAM,
- RSMI_MEM_TYPE_GTT,
-} rsmi_memory_type_t;
-
- typedef struct {
- uint32_t major;
- uint32_t minor;
- uint32_t patch;
- const char *build;
- } rsmi_version_t;
-
-typedef struct rocm_handle {
- void *handle;
- uint16_t verbose;
- rsmi_status_t (*rsmi_init)(uint64_t);
- rsmi_status_t (*rsmi_shut_down)(void);
- rsmi_status_t (*rsmi_dev_memory_total_get)(uint32_t, rsmi_memory_type_t, uint64_t *);
- rsmi_status_t (*rsmi_dev_memory_usage_get)(uint32_t, rsmi_memory_type_t, uint64_t *);
- rsmi_status_t (*rsmi_version_get) (rsmi_version_t *version);
- rsmi_status_t (*rsmi_num_monitor_devices) (uint32_t *);
- rsmi_status_t (*rsmi_dev_id_get)(uint32_t, uint16_t *);
- rsmi_status_t (*rsmi_dev_name_get) (uint32_t,char *,size_t);
- rsmi_status_t (*rsmi_dev_brand_get) (uint32_t, char *, uint32_t);
- rsmi_status_t (*rsmi_dev_vendor_name_get) (uint32_t, char *, uint32_t);
- rsmi_status_t (*rsmi_dev_vram_vendor_get) (uint32_t, char *, uint32_t);
- rsmi_status_t (*rsmi_dev_serial_number_get) (uint32_t, char *, uint32_t);
- rsmi_status_t (*rsmi_dev_subsystem_name_get) (uint32_t, char *, uint32_t);
- rsmi_status_t (*rsmi_dev_vbios_version_get) (uint32_t, char *, uint32_t);
-} rocm_handle_t;
-
-typedef struct rocm_init_resp {
- char *err; // If err is non-null handle is invalid
- rocm_handle_t rh;
-} rocm_init_resp_t;
-
-typedef struct rocm_version_resp {
- rsmi_status_t status;
- char *str; // Contains version or error string if status != 0
-} rocm_version_resp_t;
-
-void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp);
-void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
-void rocm_get_version(rocm_handle_t rh, rocm_version_resp_t *resp);
-
-#endif // __GPU_INFO_ROCM_H__
-#endif // __APPLE__
\ No newline at end of file
diff --git a/llm/dyn_ext_server.c b/llm/dyn_ext_server.c
index 47dc4e99..dab49f85 100644
--- a/llm/dyn_ext_server.c
+++ b/llm/dyn_ext_server.c
@@ -14,17 +14,14 @@
#define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
#define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
#define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
-inline char *LOAD_ERR() {
- LPSTR messageBuffer = NULL;
- size_t size = FormatMessageA(
- FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
- (LPSTR)&messageBuffer, 0, NULL);
- char *resp = strdup(messageBuffer);
- LocalFree(messageBuffer);
- return resp;
-}
+#define LOAD_ERR() ({\
+ LPSTR messageBuffer = NULL; \
+ size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \
+ NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \
+ char *resp = strdup(messageBuffer); \
+ LocalFree(messageBuffer); \
+ resp; \
+})
#else
#include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
diff --git a/llm/dyn_ext_server.go b/llm/dyn_ext_server.go
index 8d7ebf9e..fa5a3477 100644
--- a/llm/dyn_ext_server.go
+++ b/llm/dyn_ext_server.go
@@ -28,13 +28,13 @@ import (
"log/slog"
"os"
"path/filepath"
- "runtime"
"strings"
"sync"
"time"
"unsafe"
"github.com/jmorganca/ollama/api"
+ "github.com/jmorganca/ollama/gpu"
)
type dynExtServer struct {
@@ -72,7 +72,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete")
mutex.Lock()
}
- updatePath(filepath.Dir(library))
+ gpu.UpdatePath(filepath.Dir(library))
libPath := C.CString(library)
defer C.free(unsafe.Pointer(libPath))
resp := newExtServerResp(512)
@@ -148,6 +148,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
}
slog.Info("Initializing llama server")
+ slog.Debug(fmt.Sprintf("server params: %+v", sparams))
initResp := newExtServerResp(128)
defer freeExtServerResp(initResp)
C.dyn_llama_server_init(llm.s, &sparams, &initResp)
@@ -365,25 +366,3 @@ func (llm *dynExtServer) Close() {
C.dyn_llama_server_stop(llm.s)
mutex.Unlock()
}
-
-func updatePath(dir string) {
- if runtime.GOOS == "windows" {
- tmpDir := filepath.Dir(dir)
- pathComponents := strings.Split(os.Getenv("PATH"), ";")
- i := 0
- for _, comp := range pathComponents {
- if strings.EqualFold(comp, dir) {
- return
- }
- // Remove any other prior paths to our temp dir
- if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
- pathComponents[i] = comp
- i++
- }
- }
- newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
- slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
- os.Setenv("PATH", newPath)
- }
- // linux and darwin rely on rpath
-}
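
Editor's note: the updatePath helper deleted above is now invoked as gpu.UpdatePath (see the llm/dyn_ext_server.go hunk earlier in this diff), but the gpu package itself is not part of this patch. The following is only a minimal Go sketch of the relocated logic, reconstructed from the deleted function; the package name, exported name, and anything beyond what the removed code shows are assumptions.

```go
// Sketch of the relocated helper, based on the updatePath function
// removed above; the real gpu.UpdatePath implementation is not shown
// in this diff, so treat the details here as assumptions.
package gpu

import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
)

// UpdatePath prepends dir to PATH on Windows so freshly extracted
// server DLLs resolve their dependencies; Linux and macOS rely on rpath.
func UpdatePath(dir string) {
	if runtime.GOOS != "windows" {
		return
	}
	tmpDir := filepath.Dir(dir)
	pathComponents := strings.Split(os.Getenv("PATH"), ";")
	i := 0
	for _, comp := range pathComponents {
		if strings.EqualFold(comp, dir) {
			return // dir is already on PATH
		}
		// Keep only components that do not point at earlier temp extractions.
		if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
			pathComponents[i] = comp
			i++
		}
	}
	// Slicing to [:i] makes the stale-entry filtering above take effect;
	// the deleted code joined the full, unfiltered slice.
	newPath := strings.Join(append([]string{dir}, pathComponents[:i]...), ";")
	slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
	os.Setenv("PATH", newPath)
}
```
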
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index e6a7d077..0b8cb344 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -179,17 +179,21 @@ fi
if [ -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library"
- if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
- ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
+ if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
+ ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
fi
init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
- EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
+ EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
- # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
- # them being present at runtime on the host
+ # Record the ROCM dependencies
+ rm -f "${BUILD_DIR}/lib/deps.txt"
+ touch "${BUILD_DIR}/lib/deps.txt"
+ for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
+ echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
+ done
compress_libs
fi
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
index e0313420..579b2bca 100644
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -2,19 +2,52 @@
$ErrorActionPreference = "Stop"
+function amdGPUs {
+ if ($env:AMDGPU_TARGETS) {
+ return $env:AMDGPU_TARGETS
+ }
+ # TODO - load from some common data file for linux + windows build consistency
+ $GPU_LIST = @(
+ "gfx900"
+ "gfx906:xnack-"
+ "gfx908:xnack-"
+ "gfx90a:xnack+"
+ "gfx90a:xnack-"
+ "gfx1010"
+ "gfx1012"
+ "gfx1030"
+ "gfx1100"
+ "gfx1101"
+ "gfx1102"
+ )
+ $GPU_LIST -join ';'
+}
+
function init_vars {
+ # Verify the environment is a Developer Shell for MSVC 2019
+ write-host $env:VSINSTALLDIR
+ if (($env:VSINSTALLDIR -eq $null)) {
+ Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment"
+ exit 1
+ }
$script:SRC_DIR = $(resolve-path "..\..\")
$script:llamacppDir = "../llama.cpp"
- $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A", "x64")
+ $script:cmakeDefs = @(
+ "-DBUILD_SHARED_LIBS=on",
+ "-DLLAMA_NATIVE=off"
+ )
$script:cmakeTargets = @("ext_server")
$script:ARCH = "amd64" # arm not yet supported.
if ($env:CGO_CFLAGS -contains "-g") {
- $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
+ $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo"
} else {
- $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
+ $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
$script:config = "Release"
}
+ if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
+ $script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
+ }
# Try to find the CUDA dir
if ($env:CUDA_LIB_DIR -eq $null) {
$d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -157,7 +190,7 @@ apply_patches
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
write-host "Building LCD CPU"
build
@@ -166,7 +199,7 @@ sign
compress_libs
init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
write-host "Building AVX CPU"
build
@@ -175,7 +208,7 @@ sign
compress_libs
init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
write-host "Building AVX2 CPU"
build
@@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) {
}
init_vars
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
- $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+ $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+ write-host "Building CUDA"
build
install
sign
compress_libs
}
-# TODO - actually implement ROCm support on windows
-$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
-rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
-md "${script:buildDir}/lib" -ea 0 > $null
-echo $null >> "${script:buildDir}/lib/.generated"
+if ($null -ne $env:HIP_PATH) {
+ $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+ if ($null -ne $script:ROCM_VERSION) {
+ $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+ }
+
+ init_vars
+ $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+ $script:cmakeDefs += @(
+ "-G", "Ninja",
+ "-DCMAKE_C_COMPILER=clang.exe",
+ "-DCMAKE_CXX_COMPILER=clang++.exe",
+ "-DLLAMA_HIPBLAS=on",
+ "-DLLAMA_AVX=on",
+ "-DLLAMA_AVX2=off",
+ "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+ "-DAMDGPU_TARGETS=$(amdGPUs)",
+ "-DGPU_TARGETS=$(amdGPUs)"
+ )
+
+ # Make sure the ROCm binary dir is first in the path
+ $env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH"
+
+ # We have to clobber the LIB var from the developer shell for clang to work properly
+ $env:LIB=""
+
+ write-host "Building ROCm"
+ build
+ # Ninja doesn't prefix with config name
+ ${script:config}=""
+ install
+ if ($null -ne $script:DUMPBIN) {
+ & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
+ }
+ sign
+ compress_libs
+}
cleanup
write-host "`ngo generate completed"
diff --git a/llm/llm.go b/llm/llm.go
index 81bab122..b0ac0f60 100644
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -19,7 +19,7 @@ type LLM interface {
Close()
}
-func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) {
if _, err := os.Stat(model); err != nil {
return nil, err
}
@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0
- return newLlmServer(info, workDir, model, adapters, projectors, opts)
+ return newLlmServer(info, model, adapters, projectors, opts)
}
// Give any native cgo implementations an opportunity to initialize
-func Init(workdir string) error {
- return nativeInit(workdir)
+func Init() error {
+ return nativeInit()
}
-func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
dynLibs := getDynLibs(gpuInfo)
// Check to see if the user has requested a specific library instead of auto-detecting
@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto
_, err := os.Stat(dynLibs[0])
if err != nil {
slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0]))
- err = nativeInit(workDir)
+ err = nativeInit()
if err != nil {
return nil, err
}
diff --git a/llm/payload_common.go b/llm/payload_common.go
index 3958b9f5..ff38b63f 100644
--- a/llm/payload_common.go
+++ b/llm/payload_common.go
@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool {
return false
}
-func nativeInit(workdir string) error {
+func nativeInit() error {
slog.Info("Extracting dynamic libraries...")
+ assetsDir, err := gpu.AssetsDir()
+ if err != nil {
+ return err
+ }
if runtime.GOOS == "darwin" {
- err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
+ err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
if err != nil {
if err == payloadMissing {
// TODO perhaps consider this a hard failure on arm macs?
@@ -115,10 +119,10 @@ func nativeInit(workdir string) error {
}
return err
}
- os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
+ os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
}
- libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
+ libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
if err != nil {
if err == payloadMissing {
slog.Info(fmt.Sprintf("%s", payloadMissing))
@@ -149,17 +153,13 @@ func nativeInit(workdir string) error {
return nil
}
-func extractDynamicLibs(workDir, glob string) ([]string, error) {
+func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
files, err := fs.Glob(libEmbed, glob)
if err != nil || len(files) == 0 {
return nil, payloadMissing
}
libs := []string{}
- // TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
- // and tracking by version so we don't reexpand the files every time
- // Also maybe consider lazy loading only what is needed
-
g := new(errgroup.Group)
for _, file := range files {
pathComps := strings.Split(file, "/")
@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
g.Go(func() error {
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs
- targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
+ targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
srcFile, err := libEmbed.Open(file)
if err != nil {
return fmt.Errorf("read payload %s: %v", file, err)
}
defer srcFile.Close()
if err := os.MkdirAll(targetDir, 0o755); err != nil {
- return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+ return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
}
src := io.Reader(srcFile)
filename := file
@@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
libs = append(libs, destFile)
}
- _, err = os.Stat(destFile)
- switch {
- case errors.Is(err, os.ErrNotExist):
- destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
- if err != nil {
- return fmt.Errorf("write payload %s: %v", file, err)
- }
- defer destFile.Close()
- if _, err := io.Copy(destFile, src); err != nil {
- return fmt.Errorf("copy payload %s: %v", file, err)
- }
- case err != nil:
- return fmt.Errorf("stat payload %s: %v", file, err)
+ destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+ if err != nil {
+ return fmt.Errorf("write payload %s: %v", file, err)
+ }
+ defer destFp.Close()
+ if _, err := io.Copy(destFp, src); err != nil {
+ return fmt.Errorf("copy payload %s: %v", file, err)
}
return nil
})
@@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
return libs, g.Wait()
}
-func extractPayloadFiles(workDir, glob string) error {
+func extractPayloadFiles(assetsDir, glob string) error {
files, err := fs.Glob(libEmbed, glob)
if err != nil || len(files) == 0 {
return payloadMissing
@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error {
return fmt.Errorf("read payload %s: %v", file, err)
}
defer srcFile.Close()
- if err := os.MkdirAll(workDir, 0o755); err != nil {
- return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+ if err := os.MkdirAll(assetsDir, 0o755); err != nil {
+ return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
}
src := io.Reader(srcFile)
filename := file
@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error {
filename = strings.TrimSuffix(filename, ".gz")
}
- destFile := filepath.Join(workDir, filepath.Base(filename))
+ destFile := filepath.Join(assetsDir, filepath.Base(filename))
_, err = os.Stat(destFile)
switch {
case errors.Is(err, os.ErrNotExist):
- destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+ destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil {
return fmt.Errorf("write payload %s: %v", file, err)
}
- defer destFile.Close()
- if _, err := io.Copy(destFile, src); err != nil {
+ defer destFp.Close()
+ if _, err := io.Copy(destFp, src); err != nil {
return fmt.Errorf("copy payload %s: %v", file, err)
}
case err != nil:
return fmt.Errorf("stat payload %s: %v", file, err)
+ case err == nil:
+ slog.Debug("payload already exists: " + destFile)
}
}
return nil
diff --git a/llm/payload_linux.go b/llm/payload_linux.go
index fc366209..276705c7 100644
--- a/llm/payload_linux.go
+++ b/llm/payload_linux.go
@@ -4,5 +4,5 @@ import (
"embed"
)
-//go:embed llama.cpp/build/linux/*/*/lib/*.so*
+//go:embed llama.cpp/build/linux/*/*/lib/*
var libEmbed embed.FS
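
Editor's note on why the //go:embed pattern is broadened from lib/*.so* to lib/*: extractDynamicLibs in llm/payload_common.go (see the hunks above) enumerates payloads with fs.Glob over this embed.FS, so any file placed under a build variant's lib/ directory, including the deps.txt written by gen_linux.sh, now gets embedded and extracted alongside the compressed server libraries. A standalone Go sketch of that enumeration follows; it is illustrative only and assumes the generate step has already populated the llama.cpp build tree, since //go:embed refuses to compile when a pattern matches nothing.

```go
// Standalone sketch (not repository code): how the broadened embed
// pattern is consumed. fs.Glob walks the embedded tree, so widening
// the pattern from lib/*.so* to lib/* lets non-library payloads such
// as deps.txt ride along with the compressed server libraries.
package main

import (
	"embed"
	"fmt"
	"io/fs"
)

// Assumes `go generate ./...` has already populated llama.cpp/build/...
//go:embed llama.cpp/build/linux/*/*/lib/*
var libEmbed embed.FS

func main() {
	files, err := fs.Glob(libEmbed, "llama.cpp/build/*/*/*/lib/*")
	if err != nil || len(files) == 0 {
		fmt.Println("no payloads embedded")
		return
	}
	for _, file := range files {
		// Prints each embedded payload path, one per line.
		fmt.Println(file)
	}
}
```
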
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 338dbcd5..77e21d40 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do
.
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
+ docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/
docker rm builder-$TARGETARCH
done
diff --git a/server/routes.go b/server/routes.go
index 28165721..e5adc345 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
- workDir := c.GetString("workDir")
-
needLoad := loaded.runner == nil || // is there a model loaded?
loaded.ModelPath != model.ModelPath || // has the base model changed?
!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
loaded.Options = nil
}
- llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
+ llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
if err != nil {
// some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to
@@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error {
os.Exit(0)
}()
- if err := llm.Init(s.WorkDir); err != nil {
+ if err := llm.Init(); err != nil {
return fmt.Errorf("unable to initialize llm library %w", err)
}
if runtime.GOOS == "linux" { // TODO - windows too