diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 66e7180d..28cdf09b 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -11,12 +11,12 @@ jobs: generate: strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-latest, windows-2019] arch: [amd64, arm64] exclude: - os: ubuntu-latest arch: arm64 - - os: windows-latest + - os: windows-2019 arch: arm64 runs-on: ${{ matrix.os }} env: @@ -28,7 +28,18 @@ jobs: go-version: '1.22' cache: true - run: go get ./... + - run: | + $gopath=(get-command go).source | split-path -parent + & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" + cd $env:GITHUB_WORKSPACE + $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" + $env:PATH="$gopath;$env:PATH" + go generate -x ./... + if: ${{ startsWith(matrix.os, 'windows-') }} + name: "Windows Go Generate" - run: go generate -x ./... + if: ${{ ! startsWith(matrix.os, 'windows-') }} + name: "Unix Go Generate" - uses: actions/upload-artifact@v4 with: name: ${{ matrix.os }}-${{ matrix.arch }}-libraries @@ -66,7 +77,6 @@ jobs: strategy: matrix: rocm-version: - - '5.7.1' - '6.0' runs-on: linux container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }} @@ -95,12 +105,12 @@ jobs: lint: strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-latest, windows-2019] arch: [amd64, arm64] exclude: - os: ubuntu-latest arch: arm64 - - os: windows-latest + - os: windows-2019 arch: arm64 - os: macos-latest arch: amd64 @@ -134,12 +144,12 @@ jobs: needs: generate strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-latest, windows-2019] arch: [amd64] exclude: - os: ubuntu-latest arch: arm64 - - os: windows-latest + - os: windows-2019 arch: arm64 runs-on: ${{ matrix.os }} env: diff --git a/Dockerfile b/Dockerfile index f996bba0..741b6b08 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ ARG GOLANG_VERSION=1.22.1 ARG CMAKE_VERSION=3.22.1 ARG CUDA_VERSION=11.3.1 +ARG ROCM_VERSION=6.0 # Copy the minimal context we need to run the generate scripts FROM scratch AS llm-code @@ -28,7 +29,7 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate ARG CGO_CFLAGS RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh -FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64 +FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 ARG CMAKE_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh @@ -39,18 +40,14 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate ARG CGO_CFLAGS ARG AMDGPU_TARGETS RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh +RUN mkdir /tmp/scratch && \ + for dep in $(cat /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \ + cp ${dep} /tmp/scratch/ || exit 1 ; \ + done && \ + (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \ + mkdir -p /go/src/github.com/jmorganca/ollama/dist/deps/ && \ + (cd /tmp/scratch/ && tar czvf /go/src/github.com/jmorganca/ollama/dist/deps/rocm-amd64-deps.tgz . 
) -FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64 -ARG CMAKE_VERSION -COPY ./scripts/rh_linux_deps.sh / -RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh -ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH -ENV LIBRARY_PATH /opt/amdgpu/lib64 -COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/ -WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate -ARG CGO_CFLAGS -ARG AMDGPU_TARGETS -RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 ARG CMAKE_VERSION @@ -91,8 +88,8 @@ COPY . . COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ -COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ -COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ +COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ +COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/deps/ ARG GOFLAGS ARG CGO_CFLAGS RUN go build . @@ -117,7 +114,7 @@ RUN apt-get update && apt-get install -y ca-certificates COPY --from=build-arm64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama # Radeon images are much larger so we keep it distinct from the CPU/CUDA image -FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete as runtime-rocm +FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm RUN update-pciids COPY --from=build-amd64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama EXPOSE 11434 diff --git a/app/ollama.iss b/app/ollama.iss index 473a85b3..df61ac4c 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -91,6 +91,14 @@ Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit Source: "..\dist\windeps\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion +; Assumes v5.7, may need adjustments for v6 +#if GetEnv("HIP_PATH") != "" + Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion + Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion + ; amdhip64.dll dependency comes from the driver and must be installed already + Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion +#endif + [Icons] Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" diff --git a/docs/development.md b/docs/development.md index 33110e01..993aed9e 100644 --- a/docs/development.md +++ b/docs/development.md @@ -116,7 +116,7 @@ Note: The windows build for Ollama is still under development. Install required tools: -- MSVC toolchain - C/C++ and cmake as minimal requirements +- MSVC toolchain - C/C++ and cmake as minimal requirements - You must build from a "Developer Shell" with the environment variables set - go version 1.22 or higher - MinGW (pick one variant) with GCC. - @@ -132,6 +132,6 @@ go build . 
#### Windows CUDA (NVIDIA) -In addition to the common Windows development tools described above, install: +In addition to the common Windows development tools described above, install CUDA **AFTER** you install MSVC. - [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) diff --git a/docs/linux.md b/docs/linux.md index 29110b05..c7014ece 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -10,6 +10,14 @@ Install Ollama running this one-liner: curl -fsSL https://ollama.com/install.sh | sh ``` +## AMD Radeon GPU support + +While AMD has contributed the `amdgpu` driver upstream to the official linux +kernel source, the version is older and may not support all ROCm features. We +recommend you install the latest driver from +https://www.amd.com/en/support/linux-drivers for best support of your Radeon +GPU. + ## Manual install ### Download the `ollama` binary diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index a5fb301f..1f2a6b1f 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -67,6 +67,43 @@ You can see what features your CPU has with the following. cat /proc/cpuinfo| grep flags | head -1 ``` +## AMD Radeon GPU Support + +Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In +some cases you can force the system to try to use a close GPU type. For example +The Radeon RX 5400 is `gfx1034` (also known as 10.3.4) however, ROCm does not +support this patch-level, the closest support is `gfx1030`. You can use the +environment variable `HSA_OVERRIDE_GFX_VERSION` with `x.y.z` syntax. So for +example, to force the system to run on the RX 5400, you would set +`HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the server. + +At this time, the known supported GPU types are the following: (This may change from +release to release) +- gfx900 +- gfx906 +- gfx908 +- gfx90a +- gfx940 +- gfx941 +- gfx942 +- gfx1030 +- gfx1100 +- gfx1101 +- gfx1102 + +This will not work for all unsupported GPUs. Reach out on [Discord](https://discord.gg/ollama) +or file an [issue](https://github.com/ollama/ollama/issues) for additional help. + + +## Installing older versions on Linux + +If you run into problems on Linux and want to install an older version you can tell the install script +which version to install. + +```sh +curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.27" sh +``` + ## Known issues * N/A \ No newline at end of file diff --git a/docs/windows.md b/docs/windows.md index 875a3dc0..49d579c9 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -4,7 +4,7 @@ Welcome to the Ollama Windows preview. No more WSL required! -Ollama now runs as a native Windows application, including NVIDIA GPU support. +Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support. After installing Ollama Windows Preview, Ollama will run in the background and the `ollama` command line is available in `cmd`, `powershell` or your favorite terminal application. 
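The troubleshooting notes added earlier in this patch describe forcing a close-but-supported GFX version via `HSA_OVERRIDE_GFX_VERSION`. A minimal sketch of what that looks like in practice, assuming the server is started by hand with `ollama serve` (for a packaged or systemd install the variable would instead be set in the service environment):

```sh
# Example only: treat an RX 5400 (gfx1034) as gfx1030 so ROCm will load it.
# The right override value depends on your GPU; see the troubleshooting notes above.
HSA_OVERRIDE_GFX_VERSION="10.3.0" ollama serve
```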
As usual the Ollama [api](./api.md) will be served on @@ -21,6 +21,7 @@ Logs will often be helpful in dianosing the problem (see * Windows 10 or newer, Home or Pro * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card +* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card ## API Access diff --git a/gpu/amd.go b/gpu/amd.go deleted file mode 100644 index c21b7741..00000000 --- a/gpu/amd.go +++ /dev/null @@ -1,101 +0,0 @@ -package gpu - -import ( - "bufio" - "errors" - "fmt" - "io" - "log/slog" - "os" - "path/filepath" - "strconv" - "strings" -) - -// TODO - windows vs. non-windows vs darwin - -// Discovery logic for AMD/ROCm GPUs - -const ( - DriverVersionFile = "/sys/module/amdgpu/version" - GPUPropertiesFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/properties" - // TODO probably break these down per GPU to make the logic simpler - GPUTotalMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/properties" // size_in_bytes line - GPUUsedMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/used_memory" -) - -func AMDDetected() bool { - // Some driver versions (older?) don't have a version file, so just lookup the parent dir - sysfsDir := filepath.Dir(DriverVersionFile) - _, err := os.Stat(sysfsDir) - if errors.Is(err, os.ErrNotExist) { - slog.Debug("amd driver not detected " + sysfsDir) - return false - } else if err != nil { - slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err)) - return false - } - return true -} - -func AMDDriverVersion() (string, error) { - _, err := os.Stat(DriverVersionFile) - if err != nil { - return "", fmt.Errorf("amdgpu file stat error: %s %w", DriverVersionFile, err) - } - fp, err := os.Open(DriverVersionFile) - if err != nil { - return "", err - } - defer fp.Close() - verString, err := io.ReadAll(fp) - if err != nil { - return "", err - } - return strings.TrimSpace(string(verString)), nil -} - -func AMDGFXVersions() []Version { - res := []Version{} - matches, _ := filepath.Glob(GPUPropertiesFileGlob) - for _, match := range matches { - fp, err := os.Open(match) - if err != nil { - slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err)) - continue - } - defer fp.Close() - - scanner := bufio.NewScanner(fp) - // optionally, resize scanner's capacity for lines over 64K, see next example - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if strings.HasPrefix(line, "gfx_target_version") { - ver := strings.Fields(line) - if len(ver) != 2 || len(ver[1]) < 5 { - slog.Debug("malformed " + line) - continue - } - l := len(ver[1]) - patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32) - minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32) - major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32) - if err1 != nil || err2 != nil || err3 != nil { - slog.Debug("malformed int " + line) - continue - } - - res = append(res, Version{ - Major: uint(major), - Minor: uint(minor), - Patch: uint(patch), - }) - } - } - } - return res -} - -func (v Version) ToGFXString() string { - return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch) -} diff --git a/gpu/amd_common.go b/gpu/amd_common.go new file mode 100644 index 00000000..deb931ff --- /dev/null +++ b/gpu/amd_common.go @@ -0,0 +1,58 @@ +//go:build linux || windows + +package gpu + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "strconv" + "strings" +) + +// Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns +func rocmLibUsable(libDir string) bool { + 
slog.Debug("evaluating potential rocm lib dir " + libDir) + for _, g := range ROCmLibGlobs { + res, _ := filepath.Glob(filepath.Join(libDir, g)) + if len(res) == 0 { + return false + } + } + return true +} + +func GetSupportedGFX(libDir string) ([]string, error) { + var ret []string + files, err := filepath.Glob(filepath.Join(libDir, "rocblas", "library", "TensileLibrary_lazy_gfx*.dat")) + if err != nil { + return nil, err + } + for _, file := range files { + ret = append(ret, strings.TrimSuffix(strings.TrimPrefix(filepath.Base(file), "TensileLibrary_lazy_"), ".dat")) + } + return ret, nil +} + +func amdSetVisibleDevices(ids []int, skip map[int]interface{}) { + // Set the visible devices if not already set + // TODO - does sort order matter? + devices := []string{} + for i := range ids { + slog.Debug(fmt.Sprintf("i=%d", i)) + if _, skipped := skip[i]; skipped { + slog.Debug("skipped") + continue + } + devices = append(devices, strconv.Itoa(i)) + } + slog.Debug(fmt.Sprintf("devices=%v", devices)) + + val := strings.Join(devices, ",") + err := os.Setenv("HIP_VISIBLE_DEVICES", val) + if err != nil { + slog.Warn(fmt.Sprintf("failed to set env: %s", err)) + } + slog.Debug("HIP_VISIBLE_DEVICES=" + val) +} diff --git a/gpu/amd_hip_windows.go b/gpu/amd_hip_windows.go new file mode 100644 index 00000000..14a6c7d6 --- /dev/null +++ b/gpu/amd_hip_windows.go @@ -0,0 +1,141 @@ +package gpu + +import ( + "fmt" + "log/slog" + "strconv" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +const ( + hipSuccess = 0 + hipErrorNoDevice = 100 +) + +type hipDevicePropMinimal struct { + Name [256]byte + unused1 [140]byte + GcnArchName [256]byte // gfx#### + iGPU int // Doesn't seem to actually report correctly + unused2 [128]byte +} + +// Wrap the amdhip64.dll library for GPU discovery +type HipLib struct { + dll windows.Handle + hipGetDeviceCount uintptr + hipGetDeviceProperties uintptr + hipMemGetInfo uintptr + hipSetDevice uintptr + hipDriverGetVersion uintptr +} + +func NewHipLib() (*HipLib, error) { + h, err := windows.LoadLibrary("amdhip64.dll") + if err != nil { + return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err) + } + hl := &HipLib{} + hl.dll = h + hl.hipGetDeviceCount, err = windows.GetProcAddress(hl.dll, "hipGetDeviceCount") + if err != nil { + return nil, err + } + hl.hipGetDeviceProperties, err = windows.GetProcAddress(hl.dll, "hipGetDeviceProperties") + if err != nil { + return nil, err + } + hl.hipMemGetInfo, err = windows.GetProcAddress(hl.dll, "hipMemGetInfo") + if err != nil { + return nil, err + } + hl.hipSetDevice, err = windows.GetProcAddress(hl.dll, "hipSetDevice") + if err != nil { + return nil, err + } + hl.hipDriverGetVersion, err = windows.GetProcAddress(hl.dll, "hipDriverGetVersion") + if err != nil { + return nil, err + } + return hl, nil +} + +// The hip library only evaluates the HIP_VISIBLE_DEVICES variable at startup +// so we have to unload/reset the library after we do our initial discovery +// to make sure our updates to that variable are processed by llama.cpp +func (hl *HipLib) Release() { + err := windows.FreeLibrary(hl.dll) + if err != nil { + slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err)) + } + hl.dll = 0 +} + +func (hl *HipLib) AMDDriverVersion() (string, error) { + if hl.dll == 0 { + return "", fmt.Errorf("dll has been unloaded") + } + var version int + status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version))) + if status != hipSuccess { + return "", fmt.Errorf("failed call to hipDriverGetVersion: 
%d %s", status, err) + } + return strconv.Itoa(version), nil +} + +func (hl *HipLib) HipGetDeviceCount() int { + if hl.dll == 0 { + slog.Error("dll has been unloaded") + return 0 + } + var count int + status, _, err := syscall.SyscallN(hl.hipGetDeviceCount, uintptr(unsafe.Pointer(&count))) + if status == hipErrorNoDevice { + slog.Info("AMD ROCm reports no devices found") + return 0 + } + if status != hipSuccess { + slog.Warn(fmt.Sprintf("failed call to hipGetDeviceCount: %d %s", status, err)) + } + return count +} + +func (hl *HipLib) HipSetDevice(device int) error { + if hl.dll == 0 { + return fmt.Errorf("dll has been unloaded") + } + status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device)) + if status != hipSuccess { + return fmt.Errorf("failed call to hipSetDevice: %d %s", status, err) + } + return nil +} + +func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) { + if hl.dll == 0 { + return nil, fmt.Errorf("dll has been unloaded") + } + var props hipDevicePropMinimal + status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device)) + if status != hipSuccess { + return nil, fmt.Errorf("failed call to hipGetDeviceProperties: %d %s", status, err) + } + return &props, nil +} + +// free, total, err +func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) { + if hl.dll == 0 { + return 0, 0, fmt.Errorf("dll has been unloaded") + } + var totalMemory uint64 + var freeMemory uint64 + status, _, err := syscall.SyscallN(hl.hipMemGetInfo, uintptr(unsafe.Pointer(&freeMemory)), uintptr(unsafe.Pointer(&totalMemory))) + if status != hipSuccess { + return 0, 0, fmt.Errorf("failed call to hipMemGetInfo: %d %s", status, err) + } + return freeMemory, totalMemory, nil +} diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go new file mode 100644 index 00000000..c775b71d --- /dev/null +++ b/gpu/amd_linux.go @@ -0,0 +1,411 @@ +package gpu + +import ( + "bufio" + "errors" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + "slices" + "strconv" + "strings" + + "github.com/jmorganca/ollama/version" +) + +// Discovery logic for AMD/ROCm GPUs + +const ( + curlMsg = "curl -fsSL https://github.com/ollama/ollama/releases/download/v%s/rocm-amd64-deps.tgz | tar -zxf - -C %s" + DriverVersionFile = "/sys/module/amdgpu/version" + AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/" + GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties" + + // Prefix with the node dir + GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line + GPUUsedMemoryFileGlob = "mem_banks/*/used_memory" + RocmStandardLocation = "/opt/rocm/lib" +) + +var ( + // Used to validate if the given ROCm lib is usable + ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here... 
+) + +// Gather GPU information from the amdgpu driver if any supported GPUs are detected +// HIP_VISIBLE_DEVICES will be set if we detect a mix of unsupported and supported devices +// and the user hasn't already set this variable +func AMDGetGPUInfo(resp *GpuInfo) { + // TODO - DRY this out with windows + if !AMDDetected() { + return + } + skip := map[int]interface{}{} + + // Opportunistic logging of driver version to aid in troubleshooting + ver, err := AMDDriverVersion() + if err == nil { + slog.Info("AMD Driver: " + ver) + } else { + // TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU + slog.Warn(fmt.Sprintf("ollama recommends running the https://www.amd.com/en/support/linux-drivers: %s", err)) + } + + // If the user has specified exactly which GPUs to use, look up their memory + visibleDevices := os.Getenv("HIP_VISIBLE_DEVICES") + if visibleDevices != "" { + ids := []int{} + for _, idStr := range strings.Split(visibleDevices, ",") { + id, err := strconv.Atoi(idStr) + if err != nil { + slog.Warn(fmt.Sprintf("malformed HIP_VISIBLE_DEVICES=%s %s", visibleDevices, err)) + } else { + ids = append(ids, id) + } + } + amdProcMemLookup(resp, nil, ids) + return + } + + // Gather GFX version information from all detected cards + gfx := AMDGFXVersions() + verStrings := []string{} + for i, v := range gfx { + verStrings = append(verStrings, v.ToGFXString()) + if v.Major == 0 { + // Silently skip CPUs + skip[i] = struct{}{} + continue + } + if v.Major < 9 { + // TODO consider this a build-time setting if we can support 8xx family GPUs + slog.Warn(fmt.Sprintf("amdgpu [%d] too old %s", i, v.ToGFXString())) + skip[i] = struct{}{} + } + } + slog.Info(fmt.Sprintf("detected amdgpu versions %v", verStrings)) + + // Abort if all GPUs are skipped + if len(skip) >= len(gfx) { + slog.Info("all detected amdgpus are skipped, falling back to CPU") + return + } + + // If we got this far, then we have at least 1 GPU that's a ROCm candidate, so make sure we have a lib + libDir, err := AMDValidateLibDir() + if err != nil { + slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err)) + return + } + + gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION") + if gfxOverride == "" { + supported, err := GetSupportedGFX(libDir) + if err != nil { + slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err)) + return + } + slog.Debug(fmt.Sprintf("rocm supported GPU types %v", supported)) + + for i, v := range gfx { + if !slices.Contains[[]string, string](supported, v.ToGFXString()) { + slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, v.ToGFXString(), libDir, supported)) + // TODO - consider discrete markdown just for ROCM troubleshooting? 
+ slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage") + skip[i] = struct{}{} + } else { + slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, v.ToGFXString())) + } + } + } else { + slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride) + } + + if len(skip) >= len(gfx) { + slog.Info("all detected amdgpus are skipped, falling back to CPU") + return + } + + ids := make([]int, len(gfx)) + i := 0 + for k := range gfx { + ids[i] = k + i++ + } + amdProcMemLookup(resp, skip, ids) + if resp.memInfo.DeviceCount == 0 { + return + } + if len(skip) > 0 { + amdSetVisibleDevices(ids, skip) + } +} + +// Walk the sysfs nodes for the available GPUs and gather information from them +// skipping over any devices in the skip map +func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) { + resp.memInfo.DeviceCount = 0 + resp.memInfo.TotalMemory = 0 + resp.memInfo.FreeMemory = 0 + if len(ids) == 0 { + slog.Debug("discovering all amdgpu devices") + entries, err := os.ReadDir(AMDNodesSysfsDir) + if err != nil { + slog.Warn(fmt.Sprintf("failed to read amdgpu sysfs %s - %s", AMDNodesSysfsDir, err)) + return + } + for _, node := range entries { + if !node.IsDir() { + continue + } + id, err := strconv.Atoi(node.Name()) + if err != nil { + slog.Warn("malformed amdgpu sysfs node id " + node.Name()) + continue + } + ids = append(ids, id) + } + } + slog.Debug(fmt.Sprintf("discovering amdgpu devices %v", ids)) + + for _, id := range ids { + if _, skipped := skip[id]; skipped { + continue + } + totalMemory := uint64(0) + usedMemory := uint64(0) + propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUTotalMemoryFileGlob) + propFiles, err := filepath.Glob(propGlob) + if err != nil { + slog.Warn(fmt.Sprintf("error looking up total GPU memory: %s %s", propGlob, err)) + } + // 1 or more memory banks - sum the values of all of them + for _, propFile := range propFiles { + fp, err := os.Open(propFile) + if err != nil { + slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", propFile, err)) + continue + } + defer fp.Close() + scanner := bufio.NewScanner(fp) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "size_in_bytes") { + ver := strings.Fields(line) + if len(ver) != 2 { + slog.Warn("malformed " + line) + continue + } + bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64) + if err != nil { + slog.Warn("malformed int " + line) + continue + } + totalMemory += bankSizeInBytes + } + } + } + if totalMemory == 0 { + continue + } + usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUUsedMemoryFileGlob) + usedFiles, err := filepath.Glob(usedGlob) + if err != nil { + slog.Warn(fmt.Sprintf("error looking up used GPU memory: %s %s", usedGlob, err)) + continue + } + for _, usedFile := range usedFiles { + fp, err := os.Open(usedFile) + if err != nil { + slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", usedFile, err)) + continue + } + defer fp.Close() + data, err := io.ReadAll(fp) + if err != nil { + slog.Warn(fmt.Sprintf("failed to read sysfs node file %s: %s", usedFile, err)) + continue + } + used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) + if err != nil { + slog.Warn(fmt.Sprintf("malformed used memory %s: %s", string(data), err)) + continue + } + usedMemory += used + } + slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %d", id, totalMemory)) + slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory 
%d", id, (totalMemory - usedMemory))) + resp.memInfo.DeviceCount++ + resp.memInfo.TotalMemory += totalMemory + resp.memInfo.FreeMemory += (totalMemory - usedMemory) + } + if resp.memInfo.DeviceCount > 0 { + resp.Library = "rocm" + } +} + +// Quick check for AMD driver so we can skip amdgpu discovery if not present +func AMDDetected() bool { + // Some driver versions (older?) don't have a version file, so just lookup the parent dir + sysfsDir := filepath.Dir(DriverVersionFile) + _, err := os.Stat(sysfsDir) + if errors.Is(err, os.ErrNotExist) { + slog.Debug("amdgpu driver not detected " + sysfsDir) + return false + } else if err != nil { + slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err)) + return false + } + return true +} + +func setupLink(source, target string) error { + if err := os.RemoveAll(target); err != nil { + return fmt.Errorf("failed to remove old rocm directory %s %w", target, err) + } + if err := os.Symlink(source, target); err != nil { + return fmt.Errorf("failed to create link %s => %s %w", source, target, err) + } + slog.Debug(fmt.Sprintf("host rocm linked %s => %s", source, target)) + return nil +} + +// Ensure the AMD rocm lib dir is wired up +// Prefer to use host installed ROCm, as long as it meets our minimum requirements +// failing that, tell the user how to download it on their own +func AMDValidateLibDir() (string, error) { + // We rely on the rpath compiled into our library to find rocm + // so we establish a symlink to wherever we find it on the system + // to $AssetsDir/rocm + + // If we already have a rocm dependency wired, nothing more to do + assetsDir, err := AssetsDir() + if err != nil { + return "", fmt.Errorf("unable to lookup lib dir: %w", err) + } + // Versioned directory + rocmTargetDir := filepath.Join(assetsDir, "rocm") + if rocmLibUsable(rocmTargetDir) { + return rocmTargetDir, nil + } + // Parent dir (unversioned) + commonRocmDir := filepath.Join(filepath.Dir(assetsDir), "rocm") + if rocmLibUsable(commonRocmDir) { + return rocmTargetDir, setupLink(commonRocmDir, rocmTargetDir) + } + + // Prefer explicit HIP env var + hipPath := os.Getenv("HIP_PATH") + if hipPath != "" { + hipLibDir := filepath.Join(hipPath, "lib") + if rocmLibUsable(hipLibDir) { + slog.Debug("detected ROCM via HIP_PATH=" + hipPath) + return rocmTargetDir, setupLink(hipLibDir, rocmTargetDir) + } + } + + // Scan the library path for potential matches + ldPaths := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":") + for _, ldPath := range ldPaths { + d, err := filepath.Abs(ldPath) + if err != nil { + continue + } + if rocmLibUsable(d) { + return rocmTargetDir, setupLink(d, rocmTargetDir) + } + } + + // Well known location(s) + if rocmLibUsable("/opt/rocm/lib") { + return rocmTargetDir, setupLink("/opt/rocm/lib", rocmTargetDir) + } + err = os.MkdirAll(rocmTargetDir, 0755) + if err != nil { + return "", fmt.Errorf("failed to create empty rocm dir %s %w", rocmTargetDir, err) + } + + // If we still haven't found a usable rocm, the user will have to download it on their own + slog.Warn("amdgpu detected, but no compatible rocm library found. 
Either install rocm v6, or run the following") + slog.Warn(fmt.Sprintf(curlMsg, version.Version, rocmTargetDir)) + return "", fmt.Errorf("no suitable rocm found, falling back to CPU") +} + +func AMDDriverVersion() (string, error) { + _, err := os.Stat(DriverVersionFile) + if err != nil { + return "", fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err) + } + fp, err := os.Open(DriverVersionFile) + if err != nil { + return "", err + } + defer fp.Close() + verString, err := io.ReadAll(fp) + if err != nil { + return "", err + } + return strings.TrimSpace(string(verString)), nil +} + +func AMDGFXVersions() map[int]Version { + res := map[int]Version{} + matches, _ := filepath.Glob(GPUPropertiesFileGlob) + for _, match := range matches { + fp, err := os.Open(match) + if err != nil { + slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err)) + continue + } + defer fp.Close() + i, err := strconv.Atoi(filepath.Base(filepath.Dir(match))) + if err != nil { + slog.Debug(fmt.Sprintf("failed to parse node ID %s", err)) + continue + } + + scanner := bufio.NewScanner(fp) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "gfx_target_version") { + ver := strings.Fields(line) + if len(ver) != 2 || len(ver[1]) < 5 { + + if ver[1] == "0" { + // Silently skip the CPU + continue + } else { + slog.Debug("malformed " + line) + } + res[i] = Version{ + Major: 0, + Minor: 0, + Patch: 0, + } + continue + } + l := len(ver[1]) + patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32) + minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32) + major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32) + if err1 != nil || err2 != nil || err3 != nil { + slog.Debug("malformed int " + line) + continue + } + + res[i] = Version{ + Major: uint(major), + Minor: uint(minor), + Patch: uint(patch), + } + } + } + } + return res +} + +func (v Version) ToGFXString() string { + return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch) +} diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go new file mode 100644 index 00000000..5a965482 --- /dev/null +++ b/gpu/amd_windows.go @@ -0,0 +1,190 @@ +package gpu + +import ( + "bytes" + "fmt" + "log/slog" + "os" + "path/filepath" + "slices" + "strings" +) + +const ( + RocmStandardLocation = "C:\\Program Files\\AMD\\ROCm\\5.7\\bin" // TODO glob? + + // TODO We're lookinng for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true + iGPUName = "AMD Radeon(TM) Graphics" +) + +var ( + // Used to validate if the given ROCm lib is usable + ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here... 
+) + +func AMDGetGPUInfo(resp *GpuInfo) { + hl, err := NewHipLib() + if err != nil { + slog.Debug(err.Error()) + return + } + defer hl.Release() + skip := map[int]interface{}{} + ids := []int{} + resp.memInfo.DeviceCount = 0 + resp.memInfo.TotalMemory = 0 + resp.memInfo.FreeMemory = 0 + + ver, err := hl.AMDDriverVersion() + if err == nil { + slog.Info("AMD Driver: " + ver) + } else { + // For now this is benign, but we may eventually need to fail compatibility checks + slog.Debug(fmt.Sprintf("error looking up amd driver version: %s", err)) + } + + // Note: the HIP library automatically handles HIP_VISIBLE_DEVICES + count := hl.HipGetDeviceCount() + if count == 0 { + return + } + libDir, err := AMDValidateLibDir() + if err != nil { + slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err)) + return + } + + var supported []string + gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION") + if gfxOverride == "" { + supported, err = GetSupportedGFX(libDir) + if err != nil { + slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err)) + return + } + } else { + slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride) + } + + slog.Info(fmt.Sprintf("detected %d hip devices", count)) + for i := 0; i < count; i++ { + ids = append(ids, i) + err = hl.HipSetDevice(i) + if err != nil { + slog.Warn(fmt.Sprintf("[%d] %s", i, err)) + skip[i] = struct{}{} + continue + } + + props, err := hl.HipGetDeviceProperties(i) + if err != nil { + slog.Warn(fmt.Sprintf("[%d] %s", i, err)) + skip[i] = struct{}{} + continue + } + n := bytes.IndexByte(props.Name[:], 0) + name := string(props.Name[:n]) + slog.Info(fmt.Sprintf("[%d] Name: %s", i, name)) + n = bytes.IndexByte(props.GcnArchName[:], 0) + gfx := string(props.GcnArchName[:n]) + slog.Info(fmt.Sprintf("[%d] GcnArchName: %s", i, gfx)) + //slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0 + // TODO Why isn't props.iGPU accurate!? + if strings.EqualFold(name, iGPUName) { + slog.Info(fmt.Sprintf("iGPU detected [%d] skipping", i)) + skip[i] = struct{}{} + continue + } + if gfxOverride == "" { + if !slices.Contains[[]string, string](supported, gfx) { + slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, gfx, libDir, supported)) + // TODO - consider discrete markdown just for ROCM troubleshooting? + slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage") + skip[i] = struct{}{} + continue + } else { + slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, gfx)) + } + } + + totalMemory, freeMemory, err := hl.HipMemGetInfo() + if err != nil { + slog.Warn(fmt.Sprintf("[%d] %s", i, err)) + continue + } + + // TODO according to docs, freeMem may lie on windows! 
+ slog.Info(fmt.Sprintf("[%d] Total Mem: %d", i, totalMemory)) + slog.Info(fmt.Sprintf("[%d] Free Mem: %d", i, freeMemory)) + resp.memInfo.DeviceCount++ + resp.memInfo.TotalMemory += totalMemory + resp.memInfo.FreeMemory += freeMemory + } + if resp.memInfo.DeviceCount > 0 { + resp.Library = "rocm" + } + // Abort if all GPUs are skipped + if len(skip) >= count { + slog.Info("all detected amdgpus are skipped, falling back to CPU") + return + } + if len(skip) > 0 { + amdSetVisibleDevices(ids, skip) + } + UpdatePath(libDir) +} + +func AMDValidateLibDir() (string, error) { + // On windows non-admins typically can't create links + // so instead of trying to rely on rpath and a link in + // $LibDir/rocm, we instead rely on setting PATH to point + // to the location of the ROCm library + + // Installer payload location + exe, err := os.Executable() + if err == nil { + rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") + if rocmLibUsable(rocmTargetDir) { + slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) + return rocmTargetDir, nil + } + } + + // If we already have a rocm dependency wired, nothing more to do + libDir, err := AssetsDir() + if err != nil { + return "", fmt.Errorf("unable to lookup lib dir: %w", err) + } + rocmTargetDir := filepath.Join(libDir, "rocm") + if rocmLibUsable(rocmTargetDir) { + return rocmTargetDir, nil + } + + // Prefer explicit HIP env var + hipPath := os.Getenv("HIP_PATH") + if hipPath != "" { + hipLibDir := filepath.Join(hipPath, "bin") + if rocmLibUsable(hipLibDir) { + slog.Debug("detected ROCM via HIP_PATH=" + hipPath) + return hipLibDir, nil + } + } + + // Well known location(s) + if rocmLibUsable(RocmStandardLocation) { + return RocmStandardLocation, nil + } + + // Installer payload (if we're running from some other location) + localAppData := os.Getenv("LOCALAPPDATA") + appDir := filepath.Join(localAppData, "Programs", "Ollama") + rocmTargetDir = filepath.Join(appDir, "rocm") + if rocmLibUsable(rocmTargetDir) { + slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) + return rocmTargetDir, nil + } + + // Should not happen on windows since we include it in the installer, but stand-alone binary might hit this + slog.Warn("amdgpu detected, but no compatible rocm library found. 
Please install ROCm v6") + return "", fmt.Errorf("no suitable rocm found, falling back to CPU") +} diff --git a/gpu/assets.go b/gpu/assets.go new file mode 100644 index 00000000..41d0046a --- /dev/null +++ b/gpu/assets.go @@ -0,0 +1,60 @@ +package gpu + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "runtime" + "strings" + + "github.com/jmorganca/ollama/version" +) + +func AssetsDir() (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", err + } + baseDir := filepath.Join(home, ".ollama", "assets") + libDirs, err := os.ReadDir(baseDir) + if err == nil { + for _, d := range libDirs { + if d.Name() == version.Version { + continue + } + // Special case the rocm dependencies, which are handled by the installer + if d.Name() == "rocm" { + continue + } + slog.Debug("stale lib detected, cleaning up " + d.Name()) + err = os.RemoveAll(filepath.Join(baseDir, d.Name())) + if err != nil { + slog.Warn(fmt.Sprintf("unable to clean up stale library %s: %s", filepath.Join(baseDir, d.Name()), err)) + } + } + } + return filepath.Join(baseDir, version.Version), nil +} + +func UpdatePath(dir string) { + if runtime.GOOS == "windows" { + tmpDir := filepath.Dir(dir) + pathComponents := strings.Split(os.Getenv("PATH"), ";") + i := 0 + for _, comp := range pathComponents { + if strings.EqualFold(comp, dir) { + return + } + // Remove any other prior paths to our temp dir + if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) { + pathComponents[i] = comp + i++ + } + } + newPath := strings.Join(append([]string{dir}, pathComponents...), ";") + slog.Info(fmt.Sprintf("Updating PATH to %s", newPath)) + os.Setenv("PATH", newPath) + } + // linux and darwin rely on rpath +} diff --git a/gpu/gpu.go b/gpu/gpu.go index 8c7f1297..e0c18e26 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -24,7 +24,6 @@ import ( type handles struct { cuda *C.cuda_handle_t - rocm *C.rocm_handle_t } var gpuMutex sync.Mutex @@ -54,39 +53,23 @@ var CudaWindowsGlobs = []string{ "c:\\Windows\\System32\\nvml.dll", } -var RocmLinuxGlobs = []string{ - "/opt/rocm*/lib*/librocm_smi64.so*", -} - -var RocmWindowsGlobs = []string{ - "c:\\Windows\\System32\\rocm_smi64.dll", -} - // Note: gpuMutex must already be held func initGPUHandles() { // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing - gpuHandles = &handles{nil, nil} + gpuHandles = &handles{nil} var cudaMgmtName string var cudaMgmtPatterns []string - var rocmMgmtName string - var rocmMgmtPatterns []string switch runtime.GOOS { case "windows": cudaMgmtName = "nvml.dll" cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs)) copy(cudaMgmtPatterns, CudaWindowsGlobs) - rocmMgmtName = "rocm_smi64.dll" - rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs)) - copy(rocmMgmtPatterns, RocmWindowsGlobs) case "linux": cudaMgmtName = "libnvidia-ml.so" cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs)) copy(cudaMgmtPatterns, CudaLinuxGlobs) - rocmMgmtName = "librocm_smi64.so" - rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs)) - copy(rocmMgmtPatterns, RocmLinuxGlobs) default: return } @@ -101,16 +84,6 @@ func initGPUHandles() { return } } - - rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns) - if len(rocmLibPaths) > 0 { - rocm := LoadROCMMgmt(rocmLibPaths) - if rocm != nil { - slog.Info("Radeon GPU detected") - gpuHandles.rocm = rocm - return - } - } } func GetGPUInfo() GpuInfo { @@ -149,66 +122,10 @@ func GetGPUInfo() GpuInfo { slog.Info(fmt.Sprintf("CUDA GPU is too old. 
Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)) } } - } else if AMDDetected() && gpuHandles.rocm != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") { - ver, err := AMDDriverVersion() - if err == nil { - slog.Info("AMD Driver: " + ver) - } else { - // For now this is benign, but we may eventually need to fail compatibility checks - slog.Debug("error looking up amd driver version: %s", err) - } - gfx := AMDGFXVersions() - tooOld := false - for _, v := range gfx { - if v.Major < 9 { - slog.Info("AMD GPU too old, falling back to CPU " + v.ToGFXString()) - tooOld = true - break - } - - // TODO - remap gfx strings for unsupporetd minor/patch versions to supported for the same major - // e.g. gfx1034 works if we map it to gfx1030 at runtime - - } - if !tooOld { - // TODO - this algo can be shifted over to use sysfs instead of the rocm info library... - C.rocm_check_vram(*gpuHandles.rocm, &memInfo) - if memInfo.err != nil { - slog.Info(fmt.Sprintf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err))) - C.free(unsafe.Pointer(memInfo.err)) - } else if memInfo.igpu_index >= 0 && memInfo.count == 1 { - // Only one GPU detected and it appears to be an integrated GPU - skip it - slog.Info("ROCm unsupported integrated GPU detected") - } else if memInfo.count > 0 { - if memInfo.igpu_index >= 0 { - // We have multiple GPUs reported, and one of them is an integrated GPU - // so we have to set the env var to bypass it - // If the user has specified their own ROCR_VISIBLE_DEVICES, don't clobber it - val := os.Getenv("ROCR_VISIBLE_DEVICES") - if val == "" { - devices := []string{} - for i := 0; i < int(memInfo.count); i++ { - if i == int(memInfo.igpu_index) { - continue - } - devices = append(devices, strconv.Itoa(i)) - } - val = strings.Join(devices, ",") - os.Setenv("ROCR_VISIBLE_DEVICES", val) - } - slog.Info(fmt.Sprintf("ROCm integrated GPU detected - ROCR_VISIBLE_DEVICES=%s", val)) - } - resp.Library = "rocm" - var version C.rocm_version_resp_t - C.rocm_get_version(*gpuHandles.rocm, &version) - verString := C.GoString(version.str) - if version.status == 0 { - resp.Variant = "v" + verString - } else { - slog.Info(fmt.Sprintf("failed to look up ROCm version: %s", verString)) - } - C.free(unsafe.Pointer(version.str)) - } + } else { + AMDGetGPUInfo(&resp) + if resp.Library != "" { + return resp } } if resp.Library == "" { @@ -338,23 +255,6 @@ func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t { return nil } -func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t { - var resp C.rocm_init_resp_t - resp.rh.verbose = getVerboseState() - for _, libPath := range rocmLibPaths { - lib := C.CString(libPath) - defer C.free(unsafe.Pointer(lib)) - C.rocm_init(lib, &resp) - if resp.err != nil { - slog.Info(fmt.Sprintf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err))) - C.free(unsafe.Pointer(resp.err)) - } else { - return &resp.rh - } - } - return nil -} - func getVerboseState() C.uint16_t { if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" { return C.uint16_t(1) diff --git a/gpu/gpu_info.h b/gpu/gpu_info.h index e52d2066..8186a3f0 100644 --- a/gpu/gpu_info.h +++ b/gpu/gpu_info.h @@ -53,7 +53,6 @@ void cpu_check_ram(mem_info_t *resp); #endif #include "gpu_info_cuda.h" -#include "gpu_info_rocm.h" #endif // __GPU_INFO_H__ #endif // __APPLE__ \ No newline at end of file diff --git a/gpu/gpu_info_cuda.c b/gpu/gpu_info_cuda.c index a64b4587..509bf5c6 100644 --- a/gpu/gpu_info_cuda.c +++ b/gpu/gpu_info_cuda.c @@ -124,31 +124,31 @@ 
void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) { // When in verbose mode, report more information about // the card we discover, but don't fail on error ret = (*h.nvmlDeviceGetName)(device, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { + if (ret != NVML_SUCCESS) { LOG(h.verbose, "nvmlDeviceGetName failed: %d\n", ret); } else { LOG(h.verbose, "[%d] CUDA device name: %s\n", i, buf); } ret = (*h.nvmlDeviceGetBoardPartNumber)(device, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { + if (ret != NVML_SUCCESS) { LOG(h.verbose, "nvmlDeviceGetBoardPartNumber failed: %d\n", ret); } else { LOG(h.verbose, "[%d] CUDA part number: %s\n", i, buf); } ret = (*h.nvmlDeviceGetSerial)(device, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { + if (ret != NVML_SUCCESS) { LOG(h.verbose, "nvmlDeviceGetSerial failed: %d\n", ret); } else { LOG(h.verbose, "[%d] CUDA S/N: %s\n", i, buf); } ret = (*h.nvmlDeviceGetVbiosVersion)(device, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { + if (ret != NVML_SUCCESS) { LOG(h.verbose, "nvmlDeviceGetVbiosVersion failed: %d\n", ret); } else { LOG(h.verbose, "[%d] CUDA vbios version: %s\n", i, buf); } ret = (*h.nvmlDeviceGetBrand)(device, &brand); - if (ret != RSMI_STATUS_SUCCESS) { + if (ret != NVML_SUCCESS) { LOG(h.verbose, "nvmlDeviceGetBrand failed: %d\n", ret); } else { LOG(h.verbose, "[%d] CUDA brand: %d\n", i, brand); diff --git a/gpu/gpu_info_rocm.c b/gpu/gpu_info_rocm.c deleted file mode 100644 index 7ac88611..00000000 --- a/gpu/gpu_info_rocm.c +++ /dev/null @@ -1,198 +0,0 @@ -#ifndef __APPLE__ - -#include "gpu_info_rocm.h" - -#include - -void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { - rsmi_status_t ret; - resp->err = NULL; - const int buflen = 256; - char buf[buflen + 1]; - int i; - struct lookup { - char *s; - void **p; - } l[] = { - {"rsmi_init", (void *)&resp->rh.rsmi_init}, - {"rsmi_shut_down", (void *)&resp->rh.rsmi_shut_down}, - {"rsmi_dev_memory_total_get", (void *)&resp->rh.rsmi_dev_memory_total_get}, - {"rsmi_dev_memory_usage_get", (void *)&resp->rh.rsmi_dev_memory_usage_get}, - {"rsmi_version_get", (void *)&resp->rh.rsmi_version_get}, - {"rsmi_num_monitor_devices", (void*)&resp->rh.rsmi_num_monitor_devices}, - {"rsmi_dev_id_get", (void*)&resp->rh.rsmi_dev_id_get}, - {"rsmi_dev_name_get", (void *)&resp->rh.rsmi_dev_name_get}, - {"rsmi_dev_brand_get", (void *)&resp->rh.rsmi_dev_brand_get}, - {"rsmi_dev_vendor_name_get", (void *)&resp->rh.rsmi_dev_vendor_name_get}, - {"rsmi_dev_vram_vendor_get", (void *)&resp->rh.rsmi_dev_vram_vendor_get}, - {"rsmi_dev_serial_number_get", (void *)&resp->rh.rsmi_dev_serial_number_get}, - {"rsmi_dev_subsystem_name_get", (void *)&resp->rh.rsmi_dev_subsystem_name_get}, - {"rsmi_dev_vbios_version_get", (void *)&resp->rh.rsmi_dev_vbios_version_get}, - {NULL, NULL}, - }; - - resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY); - if (!resp->rh.handle) { - char *msg = LOAD_ERR(); - snprintf(buf, buflen, - "Unable to load %s library to query for Radeon GPUs: %s\n", - rocm_lib_path, msg); - free(msg); - resp->err = strdup(buf); - return; - } - - // TODO once we've squashed the remaining corner cases remove this log - LOG(resp->rh.verbose, "wiring rocm management library functions in %s\n", rocm_lib_path); - - for (i = 0; l[i].s != NULL; i++) { - // TODO once we've squashed the remaining corner cases remove this log - LOG(resp->rh.verbose, "dlsym: %s\n", l[i].s); - - *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s); - if (!l[i].p) { - resp->rh.handle = NULL; - char *msg = LOAD_ERR(); - 
LOG(resp->rh.verbose, "dlerr: %s\n", msg); - UNLOAD_LIBRARY(resp->rh.handle); - snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, - msg); - free(msg); - resp->err = strdup(buf); - return; - } - } - - ret = (*resp->rh.rsmi_init)(0); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(resp->rh.verbose, "rsmi_init err: %d\n", ret); - UNLOAD_LIBRARY(resp->rh.handle); - resp->rh.handle = NULL; - snprintf(buf, buflen, "rocm vram init failure: %d", ret); - resp->err = strdup(buf); - } - - return; -} - -void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) { - resp->err = NULL; - resp->igpu_index = -1; - uint64_t totalMem = 0; - uint64_t usedMem = 0; - rsmi_status_t ret; - const int buflen = 256; - char buf[buflen + 1]; - int i; - - if (h.handle == NULL) { - resp->err = strdup("rocm handle not initialized"); - return; - } - - ret = (*h.rsmi_num_monitor_devices)(&resp->count); - if (ret != RSMI_STATUS_SUCCESS) { - snprintf(buf, buflen, "unable to get device count: %d", ret); - resp->err = strdup(buf); - return; - } - LOG(h.verbose, "discovered %d ROCm GPU Devices\n", resp->count); - - resp->total = 0; - resp->free = 0; - for (i = 0; i < resp->count; i++) { - if (h.verbose) { - // When in verbose mode, report more information about - // the card we discover, but don't fail on error - ret = (*h.rsmi_dev_name_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_name_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm device name: %s\n", i, buf); - } - ret = (*h.rsmi_dev_brand_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_brand_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm brand: %s\n", i, buf); - } - ret = (*h.rsmi_dev_vendor_name_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_vendor_name_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm vendor: %s\n", i, buf); - } - ret = (*h.rsmi_dev_vram_vendor_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_vram_vendor_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm VRAM vendor: %s\n", i, buf); - } - ret = (*h.rsmi_dev_serial_number_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_serial_number_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm S/N: %s\n", i, buf); - } - ret = (*h.rsmi_dev_subsystem_name_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_subsystem_name_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm subsystem name: %s\n", i, buf); - } - ret = (*h.rsmi_dev_vbios_version_get)(i, buf, buflen); - if (ret != RSMI_STATUS_SUCCESS) { - LOG(h.verbose, "rsmi_dev_vbios_version_get failed: %d\n", ret); - } else { - LOG(h.verbose, "[%d] ROCm vbios version: %s\n", i, buf); - } - } - - // Get total memory - used memory for available memory - ret = (*h.rsmi_dev_memory_total_get)(i, RSMI_MEM_TYPE_VRAM, &totalMem); - if (ret != RSMI_STATUS_SUCCESS) { - snprintf(buf, buflen, "rocm total mem lookup failure: %d", ret); - resp->err = strdup(buf); - return; - } - ret = (*h.rsmi_dev_memory_usage_get)(i, RSMI_MEM_TYPE_VRAM, &usedMem); - if (ret != RSMI_STATUS_SUCCESS) { - snprintf(buf, buflen, "rocm usage mem lookup failure: %d", ret); - resp->err = strdup(buf); - return; - } - LOG(h.verbose, "[%d] ROCm totalMem %ld\n", i, totalMem); - LOG(h.verbose, "[%d] ROCm usedMem %ld\n", i, usedMem); - if (totalMem < 1024 * 1024 * 1024) { - // Do not add up integrated GPU 
memory capacity, it's a bogus 512M, and actually uses system memory - LOG(h.verbose, "[%d] ROCm integrated GPU\n", i); - resp->igpu_index = i; - } else { - resp->total += totalMem; - resp->free += totalMem - usedMem; - } - } -} - -void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) { - const int buflen = 256; - char buf[buflen + 1]; - if (h.handle == NULL) { - resp->str = strdup("rocm handle not initialized"); - resp->status = 1; - return; - } - rsmi_version_t ver; - rsmi_status_t ret; - ret = h.rsmi_version_get(&ver); - if (ret != RSMI_STATUS_SUCCESS) { - snprintf(buf, buflen, "unexpected response on version lookup %d", ret); - resp->status = 1; - } else { - snprintf(buf, buflen, "%d", ver.major); - resp->status = 0; - } - resp->str = strdup(buf); -} - -#endif // __APPLE__ diff --git a/gpu/gpu_info_rocm.h b/gpu/gpu_info_rocm.h deleted file mode 100644 index 0a8d50c0..00000000 --- a/gpu/gpu_info_rocm.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef __APPLE__ -#ifndef __GPU_INFO_ROCM_H__ -#define __GPU_INFO_ROCM_H__ -#include "gpu_info.h" - -// Just enough typedef's to dlopen/dlsym for memory information -typedef enum rsmi_status_return { - RSMI_STATUS_SUCCESS = 0, - // Other values omitted for now... -} rsmi_status_t; - -typedef enum rsmi_memory_type { - RSMI_MEM_TYPE_VRAM = 0, - RSMI_MEM_TYPE_VIS_VRAM, - RSMI_MEM_TYPE_GTT, -} rsmi_memory_type_t; - - typedef struct { - uint32_t major; - uint32_t minor; - uint32_t patch; - const char *build; - } rsmi_version_t; - -typedef struct rocm_handle { - void *handle; - uint16_t verbose; - rsmi_status_t (*rsmi_init)(uint64_t); - rsmi_status_t (*rsmi_shut_down)(void); - rsmi_status_t (*rsmi_dev_memory_total_get)(uint32_t, rsmi_memory_type_t, uint64_t *); - rsmi_status_t (*rsmi_dev_memory_usage_get)(uint32_t, rsmi_memory_type_t, uint64_t *); - rsmi_status_t (*rsmi_version_get) (rsmi_version_t *version); - rsmi_status_t (*rsmi_num_monitor_devices) (uint32_t *); - rsmi_status_t (*rsmi_dev_id_get)(uint32_t, uint16_t *); - rsmi_status_t (*rsmi_dev_name_get) (uint32_t,char *,size_t); - rsmi_status_t (*rsmi_dev_brand_get) (uint32_t, char *, uint32_t); - rsmi_status_t (*rsmi_dev_vendor_name_get) (uint32_t, char *, uint32_t); - rsmi_status_t (*rsmi_dev_vram_vendor_get) (uint32_t, char *, uint32_t); - rsmi_status_t (*rsmi_dev_serial_number_get) (uint32_t, char *, uint32_t); - rsmi_status_t (*rsmi_dev_subsystem_name_get) (uint32_t, char *, uint32_t); - rsmi_status_t (*rsmi_dev_vbios_version_get) (uint32_t, char *, uint32_t); -} rocm_handle_t; - -typedef struct rocm_init_resp { - char *err; // If err is non-null handle is invalid - rocm_handle_t rh; -} rocm_init_resp_t; - -typedef struct rocm_version_resp { - rsmi_status_t status; - char *str; // Contains version or error string if status != 0 -} rocm_version_resp_t; - -void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp); -void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp); -void rocm_get_version(rocm_handle_t rh, rocm_version_resp_t *resp); - -#endif // __GPU_INFO_ROCM_H__ -#endif // __APPLE__ \ No newline at end of file diff --git a/llm/dyn_ext_server.c b/llm/dyn_ext_server.c index 47dc4e99..dab49f85 100644 --- a/llm/dyn_ext_server.c +++ b/llm/dyn_ext_server.c @@ -14,17 +14,14 @@ #define LOAD_LIBRARY(lib, flags) LoadLibrary(lib) #define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym) #define UNLOAD_LIBRARY(handle) FreeLibrary(handle) -inline char *LOAD_ERR() { - LPSTR messageBuffer = NULL; - size_t size = FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | - 
FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&messageBuffer, 0, NULL); - char *resp = strdup(messageBuffer); - LocalFree(messageBuffer); - return resp; -} +#define LOAD_ERR() ({\ + LPSTR messageBuffer = NULL; \ + size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \ + NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \ + char *resp = strdup(messageBuffer); \ + LocalFree(messageBuffer); \ + resp; \ +}) #else #include #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags) diff --git a/llm/dyn_ext_server.go b/llm/dyn_ext_server.go index 8d7ebf9e..fa5a3477 100644 --- a/llm/dyn_ext_server.go +++ b/llm/dyn_ext_server.go @@ -28,13 +28,13 @@ import ( "log/slog" "os" "path/filepath" - "runtime" "strings" "sync" "time" "unsafe" "github.com/jmorganca/ollama/api" + "github.com/jmorganca/ollama/gpu" ) type dynExtServer struct { @@ -72,7 +72,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete") mutex.Lock() } - updatePath(filepath.Dir(library)) + gpu.UpdatePath(filepath.Dir(library)) libPath := C.CString(library) defer C.free(unsafe.Pointer(libPath)) resp := newExtServerResp(512) @@ -148,6 +148,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts } slog.Info("Initializing llama server") + slog.Debug(fmt.Sprintf("server params: %+v", sparams)) initResp := newExtServerResp(128) defer freeExtServerResp(initResp) C.dyn_llama_server_init(llm.s, &sparams, &initResp) @@ -365,25 +366,3 @@ func (llm *dynExtServer) Close() { C.dyn_llama_server_stop(llm.s) mutex.Unlock() } - -func updatePath(dir string) { - if runtime.GOOS == "windows" { - tmpDir := filepath.Dir(dir) - pathComponents := strings.Split(os.Getenv("PATH"), ";") - i := 0 - for _, comp := range pathComponents { - if strings.EqualFold(comp, dir) { - return - } - // Remove any other prior paths to our temp dir - if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) { - pathComponents[i] = comp - i++ - } - } - newPath := strings.Join(append([]string{dir}, pathComponents...), ";") - slog.Info(fmt.Sprintf("Updating PATH to %s", newPath)) - os.Setenv("PATH", newPath) - } - // linux and darwin rely on rpath -} diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index e6a7d077..0b8cb344 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -179,17 +179,21 @@ fi if [ -d "${ROCM_PATH}" ]; then echo "ROCm libraries detected - building dynamic ROCm library" - if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then - ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true) + if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then + ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. 
|| true) fi init_vars CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}" - EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" + EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" build - # Note: the ROCM libs and runtime library files are too large to embed, so we depend on - # them being present at runtime on the host + # Record the ROCM dependencies + rm -f "${BUILD_DIR}/lib/deps.txt" + touch "${BUILD_DIR}/lib/deps.txt" + for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do + echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt" + done compress_libs fi diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index e0313420..579b2bca 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -2,19 +2,52 @@ $ErrorActionPreference = "Stop" +function amdGPUs { + if ($env:AMDGPU_TARGETS) { + return $env:AMDGPU_TARGETS + } + # TODO - load from some common data file for linux + windows build consistency + $GPU_LIST = @( + "gfx900" + "gfx906:xnack-" + "gfx908:xnack-" + "gfx90a:xnack+" + "gfx90a:xnack-" + "gfx1010" + "gfx1012" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ) + $GPU_LIST -join ';' +} + function init_vars { + # Verify the environment is a Developer Shell for MSVC 2019 + write-host $env:VSINSTALLDIR + if (($env:VSINSTALLDIR -eq $null)) { + Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment" + exit 1 + } $script:SRC_DIR = $(resolve-path "..\..\") $script:llamacppDir = "../llama.cpp" - $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A", "x64") + $script:cmakeDefs = @( + "-DBUILD_SHARED_LIBS=on", + "-DLLAMA_NATIVE=off" + ) $script:cmakeTargets = @("ext_server") $script:ARCH = "amd64" # arm not yet supported. 
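    # The VSINSTALLDIR guard above is what the rest of this script leans on: the
    # CPU and CUDA stages pass the Visual Studio "-A x64" architecture flag, and
    # the ROCm stage below puts Ninja from under $env:VSINSTALLDIR on PATH, so
    # none of the builds can run outside an MSVC Developer Shell.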
if ($env:CGO_CFLAGS -contains "-g") { - $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on") + $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo") $script:config = "RelWithDebInfo" } else { - $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off") + $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release") $script:config = "Release" } + if ($null -ne $env:CMAKE_SYSTEM_VERSION) { + $script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}") + } # Try to find the CUDA dir if ($env:CUDA_LIB_DIR -eq $null) { $d=(get-command -ea 'silentlycontinue' nvcc).path @@ -157,7 +190,7 @@ apply_patches $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on") init_vars -$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs +$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu" write-host "Building LCD CPU" build @@ -166,7 +199,7 @@ sign compress_libs init_vars -$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs +$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx" write-host "Building AVX CPU" build @@ -175,7 +208,7 @@ sign compress_libs init_vars -$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs +$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2" write-host "Building AVX2 CPU" build @@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) { } init_vars $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT" - $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}") + $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}") + write-host "Building CUDA" build install sign compress_libs } -# TODO - actually implement ROCm support on windows -$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm" -rm -ea 0 -recurse -force -path "${script:buildDir}/lib" -md "${script:buildDir}/lib" -ea 0 > $null -echo $null >> "${script:buildDir}/lib/.generated" +if ($null -ne $env:HIP_PATH) { + $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename + if ($null -ne $script:ROCM_VERSION) { + $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION + } + + init_vars + $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT" + $script:cmakeDefs += @( + "-G", "Ninja", + 
"-DCMAKE_C_COMPILER=clang.exe", + "-DCMAKE_CXX_COMPILER=clang++.exe", + "-DLLAMA_HIPBLAS=on", + "-DLLAMA_AVX=on", + "-DLLAMA_AVX2=off", + "-DCMAKE_POSITION_INDEPENDENT_CODE=on", + "-DAMDGPU_TARGETS=$(amdGPUs)", + "-DGPU_TARGETS=$(amdGPUs)" + ) + + # Make sure the ROCm binary dir is first in the path + $env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH" + + # We have to clobber the LIB var from the developer shell for clang to work properly + $env:LIB="" + + write-host "Building ROCm" + build + # Ninja doesn't prefix with config name + ${script:config}="" + install + if ($null -ne $script:DUMPBIN) { + & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll" + } + sign + compress_libs +} cleanup write-host "`ngo generate completed" diff --git a/llm/llm.go b/llm/llm.go index 81bab122..b0ac0f60 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -19,7 +19,7 @@ type LLM interface { Close() } -func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { +func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) { if _, err := os.Stat(model); err != nil { return nil, err } @@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options) opts.RopeFrequencyBase = 0.0 opts.RopeFrequencyScale = 0.0 - return newLlmServer(info, workDir, model, adapters, projectors, opts) + return newLlmServer(info, model, adapters, projectors, opts) } // Give any native cgo implementations an opportunity to initialize -func Init(workdir string) error { - return nativeInit(workdir) +func Init() error { + return nativeInit() } -func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { +func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) { dynLibs := getDynLibs(gpuInfo) // Check to see if the user has requested a specific library instead of auto-detecting @@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto _, err := os.Stat(dynLibs[0]) if err != nil { slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0])) - err = nativeInit(workDir) + err = nativeInit() if err != nil { return nil, err } diff --git a/llm/payload_common.go b/llm/payload_common.go index 3958b9f5..ff38b63f 100644 --- a/llm/payload_common.go +++ b/llm/payload_common.go @@ -103,10 +103,14 @@ func rocmDynLibPresent() bool { return false } -func nativeInit(workdir string) error { +func nativeInit() error { slog.Info("Extracting dynamic libraries...") + assetsDir, err := gpu.AssetsDir() + if err != nil { + return err + } if runtime.GOOS == "darwin" { - err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") + err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal") if err != nil { if err == payloadMissing { // TODO perhaps consider this a hard failure on arm macs? 
@@ -115,10 +119,10 @@ func nativeInit(workdir string) error { } return err } - os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) + os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir) } - libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*") + libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*") if err != nil { if err == payloadMissing { slog.Info(fmt.Sprintf("%s", payloadMissing)) @@ -149,17 +153,13 @@ func nativeInit(workdir string) error { return nil } -func extractDynamicLibs(workDir, glob string) ([]string, error) { +func extractDynamicLibs(assetsDir, glob string) ([]string, error) { files, err := fs.Glob(libEmbed, glob) if err != nil || len(files) == 0 { return nil, payloadMissing } libs := []string{} - // TODO consider making this idempotent with some sort of persistent directory (where we store models probably) - // and tracking by version so we don't reexpand the files every time - // Also maybe consider lazy loading only what is needed - g := new(errgroup.Group) for _, file := range files { pathComps := strings.Split(file, "/") @@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { g.Go(func() error { // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY // Include the variant in the path to avoid conflicts between multiple server libs - targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) + targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3]) srcFile, err := libEmbed.Open(file) if err != nil { return fmt.Errorf("read payload %s: %v", file, err) } defer srcFile.Close() if err := os.MkdirAll(targetDir, 0o755); err != nil { - return fmt.Errorf("create payload temp dir %s: %v", workDir, err) + return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err) } src := io.Reader(srcFile) filename := file @@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { libs = append(libs, destFile) } - _, err = os.Stat(destFile) - switch { - case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return fmt.Errorf("write payload %s: %v", file, err) - } - defer destFile.Close() - if _, err := io.Copy(destFile, src); err != nil { - return fmt.Errorf("copy payload %s: %v", file, err) - } - case err != nil: - return fmt.Errorf("stat payload %s: %v", file, err) + destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) + if err != nil { + return fmt.Errorf("write payload %s: %v", file, err) + } + defer destFp.Close() + if _, err := io.Copy(destFp, src); err != nil { + return fmt.Errorf("copy payload %s: %v", file, err) } return nil }) @@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { return libs, g.Wait() } -func extractPayloadFiles(workDir, glob string) error { +func extractPayloadFiles(assetsDir, glob string) error { files, err := fs.Glob(libEmbed, glob) if err != nil || len(files) == 0 { return payloadMissing @@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error { return fmt.Errorf("read payload %s: %v", file, err) } defer srcFile.Close() - if err := os.MkdirAll(workDir, 0o755); err != nil { - return fmt.Errorf("create payload temp dir %s: %v", workDir, err) + if err := os.MkdirAll(assetsDir, 0o755); err != nil { + return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err) } src := io.Reader(srcFile) filename := file @@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error { 
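	// The hunk below mirrors the extractDynamicLibs change above: the *os.File
	// handle is renamed to destFp so it no longer shadows the destFile path
	// string, and a new branch logs (at debug level) payloads that already
	// exist in the assets dir instead of silently falling through.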
filename = strings.TrimSuffix(filename, ".gz") } - destFile := filepath.Join(workDir, filepath.Base(filename)) + destFile := filepath.Join(assetsDir, filepath.Base(filename)) _, err = os.Stat(destFile) switch { case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) + destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) if err != nil { return fmt.Errorf("write payload %s: %v", file, err) } - defer destFile.Close() - if _, err := io.Copy(destFile, src); err != nil { + defer destFp.Close() + if _, err := io.Copy(destFp, src); err != nil { return fmt.Errorf("copy payload %s: %v", file, err) } case err != nil: return fmt.Errorf("stat payload %s: %v", file, err) + case err == nil: + slog.Debug("payload already exists: " + destFile) } } return nil diff --git a/llm/payload_linux.go b/llm/payload_linux.go index fc366209..276705c7 100644 --- a/llm/payload_linux.go +++ b/llm/payload_linux.go @@ -4,5 +4,5 @@ import ( "embed" ) -//go:embed llama.cpp/build/linux/*/*/lib/*.so* +//go:embed llama.cpp/build/linux/*/*/lib/* var libEmbed embed.FS diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 338dbcd5..77e21d40 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do . docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH + docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/ docker rm builder-$TARGETARCH done diff --git a/server/routes.go b/server/routes.go index 28165721..e5adc345 100644 --- a/server/routes.go +++ b/server/routes.go @@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error { - workDir := c.GetString("workDir") - needLoad := loaded.runner == nil || // is there a model loaded? loaded.ModelPath != model.ModelPath || // has the base model changed? !reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed? @@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D loaded.Options = nil } - llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts) + llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts) if err != nil { // some older models are not compatible with newer versions of llama.cpp // show a generalized compatibility error until there is a better way to @@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error { os.Exit(0) }() - if err := llm.Init(s.WorkDir); err != nil { + if err := llm.Init(); err != nil { return fmt.Errorf("unable to initialize llm library %w", err) } if runtime.GOOS == "linux" { // TODO - windows too
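Taken together, these hunks move payload extraction off the per-run workDir: routes.go now calls llm.Init() and llm.New(model.ModelPath, ...) with no working directory, and nativeInit resolves its target through gpu.AssetsDir(), whose implementation sits outside the hunks shown here. A minimal sketch of the idea behind such a helper, assuming a stable per-user location (the ~/.ollama/assets path, and everything else in this snippet, is illustrative rather than the actual gpu package code):

    package gpu

    import (
        "os"
        "path/filepath"
    )

    // AssetsDir returns a persistent directory for extracted runner libraries so
    // payloads are not re-expanded into a fresh temp dir on every server start.
    // The location below is an assumption for illustration only.
    func AssetsDir() (string, error) {
        home, err := os.UserHomeDir()
        if err != nil {
            return "", err
        }
        dir := filepath.Join(home, ".ollama", "assets")
        if err := os.MkdirAll(dir, 0o755); err != nil {
            return "", err
        }
        return dir, nil
    }

With a stable directory like this, the skip-on-exists branch in extractPayloadFiles above can actually take effect across restarts rather than only within a single run.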