2024-02-15 17:15:09 -08:00
|
|
|
package gpu
|
|
|
|
|
|
|
|
import (
|
2024-08-01 14:52:15 -07:00
|
|
|
"errors"
|
2024-02-15 17:15:09 -08:00
|
|
|
"fmt"
|
|
|
|
"log/slog"
|
|
|
|
"syscall"
|
|
|
|
"unsafe"
|
|
|
|
|
|
|
|
"golang.org/x/sys/windows"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Status codes returned by the HIP entry points that this file calls.
const (
	// hipSuccess is the status every wrapper in this file treats as success.
	hipSuccess = 0
	// hipErrorNoDevice is the status HipGetDeviceCount reports as
	// "no devices found" rather than as a failure.
	hipErrorNoDevice = 100
)
|
|
|
|
|
|
|
|
// hipDevicePropMinimal is a cut-down overlay of the buffer that
// hipGetDeviceProperties fills in. Only the fields this package reads are
// named; the unusedN arrays are opaque padding that keep the named fields at
// their offsets and keep the overall buffer at 792 bytes (the size the
// previous definition had on 64-bit targets).
//
// NOTE(review): the offsets assume the device-properties layout in which a
// 32-bit C `int` flag follows gcnArchName directly at offset 652 — confirm
// against the HIP headers that match the loaded DLL.
type hipDevicePropMinimal struct {
	Name        [256]byte
	unused1     [140]byte
	GcnArchName [256]byte // gfx####

	// iGPU mirrors a 32-bit C int. It was previously declared as Go `int`,
	// which is 8 bytes wide and 8-byte aligned on 64-bit targets; that pushed
	// the field to offset 656, so it read the bytes after the intended member.
	// That is the likely reason the value did not appear to report correctly.
	iGPU int32

	// unused2 is 8 bytes larger than before so the struct keeps its 792-byte
	// size now that iGPU no longer carries 4 bytes of padding plus 4 extra bytes.
	unused2 [136]byte
}
|
|
|
|
|
|
|
|
// Wrap the amdhip64.dll library for GPU discovery
|
|
|
|
type HipLib struct {
|
|
|
|
dll windows.Handle
|
|
|
|
hipGetDeviceCount uintptr
|
|
|
|
hipGetDeviceProperties uintptr
|
|
|
|
hipMemGetInfo uintptr
|
|
|
|
hipSetDevice uintptr
|
|
|
|
hipDriverGetVersion uintptr
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewHipLib() (*HipLib, error) {
|
2024-07-19 15:07:26 -07:00
|
|
|
// At runtime we depend on v6, so discover GPUs with the same library for a consistent set of GPUs
|
|
|
|
h, err := windows.LoadLibrary("amdhip64_6.dll")
|
2024-02-15 17:15:09 -08:00
|
|
|
if err != nil {
|
2024-07-19 15:07:26 -07:00
|
|
|
return nil, fmt.Errorf("unable to load amdhip64_6.dll, please make sure to upgrade to the latest amd driver: %w", err)
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
hl := &HipLib{}
|
|
|
|
hl.dll = h
|
|
|
|
hl.hipGetDeviceCount, err = windows.GetProcAddress(hl.dll, "hipGetDeviceCount")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
hl.hipGetDeviceProperties, err = windows.GetProcAddress(hl.dll, "hipGetDeviceProperties")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
hl.hipMemGetInfo, err = windows.GetProcAddress(hl.dll, "hipMemGetInfo")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
hl.hipSetDevice, err = windows.GetProcAddress(hl.dll, "hipSetDevice")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
hl.hipDriverGetVersion, err = windows.GetProcAddress(hl.dll, "hipDriverGetVersion")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return hl, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// The hip library only evaluates the HIP_VISIBLE_DEVICES variable at startup
|
|
|
|
// so we have to unload/reset the library after we do our initial discovery
|
|
|
|
// to make sure our updates to that variable are processed by llama.cpp
|
|
|
|
func (hl *HipLib) Release() {
|
|
|
|
err := windows.FreeLibrary(hl.dll)
|
|
|
|
if err != nil {
|
2024-03-30 09:50:05 -07:00
|
|
|
slog.Warn("failed to unload amdhip64.dll", "error", err)
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
hl.dll = 0
|
|
|
|
}
|
|
|
|
|
2024-05-07 14:54:26 -07:00
|
|
|
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
2024-02-15 17:15:09 -08:00
|
|
|
if hl.dll == 0 {
|
2024-08-01 14:52:15 -07:00
|
|
|
return 0, 0, errors.New("dll has been unloaded")
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
var version int
|
|
|
|
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
|
|
|
|
if status != hipSuccess {
|
2024-05-07 14:54:26 -07:00
|
|
|
return 0, 0, fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
2024-05-07 14:54:26 -07:00
|
|
|
|
|
|
|
slog.Debug("hipDriverGetVersion", "version", version)
|
2024-06-18 16:22:47 -07:00
|
|
|
driverMajor = version / 10000000
|
|
|
|
driverMinor = (version - (driverMajor * 10000000)) / 100000
|
2024-05-07 14:54:26 -07:00
|
|
|
|
|
|
|
return driverMajor, driverMinor, nil
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (hl *HipLib) HipGetDeviceCount() int {
|
|
|
|
if hl.dll == 0 {
|
|
|
|
slog.Error("dll has been unloaded")
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
var count int
|
|
|
|
status, _, err := syscall.SyscallN(hl.hipGetDeviceCount, uintptr(unsafe.Pointer(&count)))
|
|
|
|
if status == hipErrorNoDevice {
|
|
|
|
slog.Info("AMD ROCm reports no devices found")
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
if status != hipSuccess {
|
2024-03-30 09:50:05 -07:00
|
|
|
slog.Warn("failed call to hipGetDeviceCount", "status", status, "error", err)
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
return count
|
|
|
|
}
|
|
|
|
|
|
|
|
func (hl *HipLib) HipSetDevice(device int) error {
|
|
|
|
if hl.dll == 0 {
|
2024-08-01 14:52:15 -07:00
|
|
|
return errors.New("dll has been unloaded")
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
|
|
|
|
if status != hipSuccess {
|
|
|
|
return fmt.Errorf("failed call to hipSetDevice: %d %s", status, err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
|
|
|
|
if hl.dll == 0 {
|
2024-08-01 14:52:15 -07:00
|
|
|
return nil, errors.New("dll has been unloaded")
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
var props hipDevicePropMinimal
|
|
|
|
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
|
|
|
|
if status != hipSuccess {
|
|
|
|
return nil, fmt.Errorf("failed call to hipGetDeviceProperties: %d %s", status, err)
|
|
|
|
}
|
|
|
|
return &props, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// free, total, err
|
|
|
|
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
|
|
|
|
if hl.dll == 0 {
|
2024-08-01 14:52:15 -07:00
|
|
|
return 0, 0, errors.New("dll has been unloaded")
|
2024-02-15 17:15:09 -08:00
|
|
|
}
|
|
|
|
var totalMemory uint64
|
|
|
|
var freeMemory uint64
|
|
|
|
status, _, err := syscall.SyscallN(hl.hipMemGetInfo, uintptr(unsafe.Pointer(&freeMemory)), uintptr(unsafe.Pointer(&totalMemory)))
|
|
|
|
if status != hipSuccess {
|
|
|
|
return 0, 0, fmt.Errorf("failed call to hipMemGetInfo: %d %s", status, err)
|
|
|
|
}
|
|
|
|
return freeMemory, totalMemory, nil
|
|
|
|
}
|