Refine GPU discovery to bootstrap once
Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating.
This commit is contained in:
parent
b32ebb4f29
commit
43ed358f9a
9 changed files with 383 additions and 149 deletions
|
@ -44,8 +44,8 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
|
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
|
||||||
func AMDGetGPUInfo() []GpuInfo {
|
func AMDGetGPUInfo() []RocmGPUInfo {
|
||||||
resp := []GpuInfo{}
|
resp := []RocmGPUInfo{}
|
||||||
if !AMDDetected() {
|
if !AMDDetected() {
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
@ -178,7 +178,7 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
// Shouldn't happen, but just in case...
|
// Shouldn't happen, but just in case...
|
||||||
if gpuID < 0 {
|
if gpuID < 0 {
|
||||||
slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
|
slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
|
||||||
return []GpuInfo{}
|
return []RocmGPUInfo{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if int(major) < RocmComputeMin {
|
if int(major) < RocmComputeMin {
|
||||||
|
@ -189,6 +189,7 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
// Look up the memory for the current node
|
// Look up the memory for the current node
|
||||||
totalMemory := uint64(0)
|
totalMemory := uint64(0)
|
||||||
usedMemory := uint64(0)
|
usedMemory := uint64(0)
|
||||||
|
var usedFile string
|
||||||
mapping := []struct {
|
mapping := []struct {
|
||||||
id uint64
|
id uint64
|
||||||
filename string
|
filename string
|
||||||
|
@ -255,22 +256,10 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
usedFile := filepath.Join(devDir, DRMUsedMemoryFile)
|
usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
|
||||||
usedFp, err := os.Open(usedFile)
|
usedMemory, err = getFreeMemory(usedFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug("failed to open sysfs node", "file", usedFile, "error", err)
|
slog.Debug("failed to update used memory", "error", err)
|
||||||
break
|
|
||||||
}
|
|
||||||
defer totalFp.Close()
|
|
||||||
buf, err = io.ReadAll(usedFp)
|
|
||||||
if err != nil {
|
|
||||||
slog.Debug("failed to read sysfs node", "file", usedFile, "error", err)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
usedMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
slog.Debug("failed to parse sysfs node", "file", usedFile, "error", err)
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -288,18 +277,21 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
|
|
||||||
slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
|
slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
|
||||||
slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
|
slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
|
||||||
gpuInfo := GpuInfo{
|
gpuInfo := RocmGPUInfo{
|
||||||
Library: "rocm",
|
GpuInfo: GpuInfo{
|
||||||
memInfo: memInfo{
|
Library: "rocm",
|
||||||
TotalMemory: totalMemory,
|
memInfo: memInfo{
|
||||||
FreeMemory: (totalMemory - usedMemory),
|
TotalMemory: totalMemory,
|
||||||
|
FreeMemory: (totalMemory - usedMemory),
|
||||||
|
},
|
||||||
|
ID: fmt.Sprintf("%d", gpuID),
|
||||||
|
Name: name,
|
||||||
|
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
|
||||||
|
MinimumMemory: rocmMinimumMemory,
|
||||||
|
DriverMajor: driverMajor,
|
||||||
|
DriverMinor: driverMinor,
|
||||||
},
|
},
|
||||||
ID: fmt.Sprintf("%d", gpuID),
|
usedFilepath: usedFile,
|
||||||
Name: name,
|
|
||||||
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
|
|
||||||
MinimumMemory: rocmMinimumMemory,
|
|
||||||
DriverMajor: driverMajor,
|
|
||||||
DriverMinor: driverMinor,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the user wants to filter to a subset of devices, filter out if we aren't a match
|
// If the user wants to filter to a subset of devices, filter out if we aren't a match
|
||||||
|
@ -323,7 +315,7 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
libDir, err = AMDValidateLibDir()
|
libDir, err = AMDValidateLibDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("unable to verify rocm library, will use cpu", "error", err)
|
slog.Warn("unable to verify rocm library, will use cpu", "error", err)
|
||||||
return []GpuInfo{}
|
return []RocmGPUInfo{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gpuInfo.DependencyPath = libDir
|
gpuInfo.DependencyPath = libDir
|
||||||
|
@ -334,7 +326,7 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
supported, err = GetSupportedGFX(libDir)
|
supported, err = GetSupportedGFX(libDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
|
slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
|
||||||
return []GpuInfo{}
|
return []RocmGPUInfo{}
|
||||||
}
|
}
|
||||||
slog.Debug("rocm supported GPUs", "types", supported)
|
slog.Debug("rocm supported GPUs", "types", supported)
|
||||||
}
|
}
|
||||||
|
@ -425,3 +417,36 @@ func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||||
}
|
}
|
||||||
return driverMajor, driverMinor, nil
|
return driverMajor, driverMinor, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
||||||
|
if len(gpus) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
for i := range gpus {
|
||||||
|
usedMemory, err := getFreeMemory(gpus[i].usedFilepath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(gpus[i].TotalMemory-usedMemory))
|
||||||
|
gpus[i].FreeMemory = gpus[i].TotalMemory - usedMemory
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getFreeMemory reads the sysfs node at usedFile and returns the number it
// contains.
//
// Despite its name, the value returned is the amount of memory currently in
// USE: callers in this file pass the DRM used-memory node and subtract the
// result from the GPU's total memory to derive the free amount.
//
// The node is expected to hold a single base-10 unsigned integer, optionally
// surrounded by whitespace. A failure to open, read, or parse the node is
// returned wrapped with %w (so errors.Is / errors.As still see the cause) and
// annotated with the file path; the returned value is 0 whenever the error is
// non-nil. Logging is left to the caller so each failure is reported once.
func getFreeMemory(usedFile string) (uint64, error) {
	usedFp, err := os.Open(usedFile)
	if err != nil {
		return 0, fmt.Errorf("failed to open sysfs node %s: %w", usedFile, err)
	}
	defer usedFp.Close()

	buf, err := io.ReadAll(usedFp)
	if err != nil {
		return 0, fmt.Errorf("failed to read sysfs node %s: %w", usedFile, err)
	}

	// sysfs values end with a newline; trim before parsing.
	usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to parse sysfs node %s: %w", usedFile, err)
	}
	return usedMemory, nil
}
|
||||||
|
|
|
@ -24,8 +24,8 @@ var (
|
||||||
RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
|
RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
|
||||||
)
|
)
|
||||||
|
|
||||||
func AMDGetGPUInfo() []GpuInfo {
|
func AMDGetGPUInfo() []RocmGPUInfo {
|
||||||
resp := []GpuInfo{}
|
resp := []RocmGPUInfo{}
|
||||||
hl, err := NewHipLib()
|
hl, err := NewHipLib()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug(err.Error())
|
slog.Debug(err.Error())
|
||||||
|
@ -117,21 +117,24 @@ func AMDGetGPUInfo() []GpuInfo {
|
||||||
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
|
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
|
||||||
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
|
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
|
||||||
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
|
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
|
||||||
gpuInfo := GpuInfo{
|
gpuInfo := RocmGPUInfo{
|
||||||
Library: "rocm",
|
GpuInfo: GpuInfo{
|
||||||
memInfo: memInfo{
|
Library: "rocm",
|
||||||
TotalMemory: totalMemory,
|
memInfo: memInfo{
|
||||||
FreeMemory: freeMemory,
|
TotalMemory: totalMemory,
|
||||||
},
|
FreeMemory: freeMemory,
|
||||||
ID: fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
|
},
|
||||||
DependencyPath: libDir,
|
ID: fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
|
||||||
MinimumMemory: rocmMinimumMemory,
|
DependencyPath: libDir,
|
||||||
Name: name,
|
MinimumMemory: rocmMinimumMemory,
|
||||||
Compute: gfx,
|
Name: name,
|
||||||
|
Compute: gfx,
|
||||||
|
|
||||||
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
|
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
|
||||||
// DriverMajor: driverMajor,
|
// DriverMajor: driverMajor,
|
||||||
// DriverMinor: driverMinor,
|
// DriverMinor: driverMinor,
|
||||||
|
},
|
||||||
|
index: i,
|
||||||
}
|
}
|
||||||
|
|
||||||
resp = append(resp, gpuInfo)
|
resp = append(resp, gpuInfo)
|
||||||
|
@ -159,3 +162,30 @@ func AMDValidateLibDir() (string, error) {
|
||||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
||||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
||||||
|
if len(gpus) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
hl, err := NewHipLib()
|
||||||
|
if err != nil {
|
||||||
|
slog.Debug(err.Error())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer hl.Release()
|
||||||
|
|
||||||
|
for i := range gpus {
|
||||||
|
err := hl.HipSetDevice(gpus[i].index)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
freeMemory, _, err := hl.HipMemGetInfo()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("get mem info", "id", i, "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
|
||||||
|
gpus[i].FreeMemory = freeMemory
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -1,21 +1,20 @@
|
||||||
package gpu
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
|
||||||
|
|
||||||
"golang.org/x/sys/cpu"
|
"golang.org/x/sys/cpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetCPUVariant() string {
|
func GetCPUVariant() string {
|
||||||
|
return getCPUCapability().ToVariant()
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCPUCapability() CPUCapability {
|
||||||
if cpu.X86.HasAVX2 {
|
if cpu.X86.HasAVX2 {
|
||||||
slog.Debug("CPU has AVX2")
|
return CPUCapabilityAVX2
|
||||||
return "avx2"
|
|
||||||
}
|
}
|
||||||
if cpu.X86.HasAVX {
|
if cpu.X86.HasAVX {
|
||||||
slog.Debug("CPU has AVX")
|
return CPUCapabilityAVX
|
||||||
return "avx"
|
|
||||||
}
|
}
|
||||||
slog.Debug("CPU does not have vector extensions")
|
|
||||||
// else LCD
|
// else LCD
|
||||||
return ""
|
return CPUCapabilityBase
|
||||||
}
|
}
|
||||||
|
|
262
gpu/gpu.go
262
gpu/gpu.go
|
@ -21,8 +21,8 @@ import (
|
||||||
"sync"
|
"sync"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/ollama/ollama/format"
|
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
|
"github.com/ollama/ollama/format"
|
||||||
)
|
)
|
||||||
|
|
||||||
type handles struct {
|
type handles struct {
|
||||||
|
@ -37,7 +37,18 @@ const (
|
||||||
rocmMinimumMemory = 457 * format.MebiByte
|
rocmMinimumMemory = 457 * format.MebiByte
|
||||||
)
|
)
|
||||||
|
|
||||||
var gpuMutex sync.Mutex
|
var (
|
||||||
|
gpuMutex sync.Mutex
|
||||||
|
bootstrapped bool
|
||||||
|
cpuCapability CPUCapability
|
||||||
|
cpus []CPUInfo
|
||||||
|
cudaGPUs []CudaGPUInfo
|
||||||
|
nvcudaLibPath string
|
||||||
|
cudartLibPath string
|
||||||
|
oneapiLibPath string
|
||||||
|
rocmGPUs []RocmGPUInfo
|
||||||
|
oneapiGPUs []OneapiGPUInfo
|
||||||
|
)
|
||||||
|
|
||||||
// With our current CUDA compile flags, older than 5.0 will not work properly
|
// With our current CUDA compile flags, older than 5.0 will not work properly
|
||||||
var CudaComputeMin = [2]C.int{5, 0}
|
var CudaComputeMin = [2]C.int{5, 0}
|
||||||
|
@ -96,11 +107,22 @@ var OneapiLinuxGlobs = []string{
|
||||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||||
|
|
||||||
// Note: gpuMutex must already be held
|
// Note: gpuMutex must already be held
|
||||||
func initGPUHandles() *handles {
|
func initCudaHandles() *handles {
|
||||||
|
|
||||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||||
|
|
||||||
gpuHandles := &handles{}
|
gpuHandles := &handles{}
|
||||||
|
// Short Circuit if we already know which library to use
|
||||||
|
if nvcudaLibPath != "" {
|
||||||
|
gpuHandles.deviceCount, gpuHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
|
||||||
|
return gpuHandles
|
||||||
|
}
|
||||||
|
if cudartLibPath != "" {
|
||||||
|
gpuHandles.deviceCount, gpuHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
|
||||||
|
return gpuHandles
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("searching for GPU discovery libraries for NVIDIA")
|
||||||
var cudartMgmtName string
|
var cudartMgmtName string
|
||||||
var cudartMgmtPatterns []string
|
var cudartMgmtPatterns []string
|
||||||
var nvcudaMgmtName string
|
var nvcudaMgmtName string
|
||||||
|
@ -136,7 +158,6 @@ func initGPUHandles() *handles {
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Debug("Detecting GPUs")
|
|
||||||
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
|
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
|
||||||
if len(nvcudaLibPaths) > 0 {
|
if len(nvcudaLibPaths) > 0 {
|
||||||
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
|
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
|
||||||
|
@ -144,6 +165,7 @@ func initGPUHandles() *handles {
|
||||||
slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
|
slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
|
||||||
gpuHandles.nvcuda = nvcuda
|
gpuHandles.nvcuda = nvcuda
|
||||||
gpuHandles.deviceCount = deviceCount
|
gpuHandles.deviceCount = deviceCount
|
||||||
|
nvcudaLibPath = libPath
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -155,6 +177,7 @@ func initGPUHandles() *handles {
|
||||||
slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
|
slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
|
||||||
gpuHandles.cudart = cudart
|
gpuHandles.cudart = cudart
|
||||||
gpuHandles.deviceCount = deviceCount
|
gpuHandles.deviceCount = deviceCount
|
||||||
|
cudartLibPath = libPath
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -166,6 +189,7 @@ func initGPUHandles() *handles {
|
||||||
slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
|
slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
|
||||||
gpuHandles.oneapi = oneapi
|
gpuHandles.oneapi = oneapi
|
||||||
gpuHandles.deviceCount = deviceCount
|
gpuHandles.deviceCount = deviceCount
|
||||||
|
oneapiLibPath = libPath
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -178,9 +202,12 @@ func GetGPUInfo() GpuInfoList {
|
||||||
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
||||||
gpuMutex.Lock()
|
gpuMutex.Lock()
|
||||||
defer gpuMutex.Unlock()
|
defer gpuMutex.Unlock()
|
||||||
|
needRefresh := true
|
||||||
gpuHandles := initGPUHandles()
|
var gpuHandles *handles
|
||||||
defer func() {
|
defer func() {
|
||||||
|
if gpuHandles == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
if gpuHandles.cudart != nil {
|
if gpuHandles.cudart != nil {
|
||||||
C.cudart_release(*gpuHandles.cudart)
|
C.cudart_release(*gpuHandles.cudart)
|
||||||
}
|
}
|
||||||
|
@ -189,97 +216,156 @@ func GetGPUInfo() GpuInfoList {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
|
if !bootstrapped {
|
||||||
cpuVariant := GetCPUVariant()
|
slog.Debug("Detecting GPUs")
|
||||||
if cpuVariant == "" && runtime.GOARCH == "amd64" {
|
needRefresh = false
|
||||||
slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
|
cpuCapability = getCPUCapability()
|
||||||
}
|
var memInfo C.mem_info_t
|
||||||
|
|
||||||
// On windows we bundle the nvidia library one level above the runner dir
|
|
||||||
depPath := ""
|
|
||||||
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
|
|
||||||
depPath = filepath.Dir(envconfig.RunnersDir)
|
|
||||||
}
|
|
||||||
|
|
||||||
var memInfo C.mem_info_t
|
|
||||||
resp := []GpuInfo{}
|
|
||||||
|
|
||||||
// NVIDIA first
|
|
||||||
for i := range gpuHandles.deviceCount {
|
|
||||||
// TODO once we support CPU compilation variants of GPU libraries refine this...
|
|
||||||
if cpuVariant == "" && runtime.GOARCH == "amd64" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
|
|
||||||
gpuInfo := GpuInfo{
|
|
||||||
Library: "cuda",
|
|
||||||
}
|
|
||||||
var driverMajor int
|
|
||||||
var driverMinor int
|
|
||||||
if gpuHandles.cudart != nil {
|
|
||||||
C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
|
|
||||||
} else {
|
|
||||||
C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
|
|
||||||
driverMajor = int(gpuHandles.nvcuda.driver_major)
|
|
||||||
driverMinor = int(gpuHandles.nvcuda.driver_minor)
|
|
||||||
}
|
|
||||||
if memInfo.err != nil {
|
|
||||||
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
|
|
||||||
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
|
||||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
|
||||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
|
||||||
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
|
||||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
|
||||||
gpuInfo.DependencyPath = depPath
|
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
|
||||||
gpuInfo.DriverMajor = driverMajor
|
|
||||||
gpuInfo.DriverMinor = driverMinor
|
|
||||||
|
|
||||||
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
|
||||||
resp = append(resp, gpuInfo)
|
|
||||||
}
|
|
||||||
if gpuHandles.oneapi != nil {
|
|
||||||
gpuInfo := GpuInfo{
|
|
||||||
Library: "oneapi",
|
|
||||||
}
|
|
||||||
C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
|
|
||||||
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
|
||||||
memInfo.free = C.uint64_t(totalFreeMem)
|
|
||||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
|
||||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
|
||||||
gpuInfo.ID = strconv.Itoa(i)
|
|
||||||
resp = append(resp, gpuInfo)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Then AMD
|
|
||||||
resp = append(resp, AMDGetGPUInfo()...)
|
|
||||||
|
|
||||||
if len(resp) == 0 {
|
|
||||||
C.cpu_check_ram(&memInfo)
|
C.cpu_check_ram(&memInfo)
|
||||||
if memInfo.err != nil {
|
if memInfo.err != nil {
|
||||||
slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
|
slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
return resp
|
return []GpuInfo{}
|
||||||
}
|
}
|
||||||
gpuInfo := GpuInfo{
|
cpuInfo := CPUInfo{
|
||||||
Library: "cpu",
|
GpuInfo: GpuInfo{
|
||||||
Variant: cpuVariant,
|
Library: "cpu",
|
||||||
|
Variant: cpuCapability.ToVariant(),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
cpuInfo.TotalMemory = uint64(memInfo.total)
|
||||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
cpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
cpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||||
|
cpus = []CPUInfo{cpuInfo}
|
||||||
|
|
||||||
resp = append(resp, gpuInfo)
|
// Fallback to CPU mode if we're lacking required vector extensions on x86
|
||||||
|
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
|
||||||
|
slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability.ToString(), "detected", cpuCapability.ToString())
|
||||||
|
bootstrapped = true
|
||||||
|
// No need to do any GPU discovery, since we can't run on them
|
||||||
|
return GpuInfoList{cpus[0].GpuInfo}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO - implement
|
||||||
|
|
||||||
|
// TODO refine the discovery to only gather total memory
|
||||||
|
|
||||||
|
// On windows we bundle the nvidia library one level above the runner dir
|
||||||
|
depPath := ""
|
||||||
|
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
|
||||||
|
depPath = filepath.Dir(envconfig.RunnersDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load ALL libraries
|
||||||
|
gpuHandles = initCudaHandles()
|
||||||
|
|
||||||
|
// TODO needs a refactoring pass to init oneapi handles
|
||||||
|
|
||||||
|
// NVIDIA
|
||||||
|
for i := range gpuHandles.deviceCount {
|
||||||
|
if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
|
||||||
|
gpuInfo := CudaGPUInfo{
|
||||||
|
GpuInfo: GpuInfo{
|
||||||
|
Library: "cuda",
|
||||||
|
},
|
||||||
|
index: i,
|
||||||
|
}
|
||||||
|
var driverMajor int
|
||||||
|
var driverMinor int
|
||||||
|
if gpuHandles.cudart != nil {
|
||||||
|
C.cudart_bootstrap(*gpuHandles.cudart, C.int(i), &memInfo)
|
||||||
|
} else {
|
||||||
|
C.nvcuda_bootstrap(*gpuHandles.nvcuda, C.int(i), &memInfo)
|
||||||
|
driverMajor = int(gpuHandles.nvcuda.driver_major)
|
||||||
|
driverMinor = int(gpuHandles.nvcuda.driver_minor)
|
||||||
|
}
|
||||||
|
if memInfo.err != nil {
|
||||||
|
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
||||||
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
|
||||||
|
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||||
|
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
|
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||||
|
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
||||||
|
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||||
|
gpuInfo.DependencyPath = depPath
|
||||||
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
|
gpuInfo.DriverMajor = int(driverMajor)
|
||||||
|
gpuInfo.DriverMinor = int(driverMinor)
|
||||||
|
|
||||||
|
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
||||||
|
cudaGPUs = append(cudaGPUs, gpuInfo)
|
||||||
|
}
|
||||||
|
if gpuHandles.oneapi != nil {
|
||||||
|
gpuInfo := OneapiGPUInfo{
|
||||||
|
GpuInfo: GpuInfo{
|
||||||
|
Library: "oneapi",
|
||||||
|
},
|
||||||
|
index: i,
|
||||||
|
}
|
||||||
|
// TODO - split bootstrapping from updating free memory
|
||||||
|
C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
|
||||||
|
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
||||||
|
memInfo.free = C.uint64_t(totalFreeMem)
|
||||||
|
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||||
|
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
|
gpuInfo.ID = strconv.Itoa(i)
|
||||||
|
oneapiGPUs = append(oneapiGPUs, gpuInfo)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rocmGPUs = AMDGetGPUInfo()
|
||||||
|
bootstrapped = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For detected GPUs, load library if not loaded
|
||||||
|
|
||||||
|
// Refresh free memory usage
|
||||||
|
if needRefresh {
|
||||||
|
// TODO - CPU system memory tracking/refresh
|
||||||
|
var memInfo C.mem_info_t
|
||||||
|
if gpuHandles == nil && len(cudaGPUs) > 0 {
|
||||||
|
gpuHandles = initCudaHandles()
|
||||||
|
}
|
||||||
|
for i, gpu := range cudaGPUs {
|
||||||
|
if gpuHandles.cudart != nil {
|
||||||
|
C.cudart_bootstrap(*gpuHandles.cudart, C.int(gpu.index), &memInfo)
|
||||||
|
} else {
|
||||||
|
C.nvcuda_get_free(*gpuHandles.nvcuda, C.int(gpu.index), &memInfo.free)
|
||||||
|
}
|
||||||
|
if memInfo.err != nil {
|
||||||
|
slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
||||||
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if memInfo.free == 0 {
|
||||||
|
slog.Warn("error looking up nvidia GPU memory")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slog.Debug("updating cuda free memory", "gpu", gpu.ID, "name", gpu.Name, "before", format.HumanBytes2(gpu.FreeMemory), "now", format.HumanBytes2(uint64(memInfo.free)))
|
||||||
|
cudaGPUs[i].FreeMemory = uint64(memInfo.free)
|
||||||
|
}
|
||||||
|
err := RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
|
||||||
|
if err != nil {
|
||||||
|
slog.Debug("problem refreshing ROCm free memory", "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := []GpuInfo{}
|
||||||
|
for _, gpu := range cudaGPUs {
|
||||||
|
resp = append(resp, gpu.GpuInfo)
|
||||||
|
}
|
||||||
|
for _, gpu := range rocmGPUs {
|
||||||
|
resp = append(resp, gpu.GpuInfo)
|
||||||
|
}
|
||||||
|
if len(resp) == 0 {
|
||||||
|
resp = append(resp, cpus[0].GpuInfo)
|
||||||
|
}
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -94,7 +94,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) {
|
void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
|
||||||
resp->err = NULL;
|
resp->err = NULL;
|
||||||
cudartMemory_t memInfo = {0,0,0};
|
cudartMemory_t memInfo = {0,0,0};
|
||||||
cudartReturn_t ret;
|
cudartReturn_t ret;
|
||||||
|
|
|
@ -140,7 +140,8 @@ typedef struct cudart_init_resp {
|
||||||
} cudart_init_resp_t;
|
} cudart_init_resp_t;
|
||||||
|
|
||||||
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
|
void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
|
||||||
void cudart_check_vram(cudart_handle_t ch, int device_id, mem_info_t *resp);
|
void cudart_bootstrap(cudart_handle_t ch, int device_id, mem_info_t *resp);
|
||||||
|
// TODO - if we keep this library longer term, add cudart_get_free
|
||||||
void cudart_release(cudart_handle_t ch);
|
void cudart_release(cudart_handle_t ch);
|
||||||
|
|
||||||
#endif // __GPU_INFO_CUDART_H__
|
#endif // __GPU_INFO_CUDART_H__
|
||||||
|
|
|
@ -96,7 +96,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const int buflen = 256;
|
const int buflen = 256;
|
||||||
void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
|
void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
|
||||||
resp->err = NULL;
|
resp->err = NULL;
|
||||||
nvcudaMemory_t memInfo = {0,0};
|
nvcudaMemory_t memInfo = {0,0};
|
||||||
CUresult ret;
|
CUresult ret;
|
||||||
|
@ -168,7 +168,7 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
|
||||||
// To get memory we have to set (and release) a context
|
// To get memory we have to set (and release) a context
|
||||||
ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
|
ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
|
||||||
if (ret != CUDA_SUCCESS) {
|
if (ret != CUDA_SUCCESS) {
|
||||||
snprintf(buf, buflen, "nvcuda failed to get primary device context %d", ret);
|
snprintf(buf, buflen, "nvcuda failed to get device context %d", ret);
|
||||||
resp->err = strdup(buf);
|
resp->err = strdup(buf);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -193,7 +193,42 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
|
||||||
|
|
||||||
ret = (*h.cuCtxDestroy)(ctx);
|
ret = (*h.cuCtxDestroy)(ctx);
|
||||||
if (ret != CUDA_SUCCESS) {
|
if (ret != CUDA_SUCCESS) {
|
||||||
LOG(1, "nvcuda failed to release primary device context %d", ret);
|
LOG(1, "nvcuda failed to release device context %d", ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void nvcuda_get_free(nvcuda_handle_t h, int i, uint64_t *free) {
|
||||||
|
CUresult ret;
|
||||||
|
CUcontext ctx = NULL;
|
||||||
|
CUdevice device = -1;
|
||||||
|
*free = 0;
|
||||||
|
uint64_t total = 0;
|
||||||
|
|
||||||
|
ret = (*h.cuDeviceGet)(&device, i);
|
||||||
|
if (ret != CUDA_SUCCESS) {
|
||||||
|
LOG(1, "nvcuda device failed to initialize");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// To get memory we have to set (and release) a context
|
||||||
|
ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
|
||||||
|
if (ret != CUDA_SUCCESS) {
|
||||||
|
LOG(1, "nvcuda failed to get device context %d", ret);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = (*h.cuMemGetInfo_v2)(free, &total);
|
||||||
|
if (ret != CUDA_SUCCESS) {
|
||||||
|
LOG(1, "nvcuda device memory info lookup failure %d", ret);
|
||||||
|
// Best effort on failure...
|
||||||
|
(*h.cuCtxDestroy)(ctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = (*h.cuCtxDestroy)(ctx);
|
||||||
|
if (ret != CUDA_SUCCESS) {
|
||||||
|
LOG(1, "nvcuda failed to release device context %d", ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,8 @@ typedef struct nvcuda_init_resp {
|
||||||
} nvcuda_init_resp_t;
|
} nvcuda_init_resp_t;
|
||||||
|
|
||||||
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
|
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
|
||||||
void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
|
void nvcuda_bootstrap(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
|
||||||
|
void nvcuda_get_free(nvcuda_handle_t ch, int device_id, uint64_t *free);
|
||||||
void nvcuda_release(nvcuda_handle_t ch);
|
void nvcuda_release(nvcuda_handle_t ch);
|
||||||
|
|
||||||
#endif // __GPU_INFO_NVCUDA_H__
|
#endif // __GPU_INFO_NVCUDA_H__
|
||||||
|
|
57
gpu/types.go
57
gpu/types.go
|
@ -38,6 +38,29 @@ type GpuInfo struct {
|
||||||
// TODO other performance capability info to help in scheduling decisions
|
// TODO other performance capability info to help in scheduling decisions
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CPUInfo describes the host CPU using the same GpuInfo record that is used
// for GPU devices; it adds no fields of its own.
type CPUInfo struct {
|
||||||
|
GpuInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
// CudaGPUInfo is the GpuInfo record for a CUDA device plus the device index
// that is handed back to the CUDA management library when refreshing memory.
type CudaGPUInfo struct {
|
||||||
|
GpuInfo
|
||||||
|
index int // device index passed to the CUDA library calls
|
||||||
|
}
|
||||||
|
// CudaGPUInfoList is a slice of discovered CUDA devices.
type CudaGPUInfoList []CudaGPUInfo
|
||||||
|
|
||||||
|
// RocmGPUInfo is the GpuInfo record for a ROCm device plus the
// platform-specific handle needed to refresh its free memory later.
type RocmGPUInfo struct {
|
||||||
|
GpuInfo
|
||||||
|
usedFilepath string // linux: path of the used-memory sysfs node read on refresh
|
||||||
|
index int // device index on windows, passed to HipSetDevice on refresh
|
||||||
|
}
|
||||||
|
// RocmGPUInfoList is a slice of discovered ROCm devices; it carries the
// RefreshFreeMemory method.
type RocmGPUInfoList []RocmGPUInfo
|
||||||
|
|
||||||
|
// OneapiGPUInfo is the GpuInfo record for a oneAPI device plus the index it
// was discovered at.
type OneapiGPUInfo struct {
|
||||||
|
GpuInfo
|
||||||
|
index int // device index
|
||||||
|
}
|
||||||
|
// OneapiGPUInfoList is a slice of discovered oneAPI devices.
type OneapiGPUInfoList []OneapiGPUInfo
|
||||||
|
|
||||||
type GpuInfoList []GpuInfo
|
type GpuInfoList []GpuInfo
|
||||||
|
|
||||||
// Split up the set of gpu info's by Library and variant
|
// Split up the set of gpu info's by Library and variant
|
||||||
|
@ -86,3 +109,37 @@ type ByFreeMemory []GpuInfo
|
||||||
func (a ByFreeMemory) Len() int { return len(a) }
|
func (a ByFreeMemory) Len() int { return len(a) }
|
||||||
func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||||
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
|
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
|
||||||
|
|
||||||
|
// CPUCapability identifies the vector-instruction level supported by a CPU.
// Values are ordered, so capabilities can be compared with < and >: a larger
// value means a more capable CPU.
type CPUCapability uint32

// GPURunnerCPUCapability is the minimum CPU capability the GPU runners were
// built to require; hosts below it fall back to CPU-only mode on amd64.
//
// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX

// CPU capability levels, from least to most capable.
const (
	// CPUCapabilityBase means no vector extensions were detected.
	CPUCapabilityBase CPUCapability = iota
	// CPUCapabilityAVX means the CPU supports AVX.
	CPUCapabilityAVX
	// CPUCapabilityAVX2 means the CPU supports AVX2.
	CPUCapabilityAVX2
	// TODO AVX512
)

// ToString returns a human-readable name for the capability, suitable for log
// messages. Unknown values and CPUCapabilityBase yield "no vector extensions".
func (c CPUCapability) ToString() string {
	switch c {
	case CPUCapabilityAVX:
		return "AVX"
	case CPUCapabilityAVX2:
		return "AVX2"
	default:
		return "no vector extensions"
	}
}

// String implements fmt.Stringer so the capability prints readably when passed
// directly to fmt or slog. It returns the same text as ToString.
func (c CPUCapability) String() string {
	return c.ToString()
}

// ToVariant returns the lower-case runner variant name for the capability
// ("avx", "avx2"), or the empty string for CPUCapabilityBase and unknown
// values.
func (c CPUCapability) ToVariant() string {
	switch c {
	case CPUCapabilityAVX:
		return "avx"
	case CPUCapabilityAVX2:
		return "avx2"
	default:
		return ""
	}
}
|
||||||
|
|
Loading…
Reference in a new issue