Ensure AMD GPU nodes are numerically sorted
For systems that enumerate over 10 CPUs, the default lexicographical sort order interleaves CPUs and GPUs.
This commit is contained in:
parent
ac33aa7d37
commit
7c2a157ca4
1 changed files with 15 additions and 0 deletions
|
@ -10,6 +10,7 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"slices"
|
"slices"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
@ -82,6 +83,20 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
||||||
// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
|
// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
|
||||||
// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
|
// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
|
||||||
matches, _ := filepath.Glob(GPUPropertiesFileGlob)
|
matches, _ := filepath.Glob(GPUPropertiesFileGlob)
|
||||||
|
sort.Slice(matches, func(i, j int) bool {
	// Order the glob results by the numeric node ID embedded in each path:
	// /sys/class/kfd/kfd/topology/nodes/<number>/properties
	// A plain string sort would place "10" before "2".
	a, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[i])), 10, 64)
	if err != nil {
		// Non-numeric node directory: log it and report "not less" so the
		// comparator never panics on an unexpected path.
		slog.Debug("parse err", "error", err, "match", matches[i])
		return false
	}
	b, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[j])), 10, 64)
	if err != nil {
		// Log the path that actually failed to parse (index j, not i).
		slog.Debug("parse err", "error", err, "match", matches[j])
		return false
	}
	return a < b
})
|
||||||
cpuCount := 0
|
cpuCount := 0
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
slog.Debug("evaluating amdgpu node " + match)
|
slog.Debug("evaluating amdgpu node " + match)
|
||||||
|
|
Loading…
Reference in a new issue