24636dfa87
On windows, detect large multi-socket systems and reduce to the number of cores in one socket for best performance
234 lines
6.5 KiB
Go
234 lines
6.5 KiB
Go
package gpu
|
|
|
|
import (
|
|
"fmt"
|
|
"log/slog"
|
|
"syscall"
|
|
"unsafe"
|
|
)
|
|
|
|
// MEMORYSTATUSEX is the buffer handed to the kernel32 GlobalMemoryStatusEx
// call by GetCPUMem. It is passed to the OS by pointer, so the field order
// and widths must stay exactly as declared here.
type MEMORYSTATUSEX struct {
	length               uint32 // set to sizeofMemoryStatusEx by the caller before the call
	MemoryLoad           uint32
	TotalPhys            uint64 // reported by GetCPUMem as memInfo.TotalMemory
	AvailPhys            uint64 // reported by GetCPUMem as memInfo.FreeMemory
	TotalPageFile        uint64
	AvailPageFile        uint64 // reported by GetCPUMem as memInfo.FreeSwap
	TotalVirtual         uint64
	AvailVirtual         uint64
	AvailExtendedVirtual uint64
}
|
|
|
|
var (
|
|
k32 = syscall.NewLazyDLL("kernel32.dll")
|
|
globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
|
|
sizeofMemoryStatusEx = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
|
|
GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
|
|
)
|
|
|
|
// CudartGlobs lists glob patterns matching cudart64_*.dll under the
// "NVIDIA GPU Computing Toolkit" install directory.
// NOTE(review): the consumers of these glob lists are outside this file.
var CudartGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

// NvmlGlobs lists glob patterns matching nvml.dll in System32.
var NvmlGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}

// NvcudaGlobs lists glob patterns matching nvcuda.dll in any
// c:\windows\system* directory.
var NvcudaGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

// OneapiGlobs lists glob patterns matching ze_intel_gpu64.dll inside the
// Windows driver store.
var OneapiGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}
|
|
|
|
// File names (or file-name patterns) of the GPU management libraries.
// Each one matches the final path element of the corresponding *Globs list.
var (
	CudartMgmtName = "cudart64_*.dll"
	NvcudaMgmtName = "nvcuda.dll"
	NvmlMgmtName   = "nvml.dll"
	OneapiMgmtName = "ze_intel_gpu64.dll"
)
|
|
|
|
func GetCPUMem() (memInfo, error) {
|
|
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
|
r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
|
|
if r1 == 0 {
|
|
return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
|
|
}
|
|
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
|
|
}
|
|
|
|
// LOGICAL_PROCESSOR_RELATIONSHIP identifies the kind of a record returned by
// GetLogicalProcessorInformationEx, and is also the selector passed to that
// call to choose which kinds are reported.
type LOGICAL_PROCESSOR_RELATIONSHIP uint32

// Relationship kinds, numbered consecutively from 0. This file only inspects
// RelationProcessorCore and RelationProcessorPackage records.
const (
	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
	RelationNumaNode
	RelationCache
	RelationProcessorPackage
	RelationGroup
	RelationProcessorDie
	RelationNumaNodeEx
	RelationProcessorModule
)

// RelationAll is the selector getLogicalProcessorInformationEx passes to
// request every relationship kind in a single call.
const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
|
|
|
|
// GROUP_AFFINITY describes a set of logical processors as a bitmask (Mask)
// within one processor group (Group). It is read directly out of the buffer
// filled by GetLogicalProcessorInformationEx, so the layout must not change.
type GROUP_AFFINITY struct {
	Mask     uintptr // KAFFINITY
	Group    uint16
	Reserved [3]uint16
}

// PROCESSOR_RELATIONSHIP is the payload of RelationProcessorCore and
// RelationProcessorPackage records. It is a variable-length structure:
// GroupMask is declared with one element but GroupCount entries follow in
// memory, which callers reach with pointer arithmetic.
type PROCESSOR_RELATIONSHIP struct {
	Flags           byte // zero is counted as one thread per core, non-zero as two
	EfficiencyClass byte // relative value; larger means more performant
	Reserved        [20]byte
	GroupCount      uint16
	GroupMask       [1]GROUP_AFFINITY // len GroupCount
}
|
|
|
|
// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP
|
|
|
|
type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
|
|
Relationship LOGICAL_PROCESSOR_RELATIONSHIP
|
|
Size uint32
|
|
U [1]byte // Union len Size
|
|
// PROCESSOR_RELATIONSHIP
|
|
// NUMA_NODE_RELATIONSHIP
|
|
// CACHE_RELATIONSHIP
|
|
// GROUP_RELATIONSHIP
|
|
}
|
|
|
|
func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
|
|
if group == nil || target == nil {
|
|
return false
|
|
}
|
|
return group.Mask&target.Mask != 0
|
|
}
|
|
|
|
type winPackage struct {
|
|
groups []*GROUP_AFFINITY
|
|
coreCount int // performance cores = coreCount - efficiencyCoreCount
|
|
efficiencyCoreCount int
|
|
threadCount int
|
|
}
|
|
|
|
func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
|
|
for _, group := range pkg.groups {
|
|
if group.IsMember(target) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func getLogicalProcessorInformationEx() ([]byte, error) {
|
|
buf := make([]byte, 1)
|
|
bufSize := len(buf)
|
|
ret, _, err := GetLogicalProcessorInformationEx.Call(
|
|
uintptr(RelationAll),
|
|
uintptr(unsafe.Pointer(&buf[0])),
|
|
uintptr(unsafe.Pointer(&bufSize)),
|
|
)
|
|
if ret != 0 {
|
|
return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
|
|
}
|
|
|
|
buf = make([]byte, bufSize)
|
|
ret, _, err = GetLogicalProcessorInformationEx.Call(
|
|
uintptr(RelationAll),
|
|
uintptr(unsafe.Pointer(&buf[0])),
|
|
uintptr(unsafe.Pointer(&bufSize)),
|
|
)
|
|
if ret == 0 {
|
|
return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
|
|
}
|
|
return buf, nil
|
|
}
|
|
|
|
func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
|
|
var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
|
|
// Find all the packages first
|
|
packages := []*winPackage{}
|
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
|
if slpi.Relationship != RelationProcessorPackage {
|
|
continue
|
|
}
|
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
|
pkg := &winPackage{}
|
|
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
|
for j := range pr.GroupCount {
|
|
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
|
pkg.groups = append(pkg.groups, gm)
|
|
}
|
|
packages = append(packages, pkg)
|
|
}
|
|
|
|
slog.Info("packages", "count", len(packages))
|
|
|
|
// To identify efficiency cores we have to compare the relative values
|
|
// Larger values are "less efficient" (aka, more performant)
|
|
var maxEfficiencyClass byte
|
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
|
if slpi.Relationship != RelationProcessorCore {
|
|
continue
|
|
}
|
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
|
if pr.EfficiencyClass > maxEfficiencyClass {
|
|
maxEfficiencyClass = pr.EfficiencyClass
|
|
}
|
|
}
|
|
if maxEfficiencyClass > 0 {
|
|
slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
|
|
}
|
|
|
|
// then match up the Cores to the Packages, count up cores, threads and efficiency cores
|
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
|
if slpi.Relationship != RelationProcessorCore {
|
|
continue
|
|
}
|
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
|
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
|
for j := range pr.GroupCount {
|
|
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
|
for _, pkg := range packages {
|
|
if pkg.IsMember(gm) {
|
|
pkg.coreCount++
|
|
if pr.Flags == 0 {
|
|
pkg.threadCount++
|
|
} else {
|
|
pkg.threadCount += 2
|
|
}
|
|
if pr.EfficiencyClass < maxEfficiencyClass {
|
|
pkg.efficiencyCoreCount++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sumarize the results
|
|
for i, pkg := range packages {
|
|
slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
|
|
}
|
|
|
|
return packages
|
|
}
|
|
|
|
func GetCPUDetails() ([]CPU, error) {
|
|
buf, err := getLogicalProcessorInformationEx()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
packages := processSystemLogicalProcessorInforationList(buf)
|
|
cpus := make([]CPU, len(packages))
|
|
|
|
for i, pkg := range packages {
|
|
cpus[i].CoreCount = pkg.coreCount
|
|
cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
|
|
cpus[i].ThreadCount = pkg.threadCount
|
|
}
|
|
return cpus, nil
|
|
}
|