Discover CPU details for default thread selection (#6264)
On Windows, detect large multi-socket systems and reduce the default thread count to the number of cores in one socket for best performance
This commit is contained in:
parent
1d7fa3ad2d
commit
24636dfa87
7 changed files with 408 additions and 24 deletions
|
@ -229,7 +229,10 @@ func GetGPUInfo() GpuInfoList {
|
||||||
slog.Warn("error looking up system memory", "error", err)
|
slog.Warn("error looking up system memory", "error", err)
|
||||||
}
|
}
|
||||||
depPath := LibraryDir()
|
depPath := LibraryDir()
|
||||||
|
details, err := GetCPUDetails()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to lookup CPU details", "error", err)
|
||||||
|
}
|
||||||
cpus = []CPUInfo{
|
cpus = []CPUInfo{
|
||||||
{
|
{
|
||||||
GpuInfo: GpuInfo{
|
GpuInfo: GpuInfo{
|
||||||
|
@ -239,6 +242,7 @@ func GetGPUInfo() GpuInfoList {
|
||||||
ID: "0",
|
ID: "0",
|
||||||
DependencyPath: depPath,
|
DependencyPath: depPath,
|
||||||
},
|
},
|
||||||
|
CPUs: details,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,9 @@ package gpu
|
||||||
import "C"
|
import "C"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log/slog"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
)
|
)
|
||||||
|
@ -69,11 +71,30 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
||||||
|
|
||||||
func GetSystemInfo() SystemInfo {
|
func GetSystemInfo() SystemInfo {
|
||||||
mem, _ := GetCPUMem()
|
mem, _ := GetCPUMem()
|
||||||
|
query := "hw.perflevel0.physicalcpu"
|
||||||
|
perfCores, err := syscall.SysctlUint32(query)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to discover physical CPU details", "query", query, "error", err)
|
||||||
|
}
|
||||||
|
query = "hw.perflevel1.physicalcpu"
|
||||||
|
efficiencyCores, _ := syscall.SysctlUint32(query) // On x86 xeon this wont return data
|
||||||
|
|
||||||
|
// Determine thread count
|
||||||
|
query = "hw.logicalcpu"
|
||||||
|
logicalCores, _ := syscall.SysctlUint32(query)
|
||||||
|
|
||||||
return SystemInfo{
|
return SystemInfo{
|
||||||
System: CPUInfo{
|
System: CPUInfo{
|
||||||
GpuInfo: GpuInfo{
|
GpuInfo: GpuInfo{
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
},
|
},
|
||||||
|
CPUs: []CPU{
|
||||||
|
{
|
||||||
|
CoreCount: int(perfCores + efficiencyCores),
|
||||||
|
EfficiencyCoreCount: int(efficiencyCores),
|
||||||
|
ThreadCount: int(logicalCores),
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
GPUs: GetGPUInfo(),
|
GPUs: GetGPUInfo(),
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,8 @@ import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"reflect"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
|
@ -90,3 +92,95 @@ func GetCPUMem() (memInfo, error) {
|
||||||
}
|
}
|
||||||
return mem, nil
|
return mem, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CpuInfoFilename is the path of the file GetCPUDetails parses to learn about
// the processors on a Linux host.
const CpuInfoFilename = "/proc/cpuinfo"

// linuxCpuInfo holds the values of interest from one processor stanza of
// CpuInfoFilename. Each `cpuinfo` struct tag is the key, spelled exactly as
// it appears in the file, whose value is copied into that field.
type linuxCpuInfo struct {
	ID         string `cpuinfo:"processor"`
	VendorID   string `cpuinfo:"vendor_id"`
	ModelName  string `cpuinfo:"model name"`
	PhysicalID string `cpuinfo:"physical id"`
	Siblings   string `cpuinfo:"siblings"`
	CoreID     string `cpuinfo:"core id"`
}
|
||||||
|
|
||||||
|
func GetCPUDetails() ([]CPU, error) {
|
||||||
|
file, err := os.Open(CpuInfoFilename)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
reColumns := regexp.MustCompile("\t+: ")
|
||||||
|
scanner := bufio.NewScanner(file)
|
||||||
|
cpuInfos := []linuxCpuInfo{}
|
||||||
|
cpu := &linuxCpuInfo{}
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if sl := reColumns.Split(line, 2); len(sl) > 1 {
|
||||||
|
t := reflect.TypeOf(cpu).Elem()
|
||||||
|
s := reflect.ValueOf(cpu).Elem()
|
||||||
|
for i := range t.NumField() {
|
||||||
|
field := t.Field(i)
|
||||||
|
tag := field.Tag.Get("cpuinfo")
|
||||||
|
if tag == sl[0] {
|
||||||
|
s.FieldByName(field.Name).SetString(sl[1])
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if strings.TrimSpace(line) == "" && cpu.ID != "" {
|
||||||
|
cpuInfos = append(cpuInfos, *cpu)
|
||||||
|
cpu = &linuxCpuInfo{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process the sockets/cores/threads
|
||||||
|
socketByID := map[string]*CPU{}
|
||||||
|
coreBySocket := map[string]map[string]struct{}{}
|
||||||
|
threadsByCoreBySocket := map[string]map[string]int{}
|
||||||
|
for _, c := range cpuInfos {
|
||||||
|
if _, found := socketByID[c.PhysicalID]; !found {
|
||||||
|
socketByID[c.PhysicalID] = &CPU{
|
||||||
|
ID: c.PhysicalID,
|
||||||
|
VendorID: c.VendorID,
|
||||||
|
ModelName: c.ModelName,
|
||||||
|
}
|
||||||
|
coreBySocket[c.PhysicalID] = map[string]struct{}{}
|
||||||
|
threadsByCoreBySocket[c.PhysicalID] = map[string]int{}
|
||||||
|
}
|
||||||
|
if c.CoreID != "" {
|
||||||
|
coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID] = struct{}{}
|
||||||
|
threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID]++
|
||||||
|
} else {
|
||||||
|
coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID] = struct{}{}
|
||||||
|
threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tally up the values from the tracking maps
|
||||||
|
for id, s := range socketByID {
|
||||||
|
s.CoreCount = len(coreBySocket[id])
|
||||||
|
s.ThreadCount = 0
|
||||||
|
for _, tc := range threadsByCoreBySocket[id] {
|
||||||
|
s.ThreadCount += tc
|
||||||
|
}
|
||||||
|
|
||||||
|
// This only works if HT is enabled, consider a more reliable model, maybe cache size comparisons?
|
||||||
|
efficiencyCoreCount := 0
|
||||||
|
for _, threads := range threadsByCoreBySocket[id] {
|
||||||
|
if threads == 1 {
|
||||||
|
efficiencyCoreCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if efficiencyCoreCount == s.CoreCount {
|
||||||
|
// 1:1 mapping means they're not actually efficiency cores, but regular cores
|
||||||
|
s.EfficiencyCoreCount = 0
|
||||||
|
} else {
|
||||||
|
s.EfficiencyCoreCount = efficiencyCoreCount
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result := []CPU{}
|
||||||
|
for _, c := range socketByID {
|
||||||
|
result = append(result, *c)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"syscall"
|
"syscall"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
@ -22,6 +23,7 @@ var (
|
||||||
k32 = syscall.NewLazyDLL("kernel32.dll")
|
k32 = syscall.NewLazyDLL("kernel32.dll")
|
||||||
globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
|
globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
|
||||||
sizeofMemoryStatusEx = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
|
sizeofMemoryStatusEx = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
|
||||||
|
GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
|
||||||
)
|
)
|
||||||
|
|
||||||
var CudartGlobs = []string{
|
var CudartGlobs = []string{
|
||||||
|
@ -55,3 +57,178 @@ func GetCPUMem() (memInfo, error) {
|
||||||
}
|
}
|
||||||
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
|
return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LOGICAL_PROCESSOR_RELATIONSHIP identifies the kind of record stored in a
// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX entry.
type LOGICAL_PROCESSOR_RELATIONSHIP uint32

const (
	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
	RelationNumaNode
	RelationCache
	RelationProcessorPackage
	RelationGroup
	RelationProcessorDie
	RelationNumaNodeEx
	RelationProcessorModule
)

// RelationAll is the relationship value passed to
// GetLogicalProcessorInformationEx to request every kind of record.
const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff

// GROUP_AFFINITY describes a set of logical processors: Mask has one bit per
// processor and Group is the processor group that the mask belongs to.
type GROUP_AFFINITY struct {
	Mask     uintptr // KAFFINITY
	Group    uint16
	Reserved [3]uint16
}

// PROCESSOR_RELATIONSHIP is the payload of a record whose relationship
// describes processors (for example a core or a package). Only the first
// GroupMask element is declared; GroupCount gives the real number of
// elements that follow in memory.
type PROCESSOR_RELATIONSHIP struct {
	Flags           byte
	EfficiencyClass byte
	Reserved        [20]byte
	GroupCount      uint16
	GroupMask       [1]GROUP_AFFINITY // len GroupCount
}

// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP

// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX is the header of one variable
// length record. Size is the length in bytes of the whole record, and U marks
// where the relationship-specific payload begins.
type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
	Relationship LOGICAL_PROCESSOR_RELATIONSHIP
	Size         uint32
	U            [1]byte // Union len Size
	// PROCESSOR_RELATIONSHIP
	// NUMA_NODE_RELATIONSHIP
	// CACHE_RELATIONSHIP
	// GROUP_RELATIONSHIP
}

// IsMember reports whether target shares at least one logical processor with
// group. Masks are only comparable inside the same processor group, so two
// affinities in different groups never match even when their bits overlap.
// A nil receiver or a nil target is never a member.
func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
	if group == nil || target == nil {
		return false
	}
	if group.Group != target.Group {
		return false
	}
	return group.Mask&target.Mask != 0
}
|
||||||
|
|
||||||
|
type winPackage struct {
|
||||||
|
groups []*GROUP_AFFINITY
|
||||||
|
coreCount int // performance cores = coreCount - efficiencyCoreCount
|
||||||
|
efficiencyCoreCount int
|
||||||
|
threadCount int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
|
||||||
|
for _, group := range pkg.groups {
|
||||||
|
if group.IsMember(target) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func getLogicalProcessorInformationEx() ([]byte, error) {
|
||||||
|
buf := make([]byte, 1)
|
||||||
|
bufSize := len(buf)
|
||||||
|
ret, _, err := GetLogicalProcessorInformationEx.Call(
|
||||||
|
uintptr(RelationAll),
|
||||||
|
uintptr(unsafe.Pointer(&buf[0])),
|
||||||
|
uintptr(unsafe.Pointer(&bufSize)),
|
||||||
|
)
|
||||||
|
if ret != 0 {
|
||||||
|
return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = make([]byte, bufSize)
|
||||||
|
ret, _, err = GetLogicalProcessorInformationEx.Call(
|
||||||
|
uintptr(RelationAll),
|
||||||
|
uintptr(unsafe.Pointer(&buf[0])),
|
||||||
|
uintptr(unsafe.Pointer(&bufSize)),
|
||||||
|
)
|
||||||
|
if ret == 0 {
|
||||||
|
return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
|
||||||
|
}
|
||||||
|
return buf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
|
||||||
|
var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
|
||||||
|
// Find all the packages first
|
||||||
|
packages := []*winPackage{}
|
||||||
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
||||||
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
||||||
|
if slpi.Relationship != RelationProcessorPackage {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
||||||
|
pkg := &winPackage{}
|
||||||
|
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
||||||
|
for j := range pr.GroupCount {
|
||||||
|
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
||||||
|
pkg.groups = append(pkg.groups, gm)
|
||||||
|
}
|
||||||
|
packages = append(packages, pkg)
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("packages", "count", len(packages))
|
||||||
|
|
||||||
|
// To identify efficiency cores we have to compare the relative values
|
||||||
|
// Larger values are "less efficient" (aka, more performant)
|
||||||
|
var maxEfficiencyClass byte
|
||||||
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
||||||
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
||||||
|
if slpi.Relationship != RelationProcessorCore {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
||||||
|
if pr.EfficiencyClass > maxEfficiencyClass {
|
||||||
|
maxEfficiencyClass = pr.EfficiencyClass
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if maxEfficiencyClass > 0 {
|
||||||
|
slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
|
||||||
|
}
|
||||||
|
|
||||||
|
// then match up the Cores to the Packages, count up cores, threads and efficiency cores
|
||||||
|
for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
|
||||||
|
slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
|
||||||
|
if slpi.Relationship != RelationProcessorCore {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
|
||||||
|
ga0 := unsafe.Pointer(&pr.GroupMask[0])
|
||||||
|
for j := range pr.GroupCount {
|
||||||
|
gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
|
||||||
|
for _, pkg := range packages {
|
||||||
|
if pkg.IsMember(gm) {
|
||||||
|
pkg.coreCount++
|
||||||
|
if pr.Flags == 0 {
|
||||||
|
pkg.threadCount++
|
||||||
|
} else {
|
||||||
|
pkg.threadCount += 2
|
||||||
|
}
|
||||||
|
if pr.EfficiencyClass < maxEfficiencyClass {
|
||||||
|
pkg.efficiencyCoreCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sumarize the results
|
||||||
|
for i, pkg := range packages {
|
||||||
|
slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
return packages
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetCPUDetails() ([]CPU, error) {
|
||||||
|
buf, err := getLogicalProcessorInformationEx()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
packages := processSystemLogicalProcessorInforationList(buf)
|
||||||
|
cpus := make([]CPU, len(packages))
|
||||||
|
|
||||||
|
for i, pkg := range packages {
|
||||||
|
cpus[i].CoreCount = pkg.coreCount
|
||||||
|
cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
|
||||||
|
cpus[i].ThreadCount = pkg.threadCount
|
||||||
|
}
|
||||||
|
return cpus, nil
|
||||||
|
}
|
||||||
|
|
77
gpu/gpu_windows_test.go
Normal file
77
gpu/gpu_windows_test.go
Normal file
File diff suppressed because one or more lines are too long
24
gpu/types.go
24
gpu/types.go
|
@ -10,11 +10,11 @@ import (
|
||||||
type memInfo struct {
|
type memInfo struct {
|
||||||
TotalMemory uint64 `json:"total_memory,omitempty"`
|
TotalMemory uint64 `json:"total_memory,omitempty"`
|
||||||
FreeMemory uint64 `json:"free_memory,omitempty"`
|
FreeMemory uint64 `json:"free_memory,omitempty"`
|
||||||
FreeSwap uint64 `json:"free_swap,omitempty"`
|
FreeSwap uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
|
||||||
}
|
}
|
||||||
|
|
||||||
// Beginning of an `ollama info` command
|
// Beginning of an `ollama info` command
|
||||||
type GpuInfo struct {
|
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
||||||
memInfo
|
memInfo
|
||||||
Library string `json:"library,omitempty"`
|
Library string `json:"library,omitempty"`
|
||||||
|
|
||||||
|
@ -49,6 +49,17 @@ type GpuInfo struct {
|
||||||
|
|
||||||
type CPUInfo struct {
|
type CPUInfo struct {
|
||||||
GpuInfo
|
GpuInfo
|
||||||
|
CPUs []CPU
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPU type represents a CPU Package occupying a socket
|
||||||
|
type CPU struct {
|
||||||
|
ID string `cpuinfo:"processor"`
|
||||||
|
VendorID string `cpuinfo:"vendor_id"`
|
||||||
|
ModelName string `cpuinfo:"model name"`
|
||||||
|
CoreCount int
|
||||||
|
EfficiencyCoreCount int // Performance = CoreCount - Efficiency
|
||||||
|
ThreadCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
type CudaGPUInfo struct {
|
type CudaGPUInfo struct {
|
||||||
|
@ -158,3 +169,12 @@ type SystemInfo struct {
|
||||||
UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
|
UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
|
||||||
DiscoveryErrors []string `json:"discovery_errors"`
|
DiscoveryErrors []string `json:"discovery_errors"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the optimal number of threads to use for inference
|
||||||
|
func (si SystemInfo) GetOptimalThreadCount() int {
|
||||||
|
if len(si.System.CPUs) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
// Allocate thread count matching the performance cores on a single socket
|
||||||
|
return si.System.CPUs[0].CoreCount - si.System.CPUs[0].EfficiencyCoreCount
|
||||||
|
}
|
||||||
|
|
|
@ -98,15 +98,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||||
var systemFreeMemory uint64
|
var systemFreeMemory uint64
|
||||||
var systemSwapFreeMemory uint64
|
var systemSwapFreeMemory uint64
|
||||||
|
|
||||||
systemMemInfo, err := gpu.GetCPUMem()
|
systemInfo := gpu.GetSystemInfo()
|
||||||
if err != nil {
|
systemTotalMemory = systemInfo.System.TotalMemory
|
||||||
slog.Error("failed to lookup system memory", "error", err)
|
systemFreeMemory = systemInfo.System.FreeMemory
|
||||||
} else {
|
systemSwapFreeMemory = systemInfo.System.FreeSwap
|
||||||
systemTotalMemory = systemMemInfo.TotalMemory
|
|
||||||
systemFreeMemory = systemMemInfo.FreeMemory
|
|
||||||
systemSwapFreeMemory = systemMemInfo.FreeSwap
|
|
||||||
slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
|
slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
|
||||||
}
|
|
||||||
|
|
||||||
// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
|
// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
|
||||||
if opts.NumGPU == 0 {
|
if opts.NumGPU == 0 {
|
||||||
|
@ -217,8 +213,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||||
params = append(params, "--mmproj", projectors[0])
|
params = append(params, "--mmproj", projectors[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
defaultThreads := systemInfo.GetOptimalThreadCount()
|
||||||
if opts.NumThread > 0 {
|
if opts.NumThread > 0 {
|
||||||
params = append(params, "--threads", strconv.Itoa(opts.NumThread))
|
params = append(params, "--threads", strconv.Itoa(opts.NumThread))
|
||||||
|
} else if defaultThreads > 0 {
|
||||||
|
params = append(params, "--threads", strconv.Itoa(defaultThreads))
|
||||||
}
|
}
|
||||||
|
|
||||||
if !opts.F16KV {
|
if !opts.F16KV {
|
||||||
|
@ -260,15 +259,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||||
params = append(params, "--mlock")
|
params = append(params, "--mlock")
|
||||||
}
|
}
|
||||||
|
|
||||||
if gpu.IsNUMA() && gpus[0].Library == "cpu" {
|
// TODO - NUMA support currently doesn't work properly
|
||||||
numaMode := "distribute"
|
|
||||||
if runtime.GOOS == "linux" {
|
|
||||||
if _, err := exec.LookPath("numactl"); err == nil {
|
|
||||||
numaMode = "numactl"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
params = append(params, "--numa", numaMode)
|
|
||||||
}
|
|
||||||
|
|
||||||
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue