Implement Linux NUMA detection

If the system has multiple NUMA nodes, enable NUMA support in llama.cpp. If numactl is found on the PATH, use it; otherwise fall back to the basic "distribute" mode.
parent 39f2bc6bfc
commit f457d63400
3 changed files with 29 additions and 4 deletions
@@ -231,7 +231,6 @@ type Options struct {
 
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA  bool `json:"numa,omitempty"`
 	NumCtx   int  `json:"num_ctx,omitempty"`
 	NumBatch int  `json:"num_batch,omitempty"`
 	NumGPU   int  `json:"num_gpu,omitempty"`
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:    true,
 			UseMLock: false,
 			UseMMap:  nil,
-			UseNUMA:  false,
 		},
 	}
 }
@@ -1,6 +1,11 @@
 package gpu
 
 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )
 
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
+func IsNUMA() bool {
+	if runtime.GOOS != "linux" {
+		// numa support in llama.cpp is linux only
+		return false
+	}
+	ids := map[string]interface{}{}
+	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
+	for _, packageId := range packageIds {
+		id, err := os.ReadFile(packageId)
+		if err == nil {
+			ids[strings.TrimSpace(string(id))] = struct{}{}
+		}
+	}
+	return len(ids) > 1
+}
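
For illustration, a minimal standalone sketch of the same detection idea, runnable on its own: the sysfs glob is the one used in the diff above, while the main wrapper and output are assumptions added for the example. Note that the check counts distinct physical packages (CPU sockets) as a proxy for NUMA nodes rather than enumerating /sys/devices/system/node.

    package main

    import (
    	"fmt"
    	"os"
    	"path/filepath"
    	"runtime"
    	"strings"
    )

    func main() {
    	if runtime.GOOS != "linux" {
    		fmt.Println("NUMA detection is linux-only")
    		return
    	}
    	// collect the distinct socket ids exposed by the kernel; each file
    	// holds the physical package id of one logical cpu, e.g. "0" or "1"
    	ids := map[string]struct{}{}
    	paths, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
    	for _, p := range paths {
    		if b, err := os.ReadFile(p); err == nil {
    			ids[strings.TrimSpace(string(b))] = struct{}{}
    		}
    	}
    	fmt.Printf("distinct packages: %d, multi-NUMA: %v\n", len(ids), len(ids) > 1)
    }
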
@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}
 
-	if opts.UseNUMA {
-		params = append(params, "--numa")
+	if gpu.IsNUMA() {
+		numaMode := "distribute"
+		if runtime.GOOS == "linux" {
+			if _, err := exec.LookPath("numactl"); err == nil {
+				numaMode = "numactl"
+			}
+		}
+		params = append(params, "--numa", numaMode)
 	}
 
 	params = append(params, "--parallel", strconv.Itoa(numParallel))
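
The mode-selection logic in isolation, as a hedged sketch: pickNUMAMode is a hypothetical helper name (the diff above inlines this logic in NewLlamaServer), but the numactl lookup and the flag values match the change.

    package main

    import (
    	"fmt"
    	"os/exec"
    	"runtime"
    )

    // pickNUMAMode prefers numactl when it is on the PATH and falls back
    // to llama.cpp's basic "distribute" policy otherwise
    func pickNUMAMode() string {
    	mode := "distribute"
    	if runtime.GOOS == "linux" {
    		if _, err := exec.LookPath("numactl"); err == nil {
    			mode = "numactl"
    		}
    	}
    	return mode
    }

    func main() {
    	params := []string{"--mlock"}
    	params = append(params, "--numa", pickNUMAMode())
    	fmt.Println(params) // e.g. [--mlock --numa numactl] when numactl is installed
    }
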