From f457d63400f9859acdfff1853c53af13429acea5 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 5 Aug 2024 12:56:20 -0700 Subject: [PATCH] Implement Linux NUMA detection. If the system has multiple NUMA nodes, enable NUMA support in llama.cpp. If we detect numactl in the path, use that; otherwise use the basic "distribute" mode. --- api/types.go | 2 -- gpu/cpu_common.go | 21 +++++++++++++++++++++ llm/server.go | 10 ++++++++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/api/types.go b/api/types.go index c2529652..291522a3 100644 --- a/api/types.go +++ b/api/types.go @@ -231,7 +231,6 @@ type Options struct { // Runner options which must be set when the model is loaded into memory type Runner struct { - UseNUMA bool `json:"numa,omitempty"` NumCtx int `json:"num_ctx,omitempty"` NumBatch int `json:"num_batch,omitempty"` NumGPU int `json:"num_gpu,omitempty"` @@ -615,7 +614,6 @@ func DefaultOptions() Options { F16KV: true, UseMLock: false, UseMMap: nil, - UseNUMA: false, }, } } diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go index 63e88f25..34edcdc5 100644 --- a/gpu/cpu_common.go +++ b/gpu/cpu_common.go @@ -1,6 +1,11 @@ package gpu import ( + "os" + "path/filepath" + "runtime" + "strings" + "golang.org/x/sys/cpu" ) @@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability { // else LCD return CPUCapabilityNone } + +func IsNUMA() bool { + if runtime.GOOS != "linux" { + // numa support in llama.cpp is linux only + return false + } + ids := map[string]interface{}{} + packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id") + for _, packageId := range packageIds { + id, err := os.ReadFile(packageId) + if err == nil { + ids[strings.TrimSpace(string(id))] = struct{}{} + } + } + return len(ids) > 1 +} diff --git a/llm/server.go b/llm/server.go index 7abc3bd7..152b7582 100644 --- a/llm/server.go +++ b/llm/server.go @@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr params = 
append(params, "--mlock") } - if opts.UseNUMA { - params = append(params, "--numa") + if gpu.IsNUMA() { + numaMode := "distribute" + if runtime.GOOS == "linux" { + if _, err := exec.LookPath("numactl"); err == nil { + numaMode = "numactl" + } + } + params = append(params, "--numa", numaMode) } params = append(params, "--parallel", strconv.Itoa(numParallel))