diff --git a/api/types.go b/api/types.go
index c2529652..291522a3 100644
--- a/api/types.go
+++ b/api/types.go
@@ -231,7 +231,6 @@ type Options struct {
 
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA   bool `json:"numa,omitempty"`
 	NumCtx    int  `json:"num_ctx,omitempty"`
 	NumBatch  int  `json:"num_batch,omitempty"`
 	NumGPU    int  `json:"num_gpu,omitempty"`
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:    true,
 			UseMLock: false,
 			UseMMap:  nil,
-			UseNUMA:  false,
 		},
 	}
 }
diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go
index 63e88f25..34edcdc5 100644
--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@@ -1,6 +1,11 @@
 package gpu
 
 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )
 
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
+func IsNUMA() bool {
+	if runtime.GOOS != "linux" {
+		// numa support in llama.cpp is linux only
+		return false
+	}
+	ids := map[string]interface{}{}
+	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
+	for _, packageId := range packageIds {
+		id, err := os.ReadFile(packageId)
+		if err == nil {
+			ids[strings.TrimSpace(string(id))] = struct{}{}
+		}
+	}
+	return len(ids) > 1
+}
diff --git a/llm/server.go b/llm/server.go
index 7abc3bd7..152b7582 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}
 
-	if opts.UseNUMA {
-		params = append(params, "--numa")
+	if gpu.IsNUMA() {
+		numaMode := "distribute"
+		if runtime.GOOS == "linux" {
+			if _, err := exec.LookPath("numactl"); err == nil {
+				numaMode = "numactl"
+			}
+		}
+		params = append(params, "--numa", numaMode)
 	}
 
 	params = append(params, "--parallel", strconv.Itoa(numParallel))