Merge pull request #6186 from dhiltgen/numa

Implement linux NUMA detection
This commit is contained in:
Daniel Hiltgen 2024-08-05 15:20:06 -07:00 committed by GitHub
commit 50ee8b5f56
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 29 additions and 4 deletions

View file

@ -231,7 +231,6 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory // Runner options which must be set when the model is loaded into memory
type Runner struct { type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"` NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"` NumBatch int `json:"num_batch,omitempty"`
NumGPU int `json:"num_gpu,omitempty"` NumGPU int `json:"num_gpu,omitempty"`
@ -615,7 +614,6 @@ func DefaultOptions() Options {
F16KV: true, F16KV: true,
UseMLock: false, UseMLock: false,
UseMMap: nil, UseMMap: nil,
UseNUMA: false,
}, },
} }
} }

View file

@ -1,6 +1,11 @@
package gpu package gpu
import ( import (
"os"
"path/filepath"
"runtime"
"strings"
"golang.org/x/sys/cpu" "golang.org/x/sys/cpu"
) )
@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
// else LCD // else LCD
return CPUCapabilityNone return CPUCapabilityNone
} }
// IsNUMA reports whether the host exposes more than one distinct physical CPU
// package ID under sysfs. It always returns false on non-Linux systems.
//
// Detection is best effort: topology files that cannot be read are skipped,
// so a host whose sysfs entries are unreadable is reported as non-NUMA.
//
// NOTE(review): this counts CPU packages (sockets), which is used here as a
// stand-in for NUMA nodes — confirm that is the intended heuristic.
func IsNUMA() bool {
	if runtime.GOOS != "linux" {
		// numa support in llama.cpp is linux only
		return false
	}

	// Glob only returns an error for a malformed pattern; the pattern here is
	// a constant, so the error is safe to ignore.
	paths, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")

	seen := make(map[string]struct{})
	for _, path := range paths {
		data, err := os.ReadFile(path)
		if err != nil {
			// Skip CPUs whose topology file cannot be read.
			continue
		}
		seen[strings.TrimSpace(string(data))] = struct{}{}
		if len(seen) > 1 {
			// A second distinct package ID settles the answer; no need to
			// read the remaining CPUs.
			return true
		}
	}
	return false
}

View file

@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock") params = append(params, "--mlock")
} }
if opts.UseNUMA { if gpu.IsNUMA() {
params = append(params, "--numa") numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
numaMode = "numactl"
}
}
params = append(params, "--numa", numaMode)
} }
params = append(params, "--parallel", strconv.Itoa(numParallel)) params = append(params, "--parallel", strconv.Itoa(numParallel))