Merge pull request #6186 from dhiltgen/numa
Implement linux NUMA detection
This commit is contained in:
commit
50ee8b5f56
3 changed files with 29 additions and 4 deletions
|
@ -231,7 +231,6 @@ type Options struct {
|
||||||
|
|
||||||
// Runner options which must be set when the model is loaded into memory
|
// Runner options which must be set when the model is loaded into memory
|
||||||
type Runner struct {
|
type Runner struct {
|
||||||
UseNUMA bool `json:"numa,omitempty"`
|
|
||||||
NumCtx int `json:"num_ctx,omitempty"`
|
NumCtx int `json:"num_ctx,omitempty"`
|
||||||
NumBatch int `json:"num_batch,omitempty"`
|
NumBatch int `json:"num_batch,omitempty"`
|
||||||
NumGPU int `json:"num_gpu,omitempty"`
|
NumGPU int `json:"num_gpu,omitempty"`
|
||||||
|
@ -615,7 +614,6 @@ func DefaultOptions() Options {
|
||||||
F16KV: true,
|
F16KV: true,
|
||||||
UseMLock: false,
|
UseMLock: false,
|
||||||
UseMMap: nil,
|
UseMMap: nil,
|
||||||
UseNUMA: false,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
package gpu
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/sys/cpu"
|
"golang.org/x/sys/cpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
|
||||||
// else LCD
|
// else LCD
|
||||||
return CPUCapabilityNone
|
return CPUCapabilityNone
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func IsNUMA() bool {
|
||||||
|
if runtime.GOOS != "linux" {
|
||||||
|
// numa support in llama.cpp is linux only
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
ids := map[string]interface{}{}
|
||||||
|
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
|
||||||
|
for _, packageId := range packageIds {
|
||||||
|
id, err := os.ReadFile(packageId)
|
||||||
|
if err == nil {
|
||||||
|
ids[strings.TrimSpace(string(id))] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(ids) > 1
|
||||||
|
}
|
||||||
|
|
|
@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||||
params = append(params, "--mlock")
|
params = append(params, "--mlock")
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.UseNUMA {
|
if gpu.IsNUMA() {
|
||||||
params = append(params, "--numa")
|
numaMode := "distribute"
|
||||||
|
if runtime.GOOS == "linux" {
|
||||||
|
if _, err := exec.LookPath("numactl"); err == nil {
|
||||||
|
numaMode = "numactl"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
params = append(params, "--numa", numaMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
||||||
|
|
Loading…
Reference in a new issue