Merge pull request #6186 from dhiltgen/numa

Implement linux NUMA detection
2024-08-05 15:20:06 -07:00 · 2024-08-05 15:20:06 -07:00 · 50ee8b5f56
commit 50ee8b5f56
parent 03bdac0595 f457d63400
3 changed files with 29 additions and 4 deletions
--- a/api/types.go
+++ b/api/types.go
@ -231,7 +231,6 @@ type Options struct {
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
 	UseNUMA   bool  `json:"numa,omitempty"`
 	NumCtx    int   `json:"num_ctx,omitempty"`
 	NumBatch  int   `json:"num_batch,omitempty"`
 	NumGPU    int   `json:"num_gpu,omitempty"`
@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:     true,
 			UseMLock:  false,
 			UseMMap:   nil,
 			UseNUMA:   false,
 		},
 	}
 }
--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@ -1,6 +1,11 @@
 package gpu
 import (
 	"os"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"golang.org/x/sys/cpu"
 )
@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
 // IsNUMA reports whether the host exposes more than one physical CPU package.
 // It collects the distinct physical_package_id values found under
 // /sys/devices/system/cpu and returns true as soon as a second one is seen.
 // On any platform other than Linux it returns false without touching the
 // file system.
 func IsNUMA() bool {
 	if runtime.GOOS != "linux" {
 		// NUMA support in llama.cpp is Linux only.
 		return false
 	}
 	// Glob can only fail on a malformed pattern; this pattern is a constant,
 	// so the error is deliberately ignored.
 	paths, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
 	ids := make(map[string]struct{})
 	for _, path := range paths {
 		data, err := os.ReadFile(path)
 		if err != nil {
 			// Best effort: skip CPUs whose topology entry cannot be read.
 			continue
 		}
 		ids[strings.TrimSpace(string(data))] = struct{}{}
 		if len(ids) > 1 {
 			// A second distinct package ID settles the answer; no need to
 			// read the remaining CPUs.
 			return true
 		}
 	}
 	return false
 }
--- a/llm/server.go
+++ b/llm/server.go
@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}
-	if opts.UseNUMA {
+	if gpu.IsNUMA() {
-		params = append(params, "--numa")
+		numaMode := "distribute"
 		if runtime.GOOS == "linux" {
 			if _, err := exec.LookPath("numactl"); err == nil {
 				numaMode = "numactl"
 			}
 		}
 		params = append(params, "--numa", numaMode)
 	}
 	params = append(params, "--parallel", strconv.Itoa(numParallel))