diff --git a/api/types.go b/api/types.go
index c2529652..291522a3 100644
--- a/api/types.go
+++ b/api/types.go
@@ -231,7 +231,6 @@ type Options struct {
 
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA   bool `json:"numa,omitempty"`
 	NumCtx    int  `json:"num_ctx,omitempty"`
 	NumBatch  int  `json:"num_batch,omitempty"`
 	NumGPU    int  `json:"num_gpu,omitempty"`
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:    true,
 			UseMLock: false,
 			UseMMap:  nil,
-			UseNUMA:  false,
 		},
 	}
 }
diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go
index 63e88f25..34edcdc5 100644
--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@@ -1,6 +1,11 @@
 package gpu
 
 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )
 
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
+func IsNUMA() bool {
+	if runtime.GOOS != "linux" {
+		// numa support in llama.cpp is linux only
+		return false
+	}
+	ids := map[string]interface{}{}
+	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
+	for _, packageId := range packageIds {
+		id, err := os.ReadFile(packageId)
+		if err == nil {
+			ids[strings.TrimSpace(string(id))] = struct{}{}
+		}
+	}
+	return len(ids) > 1
+}
diff --git a/llm/server.go b/llm/server.go
index 7abc3bd7..152b7582 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}
 
-	if opts.UseNUMA {
-		params = append(params, "--numa")
+	if gpu.IsNUMA() {
+		numaMode := "distribute"
+		if runtime.GOOS == "linux" {
+			if _, err := exec.LookPath("numactl"); err == nil {
+				numaMode = "numactl"
+			}
+		}
+		params = append(params, "--numa", numaMode)
 	}
 
 	params = append(params, "--parallel", strconv.Itoa(numParallel))