Merge pull request #1935 from dhiltgen/cpu_fallback

Fix up the CPU fallback selection
2024-01-11 15:52:32 -08:00 · 2024-01-11 15:52:32 -08:00 · 3773fb6465
commit 3773fb6465
parent f84537e0e0 7427fa1387
4 changed files with 29 additions and 16 deletions
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -34,7 +34,7 @@ func GetGPUInfo() GpuInfo {
 	mem, _ := getCPUMem()
 	if runtime.GOARCH == "amd64" {
 		return GpuInfo{
-			Library: "default",
+			Library: "cpu",
 			Variant: GetCPUVariant(),
 			memInfo: mem,
 		}
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -51,7 +51,6 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	graph := int64(ggml.NumGQA()) * kv / 6

 	info := gpu.GetGPUInfo()
-	library := info.Library
 	switch runtime.GOOS {
 	case "darwin":
 		if opts.NumGPU == 0 {
@@ -60,13 +59,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)

 		if size+kv+graph > vram {
 			log.Println("not enough vram available, falling back to CPU only")
+			info.Library = "cpu"
+			info.Variant = gpu.GetCPUVariant()
 			opts.NumGPU = 0
 			break
 		}

 		opts.NumGPU = 1
 	default:
-		if library == "cpu" || library == "default" {
+		if info.Library == "cpu" {
 			log.Println("GPU not available, falling back to CPU")
 			opts.NumGPU = 0
 			break
@@ -74,7 +75,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)

 		// don't use GPU at all if no layers are loaded
 		if opts.NumGPU == 0 {
-			library = "cpu"
+			info.Library = "cpu"
+			info.Variant = gpu.GetCPUVariant()
 			break
 		}

@@ -101,7 +103,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 		min := graph + kv*layers/maxlayers
 		if layers <= 0 || min > avg {
 			log.Printf("not enough vram available, falling back to CPU only")
-			library = "cpu"
+			info.Library = "cpu"
+			info.Variant = gpu.GetCPUVariant()
 			opts.NumGPU = 0
 			break
 		}
@@ -111,8 +114,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)

 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
-	gpuInfo := gpu.GetGPUInfo()
-	return newLlmServer(gpuInfo, model, adapters, projectors, opts)
+	return newLlmServer(info, model, adapters, projectors, opts)
 }

 // Give any native cgo implementations an opportunity to initialize
--- a/llm/payload_common.go
+++ b/llm/payload_common.go
@@ -28,6 +28,13 @@ func getDynLibs(gpuInfo gpu.GpuInfo) []string {
 	if gpuInfo.Library == "default" {
 		return []string{"default"}
 	}
+	// TODO - temporary until we have multiple CPU variations for Darwin
+	// Short circuit on darwin with metal only
+	if len(availableDynLibs) == 1 {
+		if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
+			return []string{availableDynLibs["metal"]}
+		}
+	}

 	exactMatch := ""
 	dynLibs := []string{}
--- a/llm/payload_test.go
+++ b/llm/payload_test.go
@@ -16,39 +16,43 @@ func TestGetDynLibs(t *testing.T) {
 	assert.Len(t, res, 1)
 	assert.Equal(t, availableDynLibs["cpu"], res[0])

+	variant := gpu.GetCPUVariant()
+	if variant != "" {
+		variant = "_" + variant
+	}
 	availableDynLibs = map[string]string{
-		"rocm_v5": "X_rocm_v5",
-		"rocm_v6": "X_rocm_v6",
-		"cpu":     "X_cpu",
+		"rocm_v5":       "X_rocm_v5",
+		"rocm_v6":       "X_rocm_v6",
+		"cpu" + variant: "X_cpu",
 	}
 	assert.Equal(t, true, rocmDynLibPresent())
 	res = getDynLibs(gpu.GpuInfo{Library: "rocm"})
 	assert.Len(t, res, 3)
 	assert.Equal(t, availableDynLibs["rocm_v5"], res[0])
 	assert.Equal(t, availableDynLibs["rocm_v6"], res[1])
-	assert.Equal(t, availableDynLibs["cpu"], res[2])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[2])

 	res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
 	assert.Len(t, res, 3)
 	assert.Equal(t, availableDynLibs["rocm_v6"], res[0])
 	assert.Equal(t, availableDynLibs["rocm_v5"], res[1])
-	assert.Equal(t, availableDynLibs["cpu"], res[2])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[2])

 	res = getDynLibs(gpu.GpuInfo{Library: "cuda"})
 	assert.Len(t, res, 1)
-	assert.Equal(t, availableDynLibs["cpu"], res[0])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[0])

 	res = getDynLibs(gpu.GpuInfo{Library: "default"})
 	assert.Len(t, res, 1)
 	assert.Equal(t, "default", res[0])

 	availableDynLibs = map[string]string{
-		"rocm": "X_rocm_v5",
-		"cpu":  "X_cpu",
+		"rocm":          "X_rocm_v5",
+		"cpu" + variant: "X_cpu",
 	}
 	assert.Equal(t, true, rocmDynLibPresent())
 	res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
 	assert.Len(t, res, 2)
 	assert.Equal(t, availableDynLibs["rocm"], res[0])
-	assert.Equal(t, availableDynLibs["cpu"], res[1])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[1])
 }