Fix up the CPU fallback selection

The memory changes and the multi-variant change had some merge
glitches I missed. This fixes them so we actually select the CPU LLM
library and the best CPU variant for the given system.
This commit is contained in:
Daniel Hiltgen 2024-01-11 14:43:16 -08:00
parent de2fbdec99
commit 7427fa1387
4 changed files with 30 additions and 16 deletions

View file

@ -34,7 +34,7 @@ func GetGPUInfo() GpuInfo {
mem, _ := getCPUMem()
if runtime.GOARCH == "amd64" {
return GpuInfo{
Library: "default",
Library: "cpu",
Variant: GetCPUVariant(),
memInfo: mem,
}

View file

@ -2,6 +2,7 @@ package llm
import (
"context"
"fmt"
"log"
"os"
"runtime"
@ -50,7 +51,6 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
graph := int64(ggml.NumGQA()) * kv / 6
info := gpu.GetGPUInfo()
library := info.Library
switch runtime.GOOS {
case "darwin":
if opts.NumGPU == 0 {
@ -59,13 +59,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
if size+kv+graph > vram {
log.Println("not enough vram available, falling back to CPU only")
info.Library = "cpu"
info.Variant = gpu.GetCPUVariant()
opts.NumGPU = 0
break
}
opts.NumGPU = 1
default:
if library == "cpu" || library == "default" {
if info.Library == "cpu" {
log.Println("GPU not available, falling back to CPU")
opts.NumGPU = 0
break
@ -73,7 +75,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
// don't use GPU at all if no layers are loaded
if opts.NumGPU == 0 {
library = "cpu"
info.Library = "cpu"
info.Variant = gpu.GetCPUVariant()
break
}
@ -100,7 +103,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
min := graph + kv*layers/maxlayers
if layers <= 0 || min > avg {
log.Printf("not enough vram available, falling back to CPU only")
library = "cpu"
info.Library = "cpu"
info.Variant = gpu.GetCPUVariant()
opts.NumGPU = 0
break
}
@ -110,8 +114,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0
gpuInfo := gpu.GetGPUInfo()
return newLlmServer(gpuInfo, model, adapters, projectors, opts)
return newLlmServer(info, model, adapters, projectors, opts)
}
// Give any native cgo implementations an opportunity to initialize

View file

@ -28,6 +28,13 @@ func getDynLibs(gpuInfo gpu.GpuInfo) []string {
if gpuInfo.Library == "default" {
return []string{"default"}
}
// TODO - temporary until we have multiple CPU variations for Darwin
// Short circuit on darwin with metal only
if len(availableDynLibs) == 1 {
if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
return []string{availableDynLibs["metal"]}
}
}
exactMatch := ""
dynLibs := []string{}

View file

@ -16,39 +16,43 @@ func TestGetDynLibs(t *testing.T) {
assert.Len(t, res, 1)
assert.Equal(t, availableDynLibs["cpu"], res[0])
variant := gpu.GetCPUVariant()
if variant != "" {
variant = "_" + variant
}
availableDynLibs = map[string]string{
"rocm_v5": "X_rocm_v5",
"rocm_v6": "X_rocm_v6",
"cpu": "X_cpu",
"rocm_v5": "X_rocm_v5",
"rocm_v6": "X_rocm_v6",
"cpu" + variant: "X_cpu",
}
assert.Equal(t, true, rocmDynLibPresent())
res = getDynLibs(gpu.GpuInfo{Library: "rocm"})
assert.Len(t, res, 3)
assert.Equal(t, availableDynLibs["rocm_v5"], res[0])
assert.Equal(t, availableDynLibs["rocm_v6"], res[1])
assert.Equal(t, availableDynLibs["cpu"], res[2])
assert.Equal(t, availableDynLibs["cpu"+variant], res[2])
res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
assert.Len(t, res, 3)
assert.Equal(t, availableDynLibs["rocm_v6"], res[0])
assert.Equal(t, availableDynLibs["rocm_v5"], res[1])
assert.Equal(t, availableDynLibs["cpu"], res[2])
assert.Equal(t, availableDynLibs["cpu"+variant], res[2])
res = getDynLibs(gpu.GpuInfo{Library: "cuda"})
assert.Len(t, res, 1)
assert.Equal(t, availableDynLibs["cpu"], res[0])
assert.Equal(t, availableDynLibs["cpu"+variant], res[0])
res = getDynLibs(gpu.GpuInfo{Library: "default"})
assert.Len(t, res, 1)
assert.Equal(t, "default", res[0])
availableDynLibs = map[string]string{
"rocm": "X_rocm_v5",
"cpu": "X_cpu",
"rocm": "X_rocm_v5",
"cpu" + variant: "X_cpu",
}
assert.Equal(t, true, rocmDynLibPresent())
res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
assert.Len(t, res, 2)
assert.Equal(t, availableDynLibs["rocm"], res[0])
assert.Equal(t, availableDynLibs["cpu"], res[1])
assert.Equal(t, availableDynLibs["cpu"+variant], res[1])
}