Fix up the CPU fallback selection
The memory changes and multi-variant change had some merge glitches I missed. This fixes them so we actually get the cpu llm lib and best variant for the given system.
This commit is contained in:
parent
de2fbdec99
commit
7427fa1387
4 changed files with 30 additions and 16 deletions
|
@ -34,7 +34,7 @@ func GetGPUInfo() GpuInfo {
|
|||
mem, _ := getCPUMem()
|
||||
if runtime.GOARCH == "amd64" {
|
||||
return GpuInfo{
|
||||
Library: "default",
|
||||
Library: "cpu",
|
||||
Variant: GetCPUVariant(),
|
||||
memInfo: mem,
|
||||
}
|
||||
|
|
15
llm/llm.go
15
llm/llm.go
|
@ -2,6 +2,7 @@ package llm
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
|
@ -50,7 +51,6 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
|||
graph := int64(ggml.NumGQA()) * kv / 6
|
||||
|
||||
info := gpu.GetGPUInfo()
|
||||
library := info.Library
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
if opts.NumGPU == 0 {
|
||||
|
@ -59,13 +59,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
|||
|
||||
if size+kv+graph > vram {
|
||||
log.Println("not enough vram available, falling back to CPU only")
|
||||
info.Library = "cpu"
|
||||
info.Variant = gpu.GetCPUVariant()
|
||||
opts.NumGPU = 0
|
||||
break
|
||||
}
|
||||
|
||||
opts.NumGPU = 1
|
||||
default:
|
||||
if library == "cpu" || library == "default" {
|
||||
if info.Library == "cpu" {
|
||||
log.Println("GPU not available, falling back to CPU")
|
||||
opts.NumGPU = 0
|
||||
break
|
||||
|
@ -73,7 +75,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
|||
|
||||
// don't use GPU at all if no layers are loaded
|
||||
if opts.NumGPU == 0 {
|
||||
library = "cpu"
|
||||
info.Library = "cpu"
|
||||
info.Variant = gpu.GetCPUVariant()
|
||||
break
|
||||
}
|
||||
|
||||
|
@ -100,7 +103,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
|||
min := graph + kv*layers/maxlayers
|
||||
if layers <= 0 || min > avg {
|
||||
log.Printf("not enough vram available, falling back to CPU only")
|
||||
library = "cpu"
|
||||
info.Library = "cpu"
|
||||
info.Variant = gpu.GetCPUVariant()
|
||||
opts.NumGPU = 0
|
||||
break
|
||||
}
|
||||
|
@ -110,8 +114,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
|||
|
||||
opts.RopeFrequencyBase = 0.0
|
||||
opts.RopeFrequencyScale = 0.0
|
||||
gpuInfo := gpu.GetGPUInfo()
|
||||
return newLlmServer(gpuInfo, model, adapters, projectors, opts)
|
||||
return newLlmServer(info, model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
// Give any native cgo implementations an opportunity to initialize
|
||||
|
|
|
@ -28,6 +28,13 @@ func getDynLibs(gpuInfo gpu.GpuInfo) []string {
|
|||
if gpuInfo.Library == "default" {
|
||||
return []string{"default"}
|
||||
}
|
||||
// TODO - temporary until we have multiple CPU variations for Darwin
|
||||
// Short circuit on darwin with metal only
|
||||
if len(availableDynLibs) == 1 {
|
||||
if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
|
||||
return []string{availableDynLibs["metal"]}
|
||||
}
|
||||
}
|
||||
|
||||
exactMatch := ""
|
||||
dynLibs := []string{}
|
||||
|
|
|
@ -16,39 +16,43 @@ func TestGetDynLibs(t *testing.T) {
|
|||
assert.Len(t, res, 1)
|
||||
assert.Equal(t, availableDynLibs["cpu"], res[0])
|
||||
|
||||
variant := gpu.GetCPUVariant()
|
||||
if variant != "" {
|
||||
variant = "_" + variant
|
||||
}
|
||||
availableDynLibs = map[string]string{
|
||||
"rocm_v5": "X_rocm_v5",
|
||||
"rocm_v6": "X_rocm_v6",
|
||||
"cpu": "X_cpu",
|
||||
"rocm_v5": "X_rocm_v5",
|
||||
"rocm_v6": "X_rocm_v6",
|
||||
"cpu" + variant: "X_cpu",
|
||||
}
|
||||
assert.Equal(t, true, rocmDynLibPresent())
|
||||
res = getDynLibs(gpu.GpuInfo{Library: "rocm"})
|
||||
assert.Len(t, res, 3)
|
||||
assert.Equal(t, availableDynLibs["rocm_v5"], res[0])
|
||||
assert.Equal(t, availableDynLibs["rocm_v6"], res[1])
|
||||
assert.Equal(t, availableDynLibs["cpu"], res[2])
|
||||
assert.Equal(t, availableDynLibs["cpu"+variant], res[2])
|
||||
|
||||
res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
|
||||
assert.Len(t, res, 3)
|
||||
assert.Equal(t, availableDynLibs["rocm_v6"], res[0])
|
||||
assert.Equal(t, availableDynLibs["rocm_v5"], res[1])
|
||||
assert.Equal(t, availableDynLibs["cpu"], res[2])
|
||||
assert.Equal(t, availableDynLibs["cpu"+variant], res[2])
|
||||
|
||||
res = getDynLibs(gpu.GpuInfo{Library: "cuda"})
|
||||
assert.Len(t, res, 1)
|
||||
assert.Equal(t, availableDynLibs["cpu"], res[0])
|
||||
assert.Equal(t, availableDynLibs["cpu"+variant], res[0])
|
||||
|
||||
res = getDynLibs(gpu.GpuInfo{Library: "default"})
|
||||
assert.Len(t, res, 1)
|
||||
assert.Equal(t, "default", res[0])
|
||||
|
||||
availableDynLibs = map[string]string{
|
||||
"rocm": "X_rocm_v5",
|
||||
"cpu": "X_cpu",
|
||||
"rocm": "X_rocm_v5",
|
||||
"cpu" + variant: "X_cpu",
|
||||
}
|
||||
assert.Equal(t, true, rocmDynLibPresent())
|
||||
res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
|
||||
assert.Len(t, res, 2)
|
||||
assert.Equal(t, availableDynLibs["rocm"], res[0])
|
||||
assert.Equal(t, availableDynLibs["cpu"], res[1])
|
||||
assert.Equal(t, availableDynLibs["cpu"+variant], res[1])
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue