Disable GPU for Q5_0, Q5_1, and Q8_0 quants
This commit is contained in:
parent
fccf8d179f
commit
020a3b3530
1 changed file with 11 additions and 0 deletions
11
llm/llm.go
11
llm/llm.go
|
@ -2,6 +2,7 @@ package llm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/jmorganca/ollama/api"
|
"github.com/jmorganca/ollama/api"
|
||||||
|
@ -31,6 +32,16 @@ func New(model string, opts api.Options) (LLM, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch ggml.FileType {
|
||||||
|
case FileTypeF32, FileTypeF16, FileTypeQ5_0, FileTypeQ5_1, FileTypeQ8_0:
|
||||||
|
if opts.NumGPU != 0 {
|
||||||
|
// Q5_0, Q5_1, and Q8_0 do not support Metal API and will
|
||||||
|
// cause the runner to segmentation fault so disable GPU
|
||||||
|
log.Printf("WARNING: GPU disabled for F32, F16, Q5_0, Q5_1, and Q8_0")
|
||||||
|
opts.NumGPU = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch ggml.ModelFamily {
|
switch ggml.ModelFamily {
|
||||||
case ModelFamilyLlama:
|
case ModelFamilyLlama:
|
||||||
return newLlama(model, opts)
|
return newLlama(model, opts)
|
||||||
|
|
Loading…
Reference in a new issue