disable gpu for q5_0, q5_1, q8_0 quants
This commit is contained in:
parent
fccf8d179f
commit
020a3b3530
1 changed file with 11 additions and 0 deletions
11
llm/llm.go
11
llm/llm.go
|
@ -2,6 +2,7 @@ package llm
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
|
@ -31,6 +32,16 @@ func New(model string, opts api.Options) (LLM, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
switch ggml.FileType {
|
||||
case FileTypeF32, FileTypeF16, FileTypeQ5_0, FileTypeQ5_1, FileTypeQ8_0:
|
||||
if opts.NumGPU != 0 {
|
||||
// Q5_0, Q5_1, and Q8_0 do not support Metal API and will
|
||||
// cause the runner to segmentation fault so disable GPU
|
||||
log.Printf("WARNING: GPU disabled for F32, F16, Q5_0, Q5_1, and Q8_0")
|
||||
opts.NumGPU = 0
|
||||
}
|
||||
}
|
||||
|
||||
switch ggml.ModelFamily {
|
||||
case ModelFamilyLlama:
|
||||
return newLlama(model, opts)
|
||||
|
|
Loading…
Reference in a new issue