disable gpu for q5_0, q5_1, q8_0 quants

This commit is contained in:
Michael Yang 2023-08-03 15:40:16 -07:00
parent fccf8d179f
commit 020a3b3530

View file

@ -2,6 +2,7 @@ package llm
import ( import (
"fmt" "fmt"
"log"
"os" "os"
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
@ -31,6 +32,16 @@ func New(model string, opts api.Options) (LLM, error) {
return nil, err return nil, err
} }
switch ggml.FileType {
case FileTypeF32, FileTypeF16, FileTypeQ5_0, FileTypeQ5_1, FileTypeQ8_0:
if opts.NumGPU != 0 {
// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, F16, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
}
switch ggml.ModelFamily { switch ggml.ModelFamily {
case ModelFamilyLlama: case ModelFamilyLlama:
return newLlama(model, opts) return newLlama(model, opts)