fix: regression with unsupported Metal types
Omitting `--n-gpu-layers` means Metal is used on macOS, which isn't correct: Ollama passes `num_gpu=0` to explicitly disable the GPU for file types that are not implemented in Metal. Only pass `--n-gpu-layers` when `numGPU > 0`.
This commit is contained in:
parent
9ef2fce33a
commit
cb4a80b693
1 changed file with 1 addition and 4 deletions
|
@@ -292,13 +292,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
|
||||||
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
|
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
|
||||||
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
|
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
|
||||||
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
||||||
|
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
|
||||||
"--embedding",
|
"--embedding",
|
||||||
}
|
}
|
||||||
|
|
||||||
if numGPU > 0 {
|
|
||||||
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.NumGQA > 0 {
|
if opts.NumGQA > 0 {
|
||||||
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
|
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue