From cb4a80b6934c1b62c98a01aab4ae156ba340acc5 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 16 Oct 2023 14:37:17 -0700 Subject: [PATCH] fix: regression with unsupported Metal types Omitting `--n-gpu-layers` means "use Metal" on macOS, which isn't correct, since Ollama uses `num_gpu=0` to explicitly disable the GPU for file types that are not implemented in Metal. --- llm/llama.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llm/llama.go b/llm/llama.go index 80463eeb..0765388d 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -292,13 +292,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase), "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale), "--batch-size", fmt.Sprintf("%d", opts.NumBatch), + "--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--embedding", } - if numGPU > 0 { - params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU)) - } - if opts.NumGQA > 0 { params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA)) }