diff --git a/llm/llama.go b/llm/llama.go index 80463eeb..0765388d 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -292,13 +292,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase), "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale), "--batch-size", fmt.Sprintf("%d", opts.NumBatch), + "--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--embedding", } - if numGPU > 0 { - params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU)) - } - if opts.NumGQA > 0 { params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA)) }