diff --git a/llm/llama.go b/llm/llama.go index 06474099..f731acf4 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -306,13 +306,19 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers params := []string{ "--model", model, "--ctx-size", fmt.Sprintf("%d", opts.NumCtx), - "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase), - "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale), "--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--embedding", } + if opts.RopeFrequencyBase > 0 { + params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase)) + } + + if opts.RopeFrequencyScale > 0 { + params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale)) + } + if opts.NumGQA > 0 { params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA)) } diff --git a/llm/llm.go b/llm/llm.go index e25558f0..34017dad 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -85,7 +85,10 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error switch ggml.Name() { case "gguf": - opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions + // TODO: gguf will load these options automatically from the model binary + opts.NumGQA = 0 + opts.RopeFrequencyBase = 0.0 + opts.RopeFrequencyScale = 0.0 return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts) case "ggml", "ggmf", "ggjt", "ggla": return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)