default rope params to 0 for new models (#968)
parent 527f9a7975
commit 2e53704685
2 changed files with 12 additions and 3 deletions
llm/llama.go  10
llm/llm.go     5
llm/llama.go

@@ -306,13 +306,19 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 	params := []string{
 		"--model", model,
 		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
-		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
-		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
 		"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
 		"--embedding",
 	}
 
+	if opts.RopeFrequencyBase > 0 {
+		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
+	}
+
+	if opts.RopeFrequencyScale > 0 {
+		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
+	}
+
 	if opts.NumGQA > 0 {
 		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
 	}
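The shape of this hunk matters more than the values: instead of always emitting --rope-freq-base and --rope-freq-scale (which would hand the runner a literal 0.000000 when the option is unset), the flags are now appended only when the option is positive, mirroring the existing NumGQA guard. A minimal, self-contained sketch of that conditional-flag pattern (Options and ropeParams here are hypothetical stand-ins for api.Options and the parameter assembly in newLlama, not the real code):

package main

import "fmt"

// Options is a hypothetical cut-down stand-in for api.Options.
type Options struct {
	RopeFrequencyBase  float32
	RopeFrequencyScale float32
}

// ropeParams appends each rope flag only when the option is set;
// a zero value means "omit the flag and let the runner decide".
func ropeParams(opts Options) []string {
	var params []string
	if opts.RopeFrequencyBase > 0 {
		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
	}
	if opts.RopeFrequencyScale > 0 {
		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
	}
	return params
}

func main() {
	fmt.Println(ropeParams(Options{}))                           // []: both flags omitted
	fmt.Println(ropeParams(Options{RopeFrequencyBase: 1000000})) // base flag only
}

With both fields left at zero the command line carries no rope flags at all, so llama.cpp falls back to its own defaults, and for gguf models to the values recorded in the model file (per the TODO in the next hunk).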
llm/llm.go

@@ -85,7 +85,10 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 
 	switch ggml.Name() {
 	case "gguf":
-		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
+		// TODO: gguf will load these options automatically from the model binary
+		opts.NumGQA = 0
+		opts.RopeFrequencyBase = 0.0
+		opts.RopeFrequencyScale = 0.0
 		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
 		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
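This hunk is the half of the change the commit title describes: on the gguf path, New zeroes the rope options (and NumGQA) before calling newLlama, so the guards in the previous hunk emit no flags and the runner loads those values from the model binary, as the TODO comment notes. A small sketch of that reset step in isolation, again using a hypothetical cut-down Options type rather than the real api.Options:

package main

import "fmt"

// Options is a hypothetical cut-down stand-in for api.Options.
type Options struct {
	NumGQA             int
	RopeFrequencyBase  float32
	RopeFrequencyScale float32
}

// resetForGGUF mirrors the gguf case above: zero the fields so that no
// --gqa or --rope-freq-* flags are emitted and the model binary's own
// metadata wins.
func resetForGGUF(opts Options) Options {
	opts.NumGQA = 0
	opts.RopeFrequencyBase = 0.0
	opts.RopeFrequencyScale = 0.0
	return opts
}

func main() {
	// Whatever defaults the request carried are discarded on the gguf path.
	opts := Options{NumGQA: 8, RopeFrequencyBase: 10000, RopeFrequencyScale: 1}
	fmt.Printf("%+v\n", resetForGGUF(opts))
	// {NumGQA:0 RopeFrequencyBase:0 RopeFrequencyScale:0}
}

The ggml-family cases pass opts through unchanged, so any rope values set on the request still reach the runner for those older formats.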