use model defaults for num_gqa, rope_frequency_base and rope_frequency_scale (#1983)

This commit is contained in:
Jeffrey Morgan 2024-05-09 09:06:13 -07:00 committed by GitHub
parent daa1a032f7
commit d5eec16d23
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 61 additions and 86 deletions

View file

@ -4,6 +4,7 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"log/slog"
"math" "math"
"os" "os"
"reflect" "reflect"
@ -161,7 +162,6 @@ type Runner struct {
UseNUMA bool `json:"numa,omitempty"` UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"` NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"` NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"` NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"` MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"` LowVRAM bool `json:"low_vram,omitempty"`
@ -171,11 +171,6 @@ type Runner struct {
UseMMap bool `json:"use_mmap,omitempty"` UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"` UseMLock bool `json:"use_mlock,omitempty"`
NumThread int `json:"num_thread,omitempty"` NumThread int `json:"num_thread,omitempty"`
// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
} }
// EmbeddingRequest is the request passed to [Client.Embeddings]. // EmbeddingRequest is the request passed to [Client.Embeddings].
@ -359,8 +354,6 @@ func (m *Metrics) Summary() {
} }
} }
// ErrInvalidOpts is returned when invalid options are passed to the client.
var ErrInvalidOpts = errors.New("invalid options")
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST") var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
func (opts *Options) FromMap(m map[string]interface{}) error { func (opts *Options) FromMap(m map[string]interface{}) error {
@ -376,9 +369,13 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
} }
} }
invalidOpts := []string{}
for key, val := range m { for key, val := range m {
if opt, ok := jsonOpts[key]; ok { opt, ok := jsonOpts[key]
if !ok {
slog.Warn("invalid option provided", "option", key)
continue
}
field := valueOpts.FieldByName(opt.Name) field := valueOpts.FieldByName(opt.Name)
if field.IsValid() && field.CanSet() { if field.IsValid() && field.CanSet() {
if val == nil { if val == nil {
@ -435,14 +432,8 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
return fmt.Errorf("unknown type loading config params: %v", field.Kind()) return fmt.Errorf("unknown type loading config params: %v", field.Kind())
} }
} }
} else {
invalidOpts = append(invalidOpts, key)
}
} }
if len(invalidOpts) > 0 {
return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
}
return nil return nil
} }
@ -475,7 +466,6 @@ func DefaultOptions() Options {
NumCtx: 2048, NumCtx: 2048,
NumBatch: 512, NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide NumThread: 0, // let the runtime decide
LowVRAM: false, LowVRAM: false,
F16KV: true, F16KV: true,

View file

@ -313,7 +313,6 @@ curl http://localhost:11434/api/generate -d '{
"numa": false, "numa": false,
"num_ctx": 1024, "num_ctx": 1024,
"num_batch": 2, "num_batch": 2,
"num_gqa": 1,
"num_gpu": 1, "num_gpu": 1,
"main_gpu": 0, "main_gpu": 0,
"low_vram": false, "low_vram": false,
@ -321,8 +320,6 @@ curl http://localhost:11434/api/generate -d '{
"vocab_only": false, "vocab_only": false,
"use_mmap": true, "use_mmap": true,
"use_mlock": false, "use_mlock": false,
"rope_frequency_base": 1.1,
"rope_frequency_scale": 0.8,
"num_thread": 8 "num_thread": 8
} }
}' }'

View file

@ -127,10 +127,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
opts, err := modelOptions(model, req.Options) opts, err := modelOptions(model, req.Options)
if err != nil { if err != nil {
if errors.Is(err, api.ErrInvalidOpts) {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
@ -370,10 +366,6 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
opts, err := modelOptions(model, req.Options) opts, err := modelOptions(model, req.Options)
if err != nil { if err != nil {
if errors.Is(err, api.ErrInvalidOpts) {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
@ -1177,10 +1169,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
opts, err := modelOptions(model, req.Options) opts, err := modelOptions(model, req.Options)
if err != nil { if err != nil {
if errors.Is(err, api.ErrInvalidOpts) {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }