use model defaults for num_gqa
, rope_frequency_base
and rope_frequency_scale
(#1983)
This commit is contained in:
parent
daa1a032f7
commit
d5eec16d23
3 changed files with 61 additions and 86 deletions
24
api/types.go
24
api/types.go
|
@ -4,6 +4,7 @@ import (
|
|||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
|
@ -161,7 +162,6 @@ type Runner struct {
|
|||
UseNUMA bool `json:"numa,omitempty"`
|
||||
NumCtx int `json:"num_ctx,omitempty"`
|
||||
NumBatch int `json:"num_batch,omitempty"`
|
||||
NumGQA int `json:"num_gqa,omitempty"`
|
||||
NumGPU int `json:"num_gpu,omitempty"`
|
||||
MainGPU int `json:"main_gpu,omitempty"`
|
||||
LowVRAM bool `json:"low_vram,omitempty"`
|
||||
|
@ -171,11 +171,6 @@ type Runner struct {
|
|||
UseMMap bool `json:"use_mmap,omitempty"`
|
||||
UseMLock bool `json:"use_mlock,omitempty"`
|
||||
NumThread int `json:"num_thread,omitempty"`
|
||||
|
||||
// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
|
||||
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
||||
// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
|
||||
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
||||
}
|
||||
|
||||
// EmbeddingRequest is the request passed to [Client.Embeddings].
|
||||
|
@ -359,8 +354,6 @@ func (m *Metrics) Summary() {
|
|||
}
|
||||
}
|
||||
|
||||
// ErrInvalidOpts is returned when invalid options are passed to the client.
|
||||
var ErrInvalidOpts = errors.New("invalid options")
|
||||
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
|
||||
|
||||
func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||
|
@ -376,9 +369,13 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
|||
}
|
||||
}
|
||||
|
||||
invalidOpts := []string{}
|
||||
for key, val := range m {
|
||||
if opt, ok := jsonOpts[key]; ok {
|
||||
opt, ok := jsonOpts[key]
|
||||
if !ok {
|
||||
slog.Warn("invalid option provided", "option", opt.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
field := valueOpts.FieldByName(opt.Name)
|
||||
if field.IsValid() && field.CanSet() {
|
||||
if val == nil {
|
||||
|
@ -435,14 +432,8 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
|||
return fmt.Errorf("unknown type loading config params: %v", field.Kind())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
invalidOpts = append(invalidOpts, key)
|
||||
}
|
||||
}
|
||||
|
||||
if len(invalidOpts) > 0 {
|
||||
return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -475,7 +466,6 @@ func DefaultOptions() Options {
|
|||
NumCtx: 2048,
|
||||
NumBatch: 512,
|
||||
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
||||
NumGQA: 1,
|
||||
NumThread: 0, // let the runtime decide
|
||||
LowVRAM: false,
|
||||
F16KV: true,
|
||||
|
|
|
@ -313,7 +313,6 @@ curl http://localhost:11434/api/generate -d '{
|
|||
"numa": false,
|
||||
"num_ctx": 1024,
|
||||
"num_batch": 2,
|
||||
"num_gqa": 1,
|
||||
"num_gpu": 1,
|
||||
"main_gpu": 0,
|
||||
"low_vram": false,
|
||||
|
@ -321,8 +320,6 @@ curl http://localhost:11434/api/generate -d '{
|
|||
"vocab_only": false,
|
||||
"use_mmap": true,
|
||||
"use_mlock": false,
|
||||
"rope_frequency_base": 1.1,
|
||||
"rope_frequency_scale": 0.8,
|
||||
"num_thread": 8
|
||||
}
|
||||
}'
|
||||
|
|
|
@ -127,10 +127,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
@ -370,10 +366,6 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
|||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
@ -1177,10 +1169,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue