no rope parameters
parent fc8e108642
commit be517e491c
4 changed files with 0 additions and 14 deletions
@@ -121,8 +121,6 @@ type Runner struct {
 	VocabOnly          bool    `json:"vocab_only,omitempty"`
 	UseMMap            bool    `json:"use_mmap,omitempty"`
 	UseMLock           bool    `json:"use_mlock,omitempty"`
-	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
-	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 	NumThread          int     `json:"num_thread,omitempty"`
 }
 
@@ -383,8 +381,6 @@ func DefaultOptions() Options {
 		Runner: Runner{
 			// options set when the model is loaded
 			NumCtx:             2048,
-			RopeFrequencyBase:  10000.0,
-			RopeFrequencyScale: 1.0,
 			NumBatch:           512,
 			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
 			NumGQA:             1,
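For context on what dropping these fields means for API clients, here is a minimal, self-contained sketch (not part of this commit; the Runner struct below is a trimmed-down stand-in for the real one): with Go's encoding/json, unknown keys are ignored by default, so a request body that still sends rope_frequency_base simply has no effect once the field is gone.

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed-down stand-in for the real Runner options struct after this commit:
// the rope_frequency_base / rope_frequency_scale fields no longer exist.
type Runner struct {
	NumCtx    int  `json:"num_ctx,omitempty"`
	NumThread int  `json:"num_thread,omitempty"`
	UseMMap   bool `json:"use_mmap,omitempty"`
}

func main() {
	// A client payload that still carries one of the removed keys.
	payload := []byte(`{"num_ctx": 4096, "rope_frequency_base": 1000000.0}`)

	var r Runner
	if err := json.Unmarshal(payload, &r); err != nil {
		panic(err)
	}

	// encoding/json drops unknown fields silently, so the rope key is
	// discarded and only num_ctx takes effect.
	fmt.Printf("%+v\n", r) // {NumCtx:4096 NumThread:0 UseMMap:false}
}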
@@ -32,7 +32,6 @@ type Params struct {
 	AttentionHeads int     `json:"num_attention_heads"` // n_head
 	KeyValHeads    int     `json:"num_key_value_heads"`
 	NormEPS        float64 `json:"rms_norm_eps"`
-	RopeFreqBase   float64 `json:"rope_theta"`
 	BoSTokenID     int     `json:"bos_token_id"`
 	EoSTokenID     int     `json:"eos_token_id"`
 	HeadDimension  int     `json:"head_dim"`
@@ -144,7 +144,6 @@ func (m *MistralModel) WriteGGUF() (string, error) {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"llama.rope.freq_base":                   float32(m.Params.RopeFreqBase),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "llama",
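As an illustration of the data structure this hunk edits, here is a hedged sketch of a mixed-type metadata map like the one WriteGGUF builds; map[string]any and the literal values are stand-ins for the project's own KV type and real model parameters, and after this change there is simply no llama.rope.freq_base entry for the writer to emit.

package main

import "fmt"

// A simplified mixed-type key/value metadata map. The keys mirror the hunk
// above, minus llama.rope.freq_base; the values are made-up examples.
func main() {
	kv := map[string]any{
		"llama.attention.head_count":             uint32(32),
		"llama.attention.head_count_kv":          uint32(8),
		"llama.attention.layer_norm_rms_epsilon": float32(1e-5),
		"general.file_type":                      uint32(1),
		"tokenizer.ggml.model":                   "llama",
	}

	// A GGUF writer has to tag each value with its concrete type, so a type
	// switch like this is the natural way to walk such a map.
	for k, v := range kv {
		switch v := v.(type) {
		case uint32:
			fmt.Printf("%s -> uint32(%d)\n", k, v)
		case float32:
			fmt.Printf("%s -> float32(%g)\n", k, v)
		case string:
			fmt.Printf("%s -> %q\n", k, v)
		}
	}
}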
@@ -172,14 +172,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Options
 		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}
 
-	if opts.RopeFrequencyBase > 0 {
-		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
-	}
-
-	if opts.RopeFrequencyScale > 0 {
-		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
-	}
-
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])
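To show the flag-building pattern that remains after this hunk, here is a minimal sketch with a hypothetical trimmed-down options struct (the real one is api.Options, and the if-condition around --main-gpu is an assumption, since the diff only shows the append): flags are appended only when the corresponding option is set, and the two rope flags are simply no longer part of that list.

package main

import "fmt"

// Hypothetical trimmed-down options; the real fields live in api.Options.
type options struct {
	MainGPU int
}

// buildParams mirrors the conditional flag-building pattern in the hunk
// above: each flag is appended only when its option is set, and the
// --rope-freq-base / --rope-freq-scale flags are gone.
func buildParams(model string, adapters []string, opts options) []string {
	params := []string{"--model", model}

	if opts.MainGPU > 0 {
		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
	}

	if len(adapters) > 0 {
		// Only the first adapter is passed, matching the TODO in the diff.
		params = append(params, "--lora", adapters[0])
	}

	return params
}

func main() {
	fmt.Println(buildParams("model.gguf", []string{"adapter.bin"}, options{MainGPU: 1}))
	// [--model model.gguf --main-gpu 1 --lora adapter.bin]
}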