diff --git a/api/types.go b/api/types.go
index 3169e11f..690b1cd1 100644
--- a/api/types.go
+++ b/api/types.go
@@ -121,8 +121,6 @@ type Runner struct {
 	VocabOnly          bool    `json:"vocab_only,omitempty"`
 	UseMMap            bool    `json:"use_mmap,omitempty"`
 	UseMLock           bool    `json:"use_mlock,omitempty"`
-	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
-	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 	NumThread          int     `json:"num_thread,omitempty"`
 }
 
@@ -383,8 +381,6 @@ func DefaultOptions() Options {
 		Runner: Runner{
 			// options set when the model is loaded
 			NumCtx:             2048,
-			RopeFrequencyBase:  10000.0,
-			RopeFrequencyScale: 1.0,
 			NumBatch:           512,
 			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
 			NumGQA:             1,
diff --git a/convert/convert.go b/convert/convert.go
index d518ee32..fc4f3085 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -32,7 +32,6 @@ type Params struct {
 	AttentionHeads int     `json:"num_attention_heads"` // n_head
 	KeyValHeads    int     `json:"num_key_value_heads"`
 	NormEPS        float64 `json:"rms_norm_eps"`
-	RopeFreqBase   float64 `json:"rope_theta"`
 	BoSTokenID     int     `json:"bos_token_id"`
 	EoSTokenID     int     `json:"eos_token_id"`
 	HeadDimension  int     `json:"head_dim"`
diff --git a/convert/mistral.go b/convert/mistral.go
index fef3f04b..51ad6729 100644
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -144,7 +144,6 @@ func (m *MistralModel) WriteGGUF() (string, error) {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"llama.rope.freq_base":                   float32(m.Params.RopeFreqBase),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "llama",
 
diff --git a/llm/server.go b/llm/server.go
index 2994f9a6..0e084d5a 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -172,14 +172,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}
 
-	if opts.RopeFrequencyBase > 0 {
-		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
-	}
-
-	if opts.RopeFrequencyScale > 0 {
-		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
-	}
-
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])