From be517e491c5c09a7bdd21293878f37c18e7a81f9 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Fri, 5 Apr 2024 17:59:58 -0700
Subject: [PATCH] no rope parameters

---
 api/types.go       | 4 ----
 convert/convert.go | 1 -
 convert/mistral.go | 1 -
 llm/server.go      | 8 --------
 4 files changed, 14 deletions(-)

diff --git a/api/types.go b/api/types.go
index 3169e11f..690b1cd1 100644
--- a/api/types.go
+++ b/api/types.go
@@ -121,8 +121,6 @@ type Runner struct {
 	VocabOnly          bool    `json:"vocab_only,omitempty"`
 	UseMMap            bool    `json:"use_mmap,omitempty"`
 	UseMLock           bool    `json:"use_mlock,omitempty"`
-	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
-	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 	NumThread          int     `json:"num_thread,omitempty"`
 }
 
@@ -383,8 +381,6 @@ func DefaultOptions() Options {
 		Runner: Runner{
 			// options set when the model is loaded
 			NumCtx:             2048,
-			RopeFrequencyBase:  10000.0,
-			RopeFrequencyScale: 1.0,
 			NumBatch:           512,
 			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
 			NumGQA:             1,
diff --git a/convert/convert.go b/convert/convert.go
index d518ee32..fc4f3085 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -32,7 +32,6 @@ type Params struct {
 	AttentionHeads int     `json:"num_attention_heads"` // n_head
 	KeyValHeads    int     `json:"num_key_value_heads"`
 	NormEPS        float64 `json:"rms_norm_eps"`
-	RopeFreqBase   float64 `json:"rope_theta"`
 	BoSTokenID     int     `json:"bos_token_id"`
 	EoSTokenID     int     `json:"eos_token_id"`
 	HeadDimension  int     `json:"head_dim"`
diff --git a/convert/mistral.go b/convert/mistral.go
index fef3f04b..51ad6729 100644
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -144,7 +144,6 @@ func (m *MistralModel) WriteGGUF() (string, error) {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"llama.rope.freq_base":                   float32(m.Params.RopeFreqBase),
 		"general.file_type":                      uint32(1),
 
 		"tokenizer.ggml.model": "llama",
diff --git a/llm/server.go b/llm/server.go
index 2994f9a6..0e084d5a 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -172,14 +172,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}
 
-	if opts.RopeFrequencyBase > 0 {
-		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
-	}
-
-	if opts.RopeFrequencyScale > 0 {
-		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
-	}
-
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])