Merge pull request #3508 from ollama/mxyng/rope
This commit is contained in:
commit
a5ec9cfc0f
4 changed files with 0 additions and 14 deletions
|
@@ -121,8 +121,6 @@ type Runner struct {
|
||||||
VocabOnly bool `json:"vocab_only,omitempty"`
|
VocabOnly bool `json:"vocab_only,omitempty"`
|
||||||
UseMMap bool `json:"use_mmap,omitempty"`
|
UseMMap bool `json:"use_mmap,omitempty"`
|
||||||
UseMLock bool `json:"use_mlock,omitempty"`
|
UseMLock bool `json:"use_mlock,omitempty"`
|
||||||
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
|
||||||
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
|
||||||
NumThread int `json:"num_thread,omitempty"`
|
NumThread int `json:"num_thread,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -383,8 +381,6 @@ func DefaultOptions() Options {
|
||||||
Runner: Runner{
|
Runner: Runner{
|
||||||
// options set when the model is loaded
|
// options set when the model is loaded
|
||||||
NumCtx: 2048,
|
NumCtx: 2048,
|
||||||
RopeFrequencyBase: 10000.0,
|
|
||||||
RopeFrequencyScale: 1.0,
|
|
||||||
NumBatch: 512,
|
NumBatch: 512,
|
||||||
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
||||||
NumGQA: 1,
|
NumGQA: 1,
|
||||||
|
|
|
@@ -32,7 +32,6 @@ type Params struct {
|
||||||
AttentionHeads int `json:"num_attention_heads"` // n_head
|
AttentionHeads int `json:"num_attention_heads"` // n_head
|
||||||
KeyValHeads int `json:"num_key_value_heads"`
|
KeyValHeads int `json:"num_key_value_heads"`
|
||||||
NormEPS float64 `json:"rms_norm_eps"`
|
NormEPS float64 `json:"rms_norm_eps"`
|
||||||
RopeFreqBase float64 `json:"rope_theta"`
|
|
||||||
BoSTokenID int `json:"bos_token_id"`
|
BoSTokenID int `json:"bos_token_id"`
|
||||||
EoSTokenID int `json:"eos_token_id"`
|
EoSTokenID int `json:"eos_token_id"`
|
||||||
HeadDimension int `json:"head_dim"`
|
HeadDimension int `json:"head_dim"`
|
||||||
|
|
|
@@ -144,7 +144,6 @@ func (m *MistralModel) WriteGGUF() (string, error) {
|
||||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
||||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
||||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
||||||
"llama.rope.freq_base": float32(m.Params.RopeFreqBase),
|
|
||||||
"general.file_type": uint32(1),
|
"general.file_type": uint32(1),
|
||||||
"tokenizer.ggml.model": "llama",
|
"tokenizer.ggml.model": "llama",
|
||||||
|
|
||||||
|
|
|
@@ -172,14 +172,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||||
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
|
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.RopeFrequencyBase > 0 {
|
|
||||||
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.RopeFrequencyScale > 0 {
|
|
||||||
params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(adapters) > 0 {
|
if len(adapters) > 0 {
|
||||||
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
|
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
|
||||||
params = append(params, "--lora", adapters[0])
|
params = append(params, "--lora", adapters[0])
|
||||||
|
|
Loading…
Reference in a new issue