support new "longrope" attention factor
This commit is contained in:
parent
6ffb5cb017
commit
aec77d6a05
1 changed file with 1 addition and 1 deletion
|
@ -58,7 +58,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
|
||||||
switch p.RopeScaling.Type {
|
switch p.RopeScaling.Type {
|
||||||
case "":
|
case "":
|
||||||
// no scaling
|
// no scaling
|
||||||
case "su":
|
case "su", "longrope":
|
||||||
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
|
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
|
||||||
case "yarn":
|
case "yarn":
|
||||||
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
|
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
|
||||||
|
|
Loading…
Reference in a new issue