Merge pull request #6064 from ollama/mxyng/convert-llama3
convert: update llama conversion for llama3.1
This commit is contained in:
commit
6bd8a4b0a1
9 changed files with 44 additions and 9 deletions
|
@ -88,7 +88,6 @@ func (p *bert) parseMore(fsys fs.FS) error {
|
||||||
func (p *bert) KV(t *Tokenizer) llm.KV {
|
func (p *bert) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.Parameters.KV(t)
|
||||||
kv["general.architecture"] = "bert"
|
kv["general.architecture"] = "bert"
|
||||||
kv["general.name"] = "bert"
|
|
||||||
kv["bert.attention.causal"] = false
|
kv["bert.attention.causal"] = false
|
||||||
kv["bert.pooling_type"] = p.PoolingType
|
kv["bert.pooling_type"] = p.PoolingType
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,6 @@ var _ Converter = (*gemma)(nil)
|
||||||
func (p *gemma) KV(t *Tokenizer) llm.KV {
|
func (p *gemma) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.Parameters.KV(t)
|
||||||
kv["general.architecture"] = "gemma"
|
kv["general.architecture"] = "gemma"
|
||||||
kv["general.name"] = "gemma"
|
|
||||||
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["gemma.embedding_length"] = p.HiddenSize
|
kv["gemma.embedding_length"] = p.HiddenSize
|
||||||
kv["gemma.block_count"] = p.HiddenLayers
|
kv["gemma.block_count"] = p.HiddenLayers
|
||||||
|
|
|
@ -14,7 +14,6 @@ type gemma2 struct {
|
||||||
func (p *gemma2) KV(t *Tokenizer) llm.KV {
|
func (p *gemma2) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.Parameters.KV(t)
|
||||||
kv["general.architecture"] = "gemma2"
|
kv["general.architecture"] = "gemma2"
|
||||||
kv["general.name"] = "gemma2"
|
|
||||||
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["gemma2.embedding_length"] = p.HiddenSize
|
kv["gemma2.embedding_length"] = p.HiddenSize
|
||||||
kv["gemma2.block_count"] = p.HiddenLayers
|
kv["gemma2.block_count"] = p.HiddenLayers
|
||||||
|
|
|
@ -3,6 +3,7 @@ package convert
|
||||||
import (
|
import (
|
||||||
"cmp"
|
"cmp"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/pdevine/tensor"
|
"github.com/pdevine/tensor"
|
||||||
|
@ -28,7 +29,13 @@ type llama struct {
|
||||||
RopeTheta float32 `json:"rope_theta"`
|
RopeTheta float32 `json:"rope_theta"`
|
||||||
RopeScaling struct {
|
RopeScaling struct {
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
|
RopeType string `json:"rope_type"`
|
||||||
Factor float32 `json:"factor"`
|
Factor float32 `json:"factor"`
|
||||||
|
LowFrequencyFactor float32 `json:"low_freq_factor"`
|
||||||
|
HighFrequencyFactor float32 `json:"high_freq_factor"`
|
||||||
|
OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
|
||||||
|
|
||||||
|
factors ropeFactor
|
||||||
} `json:"rope_scaling"`
|
} `json:"rope_scaling"`
|
||||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||||
LayerNormEPS float32 `json:"layer_norm_eps"`
|
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||||
|
@ -42,7 +49,6 @@ var _ Converter = (*llama)(nil)
|
||||||
func (p *llama) KV(t *Tokenizer) llm.KV {
|
func (p *llama) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.Parameters.KV(t)
|
||||||
kv["general.architecture"] = "llama"
|
kv["general.architecture"] = "llama"
|
||||||
kv["general.name"] = "llama"
|
|
||||||
kv["llama.vocab_size"] = p.VocabSize
|
kv["llama.vocab_size"] = p.VocabSize
|
||||||
|
|
||||||
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
||||||
|
@ -71,6 +77,27 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
|
||||||
if p.RopeScaling.Type == "linear" {
|
if p.RopeScaling.Type == "linear" {
|
||||||
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
|
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
|
||||||
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
|
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||||
|
} else if p.RopeScaling.RopeType == "llama3" {
|
||||||
|
dim := p.HiddenSize / p.NumAttentionHeads
|
||||||
|
for i := uint32(0); i < dim; i += 2 {
|
||||||
|
factor := cmp.Or(p.RopeScaling.Factor, 8.0)
|
||||||
|
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
|
||||||
|
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
|
||||||
|
|
||||||
|
original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
|
||||||
|
lambdaLow := float32(original) / factorLow
|
||||||
|
lambdaHigh := float32(original) / factorHigh
|
||||||
|
|
||||||
|
lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
|
||||||
|
if lambda < float64(lambdaHigh) {
|
||||||
|
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
|
||||||
|
} else if lambda > float64(lambdaLow) {
|
||||||
|
p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
|
||||||
|
} else {
|
||||||
|
smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
|
||||||
|
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.NumKeyValueHeads > 0 {
|
if p.NumKeyValueHeads > 0 {
|
||||||
|
@ -95,6 +122,16 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
|
||||||
|
|
||||||
func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
var out []llm.Tensor
|
var out []llm.Tensor
|
||||||
|
|
||||||
|
if p.RopeScaling.factors != nil {
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: "rope_freqs.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
|
||||||
|
WriterTo: p.RopeScaling.factors,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
for _, t := range ts {
|
for _, t := range ts {
|
||||||
if strings.HasSuffix(t.Name(), "attn_q.weight") ||
|
if strings.HasSuffix(t.Name(), "attn_q.weight") ||
|
||||||
strings.HasSuffix(t.Name(), "attn_k.weight") {
|
strings.HasSuffix(t.Name(), "attn_k.weight") {
|
||||||
|
|
|
@ -40,7 +40,6 @@ var _ Converter = (*phi3)(nil)
|
||||||
func (p *phi3) KV(t *Tokenizer) llm.KV {
|
func (p *phi3) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.Parameters.KV(t)
|
||||||
kv["general.architecture"] = "phi3"
|
kv["general.architecture"] = "phi3"
|
||||||
kv["general.name"] = "phi3"
|
|
||||||
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||||
kv["phi3.feed_forward_length"] = p.IntermediateSize
|
kv["phi3.feed_forward_length"] = p.IntermediateSize
|
||||||
|
|
|
@ -62,6 +62,7 @@ func TestMain(m *testing.M) {
|
||||||
func TestConvertFull(t *testing.T) {
|
func TestConvertFull(t *testing.T) {
|
||||||
cases := []string{
|
cases := []string{
|
||||||
"Meta-Llama-3-8B-Instruct",
|
"Meta-Llama-3-8B-Instruct",
|
||||||
|
"Meta-Llama-3.1-8B-Instruct",
|
||||||
"Mistral-7B-Instruct-v0.2",
|
"Mistral-7B-Instruct-v0.2",
|
||||||
"Mixtral-8x7B-Instruct-v0.1",
|
"Mixtral-8x7B-Instruct-v0.1",
|
||||||
"gemma-2b-it",
|
"gemma-2b-it",
|
||||||
|
|
3
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
vendored
Normal file
3
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
{
|
||||||
|
"rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
|
||||||
|
}
|
|
@ -33,7 +33,6 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||||
assert.Len(t, tensors, inputLayerCount+1)
|
assert.Len(t, tensors, inputLayerCount+1)
|
||||||
err = WriteGGUF(f, KV{
|
err = WriteGGUF(f, KV{
|
||||||
"general.architecture": "llama",
|
"general.architecture": "llama",
|
||||||
"general.name": "name",
|
|
||||||
"llama.context_length": uint32(32),
|
"llama.context_length": uint32(32),
|
||||||
"llama.embedding_length": uint32(4096),
|
"llama.embedding_length": uint32(4096),
|
||||||
"llama.block_count": uint32(inputLayerCount),
|
"llama.block_count": uint32(inputLayerCount),
|
||||||
|
|
|
@ -117,7 +117,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
|
||||||
|
|
||||||
require.NoError(t, llm.WriteGGUF(f, llm.KV{
|
require.NoError(t, llm.WriteGGUF(f, llm.KV{
|
||||||
"general.architecture": "llama",
|
"general.architecture": "llama",
|
||||||
"general.name": "name",
|
|
||||||
"llama.context_length": uint32(32),
|
"llama.context_length": uint32(32),
|
||||||
"llama.embedding_length": uint32(4096),
|
"llama.embedding_length": uint32(4096),
|
||||||
"llama.block_count": uint32(1),
|
"llama.block_count": uint32(1),
|
||||||
|
|
Loading…
Reference in a new issue