Merge pull request #5365 from ollama/mxyng/convert-gemma2

convert gemma2
Michael Yang 2024-08-21 11:48:43 -07:00 committed by GitHub
commit e22286c9e1
13 changed files with 132 additions and 46 deletions


@@ -7,6 +7,7 @@ import (
 	"io"
 	"io/fs"
 	"log/slog"
+	"strings"
 
 	"github.com/ollama/ollama/llm"
 )
@@ -58,11 +59,13 @@ type Converter interface {
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
 	Tensors([]Tensor) []llm.Tensor
+	// Replacements returns a list of string pairs to replace in tensor names.
+	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
+	Replacements() []string
 
-	// tensorName returns the LLM tensor name for a specific input name
-	tensorName(string) string
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
+	// writeFile writes the model to the provided io.WriteSeeker
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
@@ -97,6 +100,8 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		conv = &mixtral{}
 	case "GemmaForCausalLM":
 		conv = &gemma{}
+	case "Gemma2ForCausalLM":
+		conv = &gemma2{}
 	case "Phi3ForCausalLM":
 		conv = &phi3{}
 	case "BertModel":
@@ -131,7 +136,7 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
 
-	ts, err := parseTensors(fsys)
+	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
 	if err != nil {
 		return err
 	}
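
For context, the new Replacements contract is just a flat list of (old, new) pairs that Convert feeds to strings.NewReplacer, and the resulting replacer rewrites every tensor name as it is parsed. A minimal standalone sketch of that flow, using pairs that appear in the gemma and llama lists further down and an illustrative safetensors tensor name:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// A subset of the (old, new) pairs a converter returns from Replacements().
	replacements := []string{
		"model.embed_tokens", "token_embd",
		"model.layers", "blk",
		"mlp.up_proj", "ffn_up",
	}

	// parseTensors builds a single Replacer from the pairs and applies it to every name.
	r := strings.NewReplacer(replacements...)

	// Illustrative input name; prints "blk.0.ffn_up.weight".
	fmt.Println(r.Replace("model.layers.0.mlp.up_proj.weight"))
}

Because the replacer now runs inside parseTensors, the per-model Tensors and repack code later in this diff can match the GGUF-style names (attn_q.weight, attn_k.weight) instead of the original HF names.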


@@ -144,9 +144,8 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 			continue
 		}
 
-		name := p.tensorName(t.Name())
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,
@@ -156,8 +155,8 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (bert) tensorName(n string) string {
-	return strings.NewReplacer(
+func (bert) Replacements() []string {
+	return []string{
 		"encoder.layer", "blk",
 		"encoder.layers", "blk",
 		"embeddings.word_embeddings", "token_embd",
@@ -172,5 +171,5 @@ func (bert) tensorName(n string) string {
 		"intermediate.dense", "ffn_up",
 		"output.dense", "ffn_down",
 		"output.LayerNorm", "layer_output_norm",
-	).Replace(n)
+	}
 }


@@ -44,15 +44,14 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 }
 
 func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+	out := make([]llm.Tensor, 0, len(ts))
 	for _, t := range ts {
-		name := p.tensorName(t.Name())
-		if strings.HasSuffix(name, "_norm.weight") {
+		if strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)
 		}
 
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,
@@ -62,8 +61,8 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *gemma) tensorName(n string) string {
-	return strings.NewReplacer(
+func (p *gemma) Replacements() []string {
+	return []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
@@ -76,8 +75,7 @@ func (p *gemma) tensorName(n string) string {
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
-		"block_sparse_moe.gate", "ffn_inp",
-	).Replace(n)
+	}
 }
 
 func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
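
Aside: the _norm.weight branch above registers an addOne repacker whose body is not part of this diff. As a rough sketch only, assuming it simply offsets every element by one (Gemma checkpoints presumably store RMSNorm weights as the offset from 1, while GGML expects the full weight), it could look like:

package main

import "fmt"

// addOne is a sketch of a repacker matching the repacker signature
// func(string, []float32, []uint64) ([]float32, error):
// it adds 1 to every element and leaves the shape untouched.
func addOne(_ string, data []float32, _ []uint64) ([]float32, error) {
	out := make([]float32, len(data))
	for i, v := range data {
		out[i] = v + 1
	}
	return out, nil
}

func main() {
	repacked, _ := addOne("blk.0.attn_norm.weight", []float32{-0.25, 0.5}, []uint64{2})
	fmt.Println(repacked) // [0.75 1.5]
}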

convert/convert_gemma2.go (new file, 44 lines)

@@ -0,0 +1,44 @@
+package convert
+
+import (
+	"github.com/ollama/ollama/llm"
+)
+
+type gemma2 struct {
+	gemma
+	SlidingWindow         uint32  `json:"sliding_window"`
+	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
+	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
+}
+
+func (p *gemma2) KV(t *Tokenizer) llm.KV {
+	kv := p.Parameters.KV(t)
+	kv["general.architecture"] = "gemma2"
+	kv["general.name"] = "gemma2"
+	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
+	kv["gemma2.embedding_length"] = p.HiddenSize
+	kv["gemma2.block_count"] = p.HiddenLayers
+	kv["gemma2.feed_forward_length"] = p.IntermediateSize
+	kv["gemma2.attention.head_count"] = p.NumAttentionHeads
+	kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
+	kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
+	kv["gemma2.attention.key_length"] = p.HeadDim
+	kv["gemma2.attention.value_length"] = p.HeadDim
+	kv["gemma2.attention.sliding_window"] = p.SlidingWindow
+	kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
+	kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
+	kv["tokenizer.ggml.eot_token_id"] = uint32(107)
+	kv["tokenizer.ggml.middle_token_id"] = uint32(68)
+	kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
+	kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
+	return kv
+}
+
+func (p *gemma2) Replacements() []string {
+	return append(
+		p.gemma.Replacements(),
+		"post_attention_layernorm", "post_attention_norm",
+		"pre_feedforward_layernorm", "ffn_norm",
+		"post_feedforward_layernorm", "post_ffw_norm",
+	)
+}
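
The gemma2 struct embeds gemma, so it inherits the base Tensors implementation and only overrides KV and Replacements; its three extra fields are decoded straight from the model's config.json via the struct tags. A small illustrative sketch of that decoding (the config fragment is hypothetical, with values mirroring the testdata expectations later in this diff):

package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the JSON tags on the gemma2 struct in convert_gemma2.go.
type gemma2Config struct {
	SlidingWindow         uint32  `json:"sliding_window"`
	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
}

func main() {
	// Hypothetical config.json fragment; real files carry many more keys.
	raw := `{"sliding_window": 4096, "attn_logit_softcapping": 50.0, "final_logit_softcapping": 30.0}`

	var cfg gemma2Config
	if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
		panic(err)
	}

	// These values feed the gemma2.* entries written into the GGUF KV header.
	fmt.Println(cfg.SlidingWindow, cfg.AttentionLogitSoftcap, cfg.FinalLogitSoftcap) // 4096 50 30
}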


@@ -96,14 +96,13 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
-		name := p.tensorName(t.Name())
-		if strings.HasSuffix(name, "attn_q.weight") ||
-			strings.HasSuffix(name, "attn_k.weight") {
+		if strings.HasSuffix(t.Name(), "attn_q.weight") ||
+			strings.HasSuffix(t.Name(), "attn_k.weight") {
 			t.SetRepacker(p.repack)
 		}
 
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,
@@ -113,8 +112,8 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *llama) tensorName(n string) string {
-	return strings.NewReplacer(
+func (p *llama) Replacements() []string {
+	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
@@ -128,9 +127,7 @@ func (p *llama) tensorName(n string) string {
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
-		// mixtral
-		"block_sparse_moe.gate", "ffn_gate_inp",
-	).Replace(n)
+	}
 }
 
 func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
@@ -140,9 +137,9 @@ func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32,
 	}
 
 	var heads uint32
-	if strings.HasSuffix(name, "q_proj.weight") {
+	if strings.HasSuffix(name, "attn_q.weight") {
 		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "k_proj.weight") {
+	} else if strings.HasSuffix(name, "attn_k.weight") {
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor for repack: %s", name)

@@ -15,8 +15,6 @@ type mixtral struct {
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 
-var _ Converter = (*mixtral)(nil)
-
 func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	kv := p.llama.KV(t)
@@ -72,6 +70,13 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 	return append(out, p.llama.Tensors(ts)...)
 }
 
+func (p *mixtral) Replacements() []string {
+	return append(
+		p.llama.Replacements(),
+		"block_sparse_moe.gate", "ffn_gate_inp",
+	)
+}
+
 type experts []Tensor
 
 func (e experts) WriteTo(w io.Writer) (int64, error) {


@@ -74,8 +74,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 	out := make([]llm.Tensor, 0, len(ts)+2)
 	for _, t := range ts {
-		name := p.tensorName(t.Name())
-		if strings.HasPrefix(name, "blk.0.") {
+		if strings.HasPrefix(t.Name(), "blk.0.") {
 			addRopeFactors.Do(func() {
 				out = append(out, llm.Tensor{
 					Name: "rope_factors_long.weight",
@@ -92,7 +91,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 		}
 
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,
@@ -102,8 +101,8 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *phi3) tensorName(n string) string {
-	return strings.NewReplacer(
+func (p *phi3) Replacements() []string {
+	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
@@ -114,7 +113,7 @@ func (p *phi3) tensorName(n string) string {
 		"mlp.down_proj", "ffn_down",
 		"mlp.gate_up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
-	).Replace(n)
+	}
 }
 
 type ropeFactor []float32


@@ -68,6 +68,7 @@ func TestConvertFull(t *testing.T) {
 		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
 		"Phi-3-mini-128k-instruct",
 		"all-MiniLM-L6-v2",
+		"gemma-2-9b-it",
 	}
 
 	for i := range cases {


@@ -35,9 +35,9 @@ const (
 )
 
 func (t tensorBase) Kind() uint32 {
-	if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") {
-		return 0
-	} else if t.name == "embeddings.token_type_embeddings.weight" {
+	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
+		t.name == "token_types.weight" {
+		// these tensors are always F32
 		return 0
 	}
@@ -57,10 +57,10 @@ func (t *tensorBase) SetRepacker(fn repacker) {
 
 type repacker func(string, []float32, []uint64) ([]float32, error)
 
-func parseTensors(fsys fs.FS) ([]Tensor, error) {
+func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	patterns := []struct {
 		Pattern string
-		Func    func(fs.FS, ...string) ([]Tensor, error)
+		Func    func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},
@@ -76,7 +76,7 @@ func parseTensors(fsys fs.FS) ([]Tensor, error) {
 		}
 
 		if len(matches) > 0 {
-			return pattern.Func(fsys, matches...)
+			return pattern.Func(fsys, replacer, matches...)
 		}
 	}
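
As an aside, parseTensors still chooses a parser by matching the file patterns above against the model directory and only the dispatch gains the replacer argument. A tiny sketch of that matching step (the directory contents are hypothetical, and the real code may use a different glob helper):

package main

import (
	"fmt"
	"io/fs"
	"testing/fstest"
)

func main() {
	// Hypothetical model directory with sharded safetensors files.
	fsys := fstest.MapFS{
		"model-00001-of-00002.safetensors": &fstest.MapFile{},
		"model-00002-of-00002.safetensors": &fstest.MapFile{},
		"config.json":                      &fstest.MapFile{},
	}

	// The same kind of pattern match decides whether parseSafetensors or parseTorch runs.
	matches, _ := fs.Glob(fsys, "model-*-of-*.safetensors")
	fmt.Println(matches) // [model-00001-of-00002.safetensors model-00002-of-00002.safetensors]
}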


@@ -8,6 +8,7 @@ import (
 	"io"
 	"io/fs"
 	"slices"
+	"strings"
 
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
@@ -20,7 +21,7 @@ type safetensorMetadata struct {
 	Offsets []int64 `json:"data_offsets"`
 }
 
-func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
+func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		f, err := fsys.Open(p)
@@ -56,7 +57,7 @@ func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
 				offset: safetensorsPad(n, value.Offsets[0]),
 				size:   safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
 				tensorBase: &tensorBase{
-					name:  key,
+					name:  replacer.Replace(key),
 					shape: value.Shape,
 				},
 			})


@@ -3,12 +3,13 @@ package convert
 import (
 	"io"
 	"io/fs"
+	"strings"
 
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 )
 
-func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
+func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		pt, err := pytorch.Load(p)
@@ -27,7 +28,7 @@ func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
 			ts = append(ts, torch{
 				storage: t.(*pytorch.Tensor).Source,
 				tensorBase: &tensorBase{
-					name:  k.(string),
+					name:  replacer.Replace(k.(string)),
 					shape: shape,
 				},
 			})

convert/testdata/gemma-2-9b-it.json (new file, vendored, 6 lines)

@@ -0,0 +1,6 @@
+{
+  "general.architecture": "gemma2",
+  "gemma2.attention.sliding_window": "4096",
+  "gemma2.attn_logit_softcapping": "50",
+  "gemma2.final_logit_softcapping": "30"
+}


@@ -15,6 +15,11 @@ import (
 )
 
 func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
+	ast, err := parseAdditionalSpecialTokens(fsys)
+	if err != nil {
+		return nil, err
+	}
+
 	bts, err := fs.ReadFile(fsys, "tokenizer.model")
 	if err != nil {
 		return nil, err
@@ -37,7 +42,12 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 			sentencepiece.ModelProto_SentencePiece_BYTE:
 			v.Types = append(v.Types, int32(t))
 		default:
-			v.Types = append(v.Types, int32(sentencepiece.ModelProto_SentencePiece_NORMAL))
+			tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
+			if slices.Contains(ast, piece.GetPiece()) {
+				tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
+			}
+
+			v.Types = append(v.Types, tt)
 		}
 	}
@@ -81,3 +91,23 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 
 	return &v, nil
 }
+
+func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
+	f, err := fsys.Open("special_tokens_map.json")
+	if errors.Is(err, os.ErrNotExist) {
+		return nil, nil
+	} else if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var m struct {
+		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
+	}
+	if err := json.NewDecoder(f).Decode(&m); err != nil {
+		return nil, err
+	}
+
+	return m.AdditionalSpecialTokens, nil
+}
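
To round out the tokenizer change: parseAdditionalSpecialTokens reads an optional special_tokens_map.json, and any piece listed under additional_special_tokens gets typed as CONTROL instead of NORMAL. A self-contained sketch of the same decode against an in-memory filesystem (the helper name and sample tokens here are illustrative, not from the PR):

package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"testing/fstest"
)

// readAdditionalSpecialTokens mirrors the struct and error handling used above.
func readAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
	f, err := fsys.Open("special_tokens_map.json")
	if errors.Is(err, os.ErrNotExist) {
		return nil, nil // a missing file is not an error, same as in the PR
	} else if err != nil {
		return nil, err
	}
	defer f.Close()

	var m struct {
		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
	}
	if err := json.NewDecoder(f).Decode(&m); err != nil {
		return nil, err
	}
	return m.AdditionalSpecialTokens, nil
}

func main() {
	// Hypothetical tokenizer metadata; real models list their own tokens.
	fsys := fstest.MapFS{
		"special_tokens_map.json": &fstest.MapFile{
			Data: []byte(`{"additional_special_tokens": ["<start_of_turn>", "<end_of_turn>"]}`),
		},
	}

	tokens, err := readAdditionalSpecialTokens(fsys)
	if err != nil {
		panic(err)
	}
	// Pieces found in this list are recorded as CONTROL token types in the vocabulary.
	fmt.Println(tokens)
}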