convert gemma2

This commit is contained in:
parent beb49eef65
commit 3546bbd08c

13 changed files with 132 additions and 46 deletions
convert/convert.go

@@ -7,6 +7,7 @@ import (
 	"io"
 	"io/fs"
 	"log/slog"
+	"strings"
 
 	"github.com/ollama/ollama/llm"
 )

@@ -58,11 +59,13 @@ type Converter interface {
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
 	Tensors([]Tensor) []llm.Tensor
+	// Replacements returns a list of string pairs to replace in tensor names.
+	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
+	Replacements() []string
 
-	// tensorName returns the LLM tensor name for a specific input name
-	tensorName(string) string
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
+	// writeFile writes the model to the provided io.WriteSeeker
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 
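Note: the Replacements pairs are fed straight into strings.NewReplacer, which panics on an odd-length argument list, so every implementation must return complete old/new pairs. A minimal sketch of the contract (the pair values here are illustrative, not the full table from any converter):

	package main

	import (
		"fmt"
		"strings"
	)

	func main() {
		// Pairs of old, new — the shape every Replacements() must return.
		pairs := []string{"model.layers", "blk", "mlp.up_proj", "ffn_up"}
		r := strings.NewReplacer(pairs...)
		fmt.Println(r.Replace("model.layers.0.mlp.up_proj.weight"))
		// Output: blk.0.ffn_up.weight
	}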
@@ -97,6 +100,8 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		conv = &mixtral{}
 	case "GemmaForCausalLM":
 		conv = &gemma{}
+	case "Gemma2ForCausalLM":
+		conv = &gemma2{}
 	case "Phi3ForCausalLM":
 		conv = &phi3{}
 	case "BertModel":

@@ -131,7 +136,7 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
 
-	ts, err := parseTensors(fsys)
+	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
 	if err != nil {
 		return err
 	}
convert/convert_bert.go

@@ -144,9 +144,8 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 			continue
 		}
 
-		name := p.tensorName(t.Name())
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,

@@ -156,8 +155,8 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (bert) tensorName(n string) string {
-	return strings.NewReplacer(
+func (bert) Replacements() []string {
+	return []string{
 		"encoder.layer", "blk",
 		"encoder.layers", "blk",
 		"embeddings.word_embeddings", "token_embd",

@@ -172,5 +171,5 @@ func (bert) tensorName(n string) string {
 		"intermediate.dense", "ffn_up",
 		"output.dense", "ffn_down",
 		"output.LayerNorm", "layer_output_norm",
-	).Replace(n)
+	}
 }
convert/convert_gemma.go

@@ -44,15 +44,14 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 }
 
 func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
+	out := make([]llm.Tensor, 0, len(ts))
 	for _, t := range ts {
-		name := p.tensorName(t.Name())
-		if strings.HasSuffix(name, "_norm.weight") {
+		if strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)
 		}
 
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,

@@ -62,8 +61,8 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *gemma) tensorName(n string) string {
-	return strings.NewReplacer(
+func (p *gemma) Replacements() []string {
+	return []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",

@@ -76,8 +75,7 @@ func (p *gemma) tensorName(n string) string {
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
-		"block_sparse_moe.gate", "ffn_inp",
-	).Replace(n)
+	}
 }
 
 func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
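Note: the suffix check now runs against the already-replaced name, and the repacker is why it matters: Gemma norm weights are stored in GGUF with one added to each element. A sketch of what addOne presumably does, based on its name and the repacker signature in this diff (the real implementation is outside these hunks):

	// Matches the repacker type from this diff:
	// func(string, []float32, []uint64) ([]float32, error)
	func addOne(_ string, data []float32, _ []uint64) ([]float32, error) {
		out := make([]float32, len(data))
		for i, v := range data {
			out[i] = v + 1 // GGUF stores Gemma norm weights as (1 + w)
		}
		return out, nil
	}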
convert/convert_gemma2.go (new file)

@@ -0,0 +1,44 @@
+package convert
+
+import (
+	"github.com/ollama/ollama/llm"
+)
+
+type gemma2 struct {
+	gemma
+	SlidingWindow         uint32  `json:"sliding_window"`
+	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
+	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
+}
+
+func (p *gemma2) KV(t *Tokenizer) llm.KV {
+	kv := p.Parameters.KV(t)
+	kv["general.architecture"] = "gemma2"
+	kv["general.name"] = "gemma2"
+	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
+	kv["gemma2.embedding_length"] = p.HiddenSize
+	kv["gemma2.block_count"] = p.HiddenLayers
+	kv["gemma2.feed_forward_length"] = p.IntermediateSize
+	kv["gemma2.attention.head_count"] = p.NumAttentionHeads
+	kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
+	kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
+	kv["gemma2.attention.key_length"] = p.HeadDim
+	kv["gemma2.attention.value_length"] = p.HeadDim
+	kv["gemma2.attention.sliding_window"] = p.SlidingWindow
+	kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
+	kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
+	kv["tokenizer.ggml.eot_token_id"] = uint32(107)
+	kv["tokenizer.ggml.middle_token_id"] = uint32(68)
+	kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
+	kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
+	return kv
+}
+
+func (p *gemma2) Replacements() []string {
+	return append(
+		p.gemma.Replacements(),
+		"post_attention_layernorm", "post_attention_norm",
+		"pre_feedforward_layernorm", "ffn_norm",
+		"post_feedforward_layernorm", "post_ffw_norm",
+	)
+}
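Note: gemma2 embeds gemma, so it inherits Tensors and specialTokenTypes while its own KV and Replacements shadow the embedded versions, extending the base replacement table via append. A minimal sketch of that embedding pattern (the types here are illustrative, not from the diff):

	package main

	import "fmt"

	type base struct{}

	func (base) Replacements() []string { return []string{"model.layers", "blk"} }

	type derived struct{ base }

	// derived shadows Replacements but reuses the embedded list.
	func (d derived) Replacements() []string {
		return append(d.base.Replacements(), "pre_feedforward_layernorm", "ffn_norm")
	}

	func main() {
		fmt.Println(derived{}.Replacements())
		// Output: [model.layers blk pre_feedforward_layernorm ffn_norm]
	}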
convert/convert_llama.go

@@ -96,14 +96,13 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
-		name := p.tensorName(t.Name())
-		if strings.HasSuffix(name, "attn_q.weight") ||
-			strings.HasSuffix(name, "attn_k.weight") {
+		if strings.HasSuffix(t.Name(), "attn_q.weight") ||
+			strings.HasSuffix(t.Name(), "attn_k.weight") {
 			t.SetRepacker(p.repack)
 		}
 
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,

@@ -113,8 +112,8 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *llama) tensorName(n string) string {
-	return strings.NewReplacer(
+func (p *llama) Replacements() []string {
+	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",

@@ -128,9 +127,7 @@ func (p *llama) tensorName(n string) string {
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
-		// mixtral
-		"block_sparse_moe.gate", "ffn_gate_inp",
-	).Replace(n)
+	}
 }
 
 func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {

@@ -140,9 +137,9 @@ func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	}
 
 	var heads uint32
-	if strings.HasSuffix(name, "q_proj.weight") {
+	if strings.HasSuffix(name, "attn_q.weight") {
 		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "k_proj.weight") {
+	} else if strings.HasSuffix(name, "attn_k.weight") {
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
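Note: repack now matches the GGUF-style suffixes (attn_q.weight, attn_k.weight) because names are rewritten at parse time, before the repacker runs. The cmp.Or fallback (Go 1.22+) returns the first of its arguments that is not the zero value, so a model that omits num_key_value_heads falls back to the full attention head count. A quick illustration (the head counts are made up):

	package main

	import (
		"cmp"
		"fmt"
	)

	func main() {
		var numKeyValueHeads uint32 = 0 // field absent from config.json
		var numAttentionHeads uint32 = 32
		fmt.Println(cmp.Or(numKeyValueHeads, numAttentionHeads)) // 32
	}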
convert/convert_mixtral.go

@@ -15,8 +15,6 @@ type mixtral struct {
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 
-var _ Converter = (*mixtral)(nil)
-
 func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	kv := p.llama.KV(t)
 

@@ -72,6 +70,13 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 	return append(out, p.llama.Tensors(ts)...)
 }
 
+func (p *mixtral) Replacements() []string {
+	return append(
+		p.llama.Replacements(),
+		"block_sparse_moe.gate", "ffn_gate_inp",
+	)
+}
+
 type experts []Tensor
 
 func (e experts) WriteTo(w io.Writer) (int64, error) {
convert/convert_phi3.go

@@ -74,8 +74,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 
 	out := make([]llm.Tensor, 0, len(ts)+2)
 	for _, t := range ts {
-		name := p.tensorName(t.Name())
-		if strings.HasPrefix(name, "blk.0.") {
+		if strings.HasPrefix(t.Name(), "blk.0.") {
 			addRopeFactors.Do(func() {
 				out = append(out, llm.Tensor{
 					Name: "rope_factors_long.weight",

@@ -92,7 +91,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 		}
 
 		out = append(out, llm.Tensor{
-			Name:     name,
+			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),
 			WriterTo: t,

@@ -102,8 +101,8 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *phi3) tensorName(n string) string {
-	return strings.NewReplacer(
+func (p *phi3) Replacements() []string {
+	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",

@@ -114,7 +113,7 @@ func (p *phi3) tensorName(n string) string {
 		"mlp.down_proj", "ffn_down",
 		"mlp.gate_up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
-	).Replace(n)
+	}
 }
 
 type ropeFactor []float32
convert/convert_test.go

@@ -68,6 +68,7 @@ func TestConvertFull(t *testing.T) {
 		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
 		"Phi-3-mini-128k-instruct",
 		"all-MiniLM-L6-v2",
+		"gemma-2-9b-it",
 	}
 
 	for i := range cases {
convert/reader.go

@@ -35,9 +35,9 @@ const (
 )
 
 func (t tensorBase) Kind() uint32 {
-	if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") {
-		return 0
-	} else if t.name == "embeddings.token_type_embeddings.weight" {
+	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
+		t.name == "token_types.weight" {
+		// these tensors are always F32
 		return 0
 	}
 

@@ -57,10 +57,10 @@ func (t *tensorBase) SetRepacker(fn repacker) {
 
 type repacker func(string, []float32, []uint64) ([]float32, error)
 
-func parseTensors(fsys fs.FS) ([]Tensor, error) {
+func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	patterns := []struct {
 		Pattern string
-		Func    func(fs.FS, ...string) ([]Tensor, error)
+		Func    func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},

@@ -76,7 +76,7 @@ func parseTensors(fsys fs.FS) ([]Tensor, error) {
 		}
 
 		if len(matches) > 0 {
-			return pattern.Func(fsys, matches...)
+			return pattern.Func(fsys, replacer, matches...)
 		}
 	}
 
convert/reader_safetensors.go

@@ -8,6 +8,7 @@ import (
 	"io"
 	"io/fs"
 	"slices"
+	"strings"
 
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"

@@ -20,7 +21,7 @@ type safetensorMetadata struct {
 	Offsets []int64 `json:"data_offsets"`
 }
 
-func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
+func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		f, err := fsys.Open(p)

@@ -56,7 +57,7 @@ func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
 				offset: safetensorsPad(n, value.Offsets[0]),
 				size:   safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
 				tensorBase: &tensorBase{
-					name:  key,
+					name:  replacer.Replace(key),
 					shape: value.Shape,
 				},
 			})
convert/reader_torch.go

@@ -3,12 +3,13 @@ package convert
 import (
 	"io"
 	"io/fs"
+	"strings"
 
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 )
 
-func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
+func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		pt, err := pytorch.Load(p)

@@ -27,7 +28,7 @@ func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
 			ts = append(ts, torch{
 				storage: t.(*pytorch.Tensor).Source,
 				tensorBase: &tensorBase{
-					name:  k.(string),
+					name:  replacer.Replace(k.(string)),
 					shape: shape,
 				},
 			})
convert/testdata/gemma-2-9b-it.json (new file, vendored)

@@ -0,0 +1,6 @@
+{
+	"general.architecture": "gemma2",
+	"gemma2.attention.sliding_window": "4096",
+	"gemma2.attn_logit_softcapping": "50",
+	"gemma2.final_logit_softcapping": "30"
+}
convert/tokenizer_spm.go

@@ -15,6 +15,11 @@ import (
 )
 
 func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
+	ast, err := parseAdditionalSpecialTokens(fsys)
+	if err != nil {
+		return nil, err
+	}
+
 	bts, err := fs.ReadFile(fsys, "tokenizer.model")
 	if err != nil {
 		return nil, err

@@ -37,7 +42,12 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 			sentencepiece.ModelProto_SentencePiece_BYTE:
 			v.Types = append(v.Types, int32(t))
 		default:
-			v.Types = append(v.Types, int32(sentencepiece.ModelProto_SentencePiece_NORMAL))
+			tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
+			if slices.Contains(ast, piece.GetPiece()) {
+				tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
+			}
+
+			v.Types = append(v.Types, tt)
 		}
 	}
 

@@ -81,3 +91,23 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 
 	return &v, nil
 }
+
+func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
+	f, err := fsys.Open("special_tokens_map.json")
+	if errors.Is(err, os.ErrNotExist) {
+		return nil, nil
+	} else if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var m struct {
+		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
+	}
+
+	if err := json.NewDecoder(f).Decode(&m); err != nil {
+		return nil, err
+	}
+
+	return m.AdditionalSpecialTokens, nil
+}
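Note: parseAdditionalSpecialTokens makes any token listed in special_tokens_map.json come out as a CONTROL piece rather than NORMAL, which Gemma 2's turn markers rely on. A sketch of the decode path against a plausible (not verbatim) special_tokens_map.json:

	package main

	import (
		"encoding/json"
		"fmt"
		"strings"
	)

	func main() {
		// Assumed file contents — Gemma-style chat models typically list
		// their turn markers here.
		data := `{"additional_special_tokens": ["<start_of_turn>", "<end_of_turn>"]}`

		var m struct {
			AdditionalSpecialTokens []string `json:"additional_special_tokens"`
		}
		if err := json.NewDecoder(strings.NewReader(data)).Decode(&m); err != nil {
			panic(err)
		}
		fmt.Println(m.AdditionalSpecialTokens) // [<start_of_turn> <end_of_turn>]
	}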