ollama/convert/convert.go

package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/llm"
)

type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`
}

type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

func (ModelParameters) KV(t *Tokenizer) llm.KV {
	kv := llm.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}

	return kv
}

func (p AdapterParameters) KV() llm.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

	kv := llm.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

type moreParser interface {
	parseMore(fs.FS) error
}

type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(llm.KV) llm.KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		return errors.New("unsupported architecture")
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}

// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return err
	}

	var p ModelParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
	}

	var conv ModelConverter
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
		conv = &llamaModel{}
	case "MixtralForCausalLM":
		conv = &mixtralModel{}
	case "GemmaForCausalLM":
		conv = &gemmaModel{}
	case "Gemma2ForCausalLM":
		conv = &gemma2Model{}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "BertModel":
		conv = &bertModel{}
	default:
		return errors.New("unsupported architecture")
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return err
	}

	if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	} else {
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`package convert`

			`import (`
			`"encoding/json"`
refactor convert 2024-06-01 03:00:49 +00:00			`"errors"`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`"fmt"`
quantize any fp16/fp32 model - FROM /path/to/{safetensors,pytorch} - FROM /path/to/fp{16,32}.bin - FROM model:fp{16,32} 2024-04-12 20:55:12 +00:00			`"io"`
convert: only extract large files 2024-06-29 23:53:59 +00:00			`"io/fs"`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`"log/slog"`
convert gemma2 2024-06-28 20:27:05 +00:00			`"strings"`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00
change `github.com/jmorganca/ollama` to `github.com/ollama/ollama` (#3347) 2024-03-26 20:04:17 +00:00			`"github.com/ollama/ollama/llm"`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`)`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`type ModelParameters struct {`
refactor convert 2024-06-01 03:00:49 +00:00			Architectures []string `json:"architectures"`
			VocabSize uint32 `json:"vocab_size"`
Add gemma safetensors conversion (#3250) Co-authored-by: Michael Yang <mxyng@pm.me> 2024-03-29 01:54:01 +00:00			`}`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`type AdapterParameters struct {`
			Alpha uint32 `json:"lora_alpha"`
			LoraLayers uint32 `json:"lora_layers"`
			`LoraParameters struct {`
			Rank uint32 `json:"rank"`
			Alpha float32 `json:"alpha"`
			Scale float32 `json:"scale"`
			} `json:"lora_parameters"`
			`}`

			`func (ModelParameters) KV(t *Tokenizer) llm.KV {`
refactor convert 2024-06-01 03:00:49 +00:00			`kv := llm.KV{`
			`"general.file_type": uint32(1),`
			`"general.quantization_version": uint32(2),`
			`"tokenizer.ggml.pre": t.Pre,`
			`"tokenizer.ggml.model": t.Vocabulary.Model,`
			`"tokenizer.ggml.tokens": t.Vocabulary.Tokens,`
			`"tokenizer.ggml.scores": t.Vocabulary.Scores,`
			`"tokenizer.ggml.token_type": t.Vocabulary.Types,`
			`}`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00
add conversion for microsoft phi 3 mini/medium 4k, 128 2024-06-03 22:53:58 +00:00			`if len(t.Merges) > 0 {`
			`kv["tokenizer.ggml.merges"] = t.Merges`
			`}`

refactor convert 2024-06-01 03:00:49 +00:00			`if t.Template != "" {`
			`kv["tokenizer.chat_template"] = t.Template`
			`}`
Simplify model conversion (#3422) 2024-04-01 23:14:53 +00:00
refactor convert 2024-06-01 03:00:49 +00:00			`for _, sv := range t.SpecialVocabulary {`
			`kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)`
			`kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken`
			`}`
Add llama2 / torch models for `ollama create` (#3607) 2024-04-15 18:26:42 +00:00
refactor convert 2024-06-01 03:00:49 +00:00			`return kv`
Simplify model conversion (#3422) 2024-04-01 23:14:53 +00:00			`}`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`func (p AdapterParameters) KV() llm.KV {`
			`var alpha float32`
			`if p.LoraParameters.Alpha == 0 {`
			`alpha = float32(p.Alpha)`
			`} else {`
			`alpha = p.LoraParameters.Alpha`
			`}`

			`kv := llm.KV{`
			`"adapter.lora.alpha": alpha,`
			`"adapter.type": "lora",`
			`"general.file_type": uint32(1),`
			`"general.type": "adapter",`
			`"general.version": "v0.2",`
			`}`

			`return kv`
			`}`

			`func (ModelParameters) specialTokenTypes() []string {`
refactor convert 2024-06-01 03:00:49 +00:00			`return []string{`
			`"bos", "eos", "unk", "sep", "pad", "cls", "mask",`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`
refactor convert 2024-06-01 03:00:49 +00:00			`}`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {`
refactor convert 2024-06-01 03:00:49 +00:00			`return llm.WriteGGUF(ws, kv, ts)`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {`
			`return llm.WriteGGUF(ws, kv, ts)`
			`}`

			`type ModelConverter interface {`
refactor convert 2024-06-01 03:00:49 +00:00			`// KV maps parameters to LLM key-values`
			`KV(*Tokenizer) llm.KV`
			`// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.`
comments 2024-07-08 23:59:48 +00:00			`Tensors([]Tensor) []llm.Tensor`
convert gemma2 2024-06-28 20:27:05 +00:00			`// Replacements returns a list of string pairs to replace in tensor names.`
			`// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details`
			`Replacements() []string`
refactor convert 2024-06-01 03:00:49 +00:00
comments 2024-07-08 23:59:48 +00:00			`// specialTokenTypes returns any special token types the model uses`
			`specialTokenTypes() []string`
convert gemma2 2024-06-28 20:27:05 +00:00			`// writeFile writes the model to the provided io.WriteSeeker`
comments 2024-07-08 23:59:48 +00:00			`writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

bert 2024-06-06 15:59:04 +00:00			`type moreParser interface {`
			`parseMore(fs.FS) error`
			`}`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`type AdapterConverter interface {`
			`// KV maps parameters to LLM key-values`
			`KV(llm.KV) llm.KV`
			`// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.`
			`Tensors([]Tensor) []llm.Tensor`
			`// Replacements returns a list of string pairs to replace in tensor names.`
			`// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details`
			`Replacements() []string`

			`writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error`
			`}`

			`func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {`
			`bts, err := fs.ReadFile(fsys, "adapter_config.json")`
			`if err != nil {`
			`return err`
			`}`

			`var p AdapterParameters`
			`if err := json.Unmarshal(bts, &p); err != nil {`
			`return err`
			`}`

			`arch, ok := baseKV["general.architecture"]`
			`if !ok {`
			`return errors.New("architecture not set for the base model")`
			`}`

			`var conv AdapterConverter`
			`switch arch {`
			`case "llama":`
			`conv = &llamaAdapter{}`
			`case "gemma2":`
			`conv = &gemma2Adapter{}`
			`default:`
			`return errors.New("unsupported architecture")`
			`}`

			`ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))`
			`if err != nil {`
			`return err`
			`}`

			`if err := json.Unmarshal(bts, conv); err != nil {`
			`return err`
			`}`

			`return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))`
			`}`

comments 2024-07-08 23:59:48 +00:00			`// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations`
			`// and files it finds in the input path.`
			`// Supported input model formats include safetensors.`
			`// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {`
convert: only extract large files 2024-06-29 23:53:59 +00:00			`bts, err := fs.ReadFile(fsys, "config.json")`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`if err != nil {`
refactor convert 2024-06-01 03:00:49 +00:00			`return err`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`var p ModelParameters`
comments 2024-07-08 23:59:48 +00:00			`if err := json.Unmarshal(bts, &p); err != nil {`
refactor convert 2024-06-01 03:00:49 +00:00			`return err`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

refactor convert 2024-06-01 03:00:49 +00:00			`if len(p.Architectures) < 1 {`
			`return errors.New("unknown architecture")`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`var conv ModelConverter`
refactor convert 2024-06-01 03:00:49 +00:00			`switch p.Architectures[0] {`
			`case "LlamaForCausalLM", "MistralForCausalLM":`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`conv = &llamaModel{}`
refactor convert 2024-06-01 03:00:49 +00:00			`case "MixtralForCausalLM":`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`conv = &mixtralModel{}`
refactor convert 2024-06-01 03:00:49 +00:00			`case "GemmaForCausalLM":`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`conv = &gemmaModel{}`
convert gemma2 2024-06-28 20:27:05 +00:00			`case "Gemma2ForCausalLM":`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`conv = &gemma2Model{}`
add conversion for microsoft phi 3 mini/medium 4k, 128 2024-06-03 22:53:58 +00:00			`case "Phi3ForCausalLM":`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`conv = &phi3Model{}`
bert 2024-06-06 15:59:04 +00:00			`case "BertModel":`
convert safetensor adapters into GGUF (#6327) 2024-08-23 18:29:56 +00:00			`conv = &bertModel{}`
refactor convert 2024-06-01 03:00:49 +00:00			`default:`
			`return errors.New("unsupported architecture")`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

comments 2024-07-08 23:59:48 +00:00			`if err := json.Unmarshal(bts, conv); err != nil {`
refactor convert 2024-06-01 03:00:49 +00:00			`return err`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

bert 2024-06-06 15:59:04 +00:00			`if t, ok := conv.(moreParser); ok {`
			`if err := t.parseMore(fsys); err != nil {`
			`return err`
			`}`
			`}`

convert: only extract large files 2024-06-29 23:53:59 +00:00			`t, err := parseTokenizer(fsys, conv.specialTokenTypes())`
refactor convert 2024-06-01 03:00:49 +00:00			`if err != nil {`
			`return err`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`

refactor convert 2024-06-01 03:00:49 +00:00			`if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {`
			`slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))`
			`for i := range vocabSize - len(t.Vocabulary.Tokens) {`
			`t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))`
			`t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)`
			`t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`
comments 2024-07-08 23:59:48 +00:00			`} else {`
			`slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`
refactor convert 2024-06-01 03:00:49 +00:00
convert gemma2 2024-06-28 20:27:05 +00:00			`ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))`
refactor convert 2024-06-01 03:00:49 +00:00			`if err != nil {`
			`return err`
Add gemma safetensors conversion (#3250) Co-authored-by: Michael Yang <mxyng@pm.me> 2024-03-29 01:54:01 +00:00			`}`

comments 2024-07-08 23:59:48 +00:00			`return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))`
Convert Safetensors to an Ollama model (#2824) 2024-03-07 05:01:51 +00:00			`}`