ollama/convert/convert_mixtral.go

package convert

import (
	"fmt"
	"io"
	"slices"
	"strings"

	"github.com/ollama/ollama/llm"
)

// mixtral describes the parameters of a Mixtral model. It embeds llama, so it
// carries every llama parameter, and adds the two mixture-of-experts settings
// below, which are decoded from the JSON keys named in the field tags.
type mixtral struct {
	llama
	// NumLocalExperts is emitted by KV as "llama.expert_count" when non-zero,
	// and bounds the expert indices that Tensors strips from tensor names.
	NumLocalExperts    uint32 `json:"num_local_experts"`
	// NumExpertsPerToken is emitted by KV as "llama.expert_used_count" when
	// non-zero.
	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}

// Compile-time check that *mixtral satisfies the Converter interface.
var _ Converter = (*mixtral)(nil)

// KV returns the key/value metadata for the model. It starts from the
// metadata produced by the embedded llama converter and adds the expert
// counts, each only when its value is non-zero.
func (p *mixtral) KV(t *Tokenizer) llm.KV {
	meta := p.llama.KV(t)

	// a zero count is treated as "not set" and the key is left out entirely
	if count := p.NumLocalExperts; count > 0 {
		meta["llama.expert_count"] = count
	}
	if used := p.NumExpertsPerToken; used > 0 {
		meta["llama.expert_used_count"] = used
	}

	return meta
}

// Tensors converts the source tensors into llm.Tensor values.
//
// Every tensor whose name contains ".block_sparse_moe.experts." is removed
// from ts and grouped with the other expert tensors that share its rewritten
// name, i.e. the same layer and the same weight type (w1, w2 or w3). Each group
// becomes a single tensor whose shape is the first expert's shape prefixed
// with the number of experts in the group. The remaining tensors are passed
// to the embedded llama converter and appended after the merged ones.
//
// Merged tensors are returned in ascending name order so that the result is
// the same on every run.
func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
	oldnew := []string{
		"model.layers", "blk",
		"w1", "ffn_gate_exps",
		"w2", "ffn_down_exps",
		"w3", "ffn_up_exps",
	}

	// drop the per-expert path segment so every expert of a layer maps to the same name
	for i := range p.NumLocalExperts {
		oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
	}

	// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
	namer := strings.NewReplacer(oldnew...)
	merged := make(map[string]experts)

	// merge experts into a single tensor while removing them from ts
	ts = slices.DeleteFunc(ts, func(t Tensor) bool {
		if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
			return false
		}

		name := namer.Replace(t.Name())
		merged[name] = append(merged[name], t)
		return true
	})

	// map iteration order is unspecified, so walk the names in sorted order
	// to keep the output stable between runs
	names := make([]string, 0, len(merged))
	for name := range merged {
		names = append(names, name)
	}
	slices.Sort(names)

	var out []llm.Tensor
	for _, name := range names {
		// every group holds at least one tensor, so e[0] is always valid
		e := merged[name]

		// TODO(mxyng): sanity check experts
		out = append(out, llm.Tensor{
			Name:     name,
			Kind:     e[0].Kind(),
			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
			WriterTo: e,
		})
	}

	return append(out, p.llama.Tensors(ts)...)
}

// experts is a group of expert tensors that are written out back to back as
// if they were a single tensor.
type experts []Tensor

// WriteTo writes each tensor in e to w, in slice order, and returns the total
// number of bytes written. It stops at the first error and returns that error
// together with the number of bytes written up to that point.
func (e experts) WriteTo(w io.Writer) (int64, error) {
	var total int64

	// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
	for _, t := range e {
		// the canonical merged experts tensor stacks all experts along a new, 0 axis,
		// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
		// this accomplishes the same thing by writing each expert tensor in sequence
		n, err := t.WriteTo(w)
		// count bytes even on failure so the caller learns how much was written
		total += n
		if err != nil {
			return total, err
		}
	}

	return total, nil
}
refactor convert 2024-06-01 03:00:49 +00:00			`package convert`

			`import (`
			`"fmt"`
			`"io"`
			`"slices"`
			`"strings"`

			`"github.com/ollama/ollama/llm"`
			`)`

			`type mixtral struct {`
			`llama`
			NumLocalExperts uint32 `json:"num_local_experts"`
			NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
			`}`

			`var _ Converter = (*mixtral)(nil)`

			`func (p mixtral) KV(t Tokenizer) llm.KV {`
			`kv := p.llama.KV(t)`

			`if p.NumLocalExperts > 0 {`
			`kv["llama.expert_count"] = p.NumLocalExperts`
			`}`

			`if p.NumExpertsPerToken > 0 {`
			`kv["llama.expert_used_count"] = p.NumExpertsPerToken`
			`}`

			`return kv`
			`}`

comments 2024-07-08 23:59:48 +00:00			`func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {`
refactor convert 2024-06-01 03:00:49 +00:00			`oldnew := []string{`
			`"model.layers", "blk",`
			`"w1", "ffn_gate_exps",`
			`"w2", "ffn_down_exps",`
			`"w3", "ffn_up_exps",`
			`}`

			`for i := range p.NumLocalExperts {`
			`oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")`
			`}`

			`// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor`
			`namer := strings.NewReplacer(oldnew...)`
			`experts := make(map[string]experts)`

			`// merge experts into a single tensor while removing them from ts`
			`ts = slices.DeleteFunc(ts, func(t Tensor) bool {`
			`if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {`
			`return false`
			`}`

			`name := namer.Replace(t.Name())`
			`experts[name] = append(experts[name], t)`
			`return true`
			`})`

comments 2024-07-08 23:59:48 +00:00			`var out []llm.Tensor`
refactor convert 2024-06-01 03:00:49 +00:00			`for n, e := range experts {`
			`// TODO(mxyng): sanity check experts`
comments 2024-07-08 23:59:48 +00:00			`out = append(out, llm.Tensor{`
refactor convert 2024-06-01 03:00:49 +00:00			`Name: n,`
			`Kind: e[0].Kind(),`
			`Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),`
			`WriterTo: e,`
			`})`
			`}`

			`return append(out, p.llama.Tensors(ts)...)`
			`}`

			`type experts []Tensor`

			`func (e experts) WriteTo(w io.Writer) (int64, error) {`
			`// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check`
			`for _, t := range e {`
			`// the canonical merged experts tensor stacks all experts along a new, 0 axis,`
			// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
			`// this accomplishes the same thing by writing each expert tensor in sequence`
			`if _, err := t.WriteTo(w); err != nil {`
			`return 0, err`
			`}`
			`}`

			`return 0, nil`
			`}`