convert safetensor adapters into GGUF (#6327)
This commit is contained in:
parent
7a1e1c1caf
commit
0c819e167b
16 changed files with 697 additions and 101 deletions
|
@ -204,6 +204,12 @@ func tempZipFiles(path string) (string, error) {
|
||||||
// safetensors files might be unresolved git lfs references; skip if they are
|
// safetensors files might be unresolved git lfs references; skip if they are
|
||||||
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
||||||
files = append(files, st...)
|
files = append(files, st...)
|
||||||
|
} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||||
|
// covers adapters.safetensors
|
||||||
|
files = append(files, st...)
|
||||||
|
} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||||
|
// covers adapter_model.safetensors
|
||||||
|
files = append(files, st...)
|
||||||
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
||||||
// pytorch files might also be unresolved git lfs references; skip if they are
|
// pytorch files might also be unresolved git lfs references; skip if they are
|
||||||
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
||||||
|
|
|
@ -12,12 +12,22 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Parameters struct {
|
type ModelParameters struct {
|
||||||
Architectures []string `json:"architectures"`
|
Architectures []string `json:"architectures"`
|
||||||
VocabSize uint32 `json:"vocab_size"`
|
VocabSize uint32 `json:"vocab_size"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (Parameters) KV(t *Tokenizer) llm.KV {
|
type AdapterParameters struct {
|
||||||
|
Alpha uint32 `json:"lora_alpha"`
|
||||||
|
LoraLayers uint32 `json:"lora_layers"`
|
||||||
|
LoraParameters struct {
|
||||||
|
Rank uint32 `json:"rank"`
|
||||||
|
Alpha float32 `json:"alpha"`
|
||||||
|
Scale float32 `json:"scale"`
|
||||||
|
} `json:"lora_parameters"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ModelParameters) KV(t *Tokenizer) llm.KV {
|
||||||
kv := llm.KV{
|
kv := llm.KV{
|
||||||
"general.file_type": uint32(1),
|
"general.file_type": uint32(1),
|
||||||
"general.quantization_version": uint32(2),
|
"general.quantization_version": uint32(2),
|
||||||
|
@ -44,17 +54,40 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (Parameters) specialTokenTypes() []string {
|
func (p AdapterParameters) KV() llm.KV {
|
||||||
|
var alpha float32
|
||||||
|
if p.LoraParameters.Alpha == 0 {
|
||||||
|
alpha = float32(p.Alpha)
|
||||||
|
} else {
|
||||||
|
alpha = p.LoraParameters.Alpha
|
||||||
|
}
|
||||||
|
|
||||||
|
kv := llm.KV{
|
||||||
|
"adapter.lora.alpha": alpha,
|
||||||
|
"adapter.type": "lora",
|
||||||
|
"general.file_type": uint32(1),
|
||||||
|
"general.type": "adapter",
|
||||||
|
"general.version": "v0.2",
|
||||||
|
}
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ModelParameters) specialTokenTypes() []string {
|
||||||
return []string{
|
return []string{
|
||||||
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
|
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||||
return llm.WriteGGUF(ws, kv, ts)
|
return llm.WriteGGUF(ws, kv, ts)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Converter interface {
|
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||||
|
return llm.WriteGGUF(ws, kv, ts)
|
||||||
|
}
|
||||||
|
|
||||||
|
type ModelConverter interface {
|
||||||
// KV maps parameters to LLM key-values
|
// KV maps parameters to LLM key-values
|
||||||
KV(*Tokenizer) llm.KV
|
KV(*Tokenizer) llm.KV
|
||||||
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
||||||
|
@ -73,17 +106,67 @@ type moreParser interface {
|
||||||
parseMore(fs.FS) error
|
parseMore(fs.FS) error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type AdapterConverter interface {
|
||||||
|
// KV maps parameters to LLM key-values
|
||||||
|
KV(llm.KV) llm.KV
|
||||||
|
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
|
||||||
|
Tensors([]Tensor) []llm.Tensor
|
||||||
|
// Replacements returns a list of string pairs to replace in tensor names.
|
||||||
|
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||||
|
Replacements() []string
|
||||||
|
|
||||||
|
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||||
|
}
|
||||||
|
|
||||||
|
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
|
||||||
|
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var p AdapterParameters
|
||||||
|
if err := json.Unmarshal(bts, &p); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
arch, ok := baseKV["general.architecture"]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("architecture not set for the base model")
|
||||||
|
}
|
||||||
|
|
||||||
|
var conv AdapterConverter
|
||||||
|
switch arch {
|
||||||
|
case "llama":
|
||||||
|
conv = &llamaAdapter{}
|
||||||
|
case "gemma2":
|
||||||
|
conv = &gemma2Adapter{}
|
||||||
|
default:
|
||||||
|
return errors.New("unsupported architecture")
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.Unmarshal(bts, conv); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
|
||||||
|
}
|
||||||
|
|
||||||
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
||||||
// and files it finds in the input path.
|
// and files it finds in the input path.
|
||||||
// Supported input model formats include safetensors.
|
// Supported input model formats include safetensors.
|
||||||
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||||
func Convert(fsys fs.FS, ws io.WriteSeeker) error {
|
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
||||||
bts, err := fs.ReadFile(fsys, "config.json")
|
bts, err := fs.ReadFile(fsys, "config.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var p Parameters
|
var p ModelParameters
|
||||||
if err := json.Unmarshal(bts, &p); err != nil {
|
if err := json.Unmarshal(bts, &p); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -92,20 +175,20 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
|
||||||
return errors.New("unknown architecture")
|
return errors.New("unknown architecture")
|
||||||
}
|
}
|
||||||
|
|
||||||
var conv Converter
|
var conv ModelConverter
|
||||||
switch p.Architectures[0] {
|
switch p.Architectures[0] {
|
||||||
case "LlamaForCausalLM", "MistralForCausalLM":
|
case "LlamaForCausalLM", "MistralForCausalLM":
|
||||||
conv = &llama{}
|
conv = &llamaModel{}
|
||||||
case "MixtralForCausalLM":
|
case "MixtralForCausalLM":
|
||||||
conv = &mixtral{}
|
conv = &mixtralModel{}
|
||||||
case "GemmaForCausalLM":
|
case "GemmaForCausalLM":
|
||||||
conv = &gemma{}
|
conv = &gemmaModel{}
|
||||||
case "Gemma2ForCausalLM":
|
case "Gemma2ForCausalLM":
|
||||||
conv = &gemma2{}
|
conv = &gemma2Model{}
|
||||||
case "Phi3ForCausalLM":
|
case "Phi3ForCausalLM":
|
||||||
conv = &phi3{}
|
conv = &phi3Model{}
|
||||||
case "BertModel":
|
case "BertModel":
|
||||||
conv = &bert{}
|
conv = &bertModel{}
|
||||||
default:
|
default:
|
||||||
return errors.New("unsupported architecture")
|
return errors.New("unsupported architecture")
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,8 +11,8 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type bert struct {
|
type bertModel struct {
|
||||||
Parameters
|
ModelParameters
|
||||||
NLayers uint32 `json:"n_layers"`
|
NLayers uint32 `json:"n_layers"`
|
||||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
NLayer uint32 `json:"n_layer"`
|
NLayer uint32 `json:"n_layer"`
|
||||||
|
@ -33,11 +33,11 @@ type bert struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
_ Converter = (*bert)(nil)
|
_ ModelConverter = (*bertModel)(nil)
|
||||||
_ moreParser = (*bert)(nil)
|
_ moreParser = (*bertModel)(nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
func (p *bert) parseMore(fsys fs.FS) error {
|
func (p *bertModel) parseMore(fsys fs.FS) error {
|
||||||
bts, err := fs.ReadFile(fsys, "modules.json")
|
bts, err := fs.ReadFile(fsys, "modules.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -85,8 +85,8 @@ func (p *bert) parseMore(fsys fs.FS) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *bert) KV(t *Tokenizer) llm.KV {
|
func (p *bertModel) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.ModelParameters.KV(t)
|
||||||
kv["general.architecture"] = "bert"
|
kv["general.architecture"] = "bert"
|
||||||
kv["bert.attention.causal"] = false
|
kv["bert.attention.causal"] = false
|
||||||
kv["bert.pooling_type"] = p.PoolingType
|
kv["bert.pooling_type"] = p.PoolingType
|
||||||
|
@ -132,7 +132,7 @@ func (p *bert) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
|
func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
var out []llm.Tensor
|
var out []llm.Tensor
|
||||||
for _, t := range ts {
|
for _, t := range ts {
|
||||||
if slices.Contains([]string{
|
if slices.Contains([]string{
|
||||||
|
@ -154,7 +154,7 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bert) Replacements() []string {
|
func (bertModel) Replacements() []string {
|
||||||
return []string{
|
return []string{
|
||||||
"encoder.layer", "blk",
|
"encoder.layer", "blk",
|
||||||
"encoder.layers", "blk",
|
"encoder.layers", "blk",
|
||||||
|
|
|
@ -9,8 +9,8 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type gemma struct {
|
type gemmaModel struct {
|
||||||
Parameters
|
ModelParameters
|
||||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||||
HiddenSize uint32 `json:"hidden_size"`
|
HiddenSize uint32 `json:"hidden_size"`
|
||||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
|
@ -21,10 +21,10 @@ type gemma struct {
|
||||||
HeadDim uint32 `json:"head_dim"`
|
HeadDim uint32 `json:"head_dim"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ Converter = (*gemma)(nil)
|
var _ ModelConverter = (*gemmaModel)(nil)
|
||||||
|
|
||||||
func (p *gemma) KV(t *Tokenizer) llm.KV {
|
func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.ModelParameters.KV(t)
|
||||||
kv["general.architecture"] = "gemma"
|
kv["general.architecture"] = "gemma"
|
||||||
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["gemma.embedding_length"] = p.HiddenSize
|
kv["gemma.embedding_length"] = p.HiddenSize
|
||||||
|
@ -42,8 +42,8 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
|
func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
out := make([]llm.Tensor, 0, len(ts))
|
var out []llm.Tensor
|
||||||
for _, t := range ts {
|
for _, t := range ts {
|
||||||
if strings.HasSuffix(t.Name(), "_norm.weight") {
|
if strings.HasSuffix(t.Name(), "_norm.weight") {
|
||||||
t.SetRepacker(p.addOne)
|
t.SetRepacker(p.addOne)
|
||||||
|
@ -60,7 +60,7 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *gemma) Replacements() []string {
|
func (p *gemmaModel) Replacements() []string {
|
||||||
return []string{
|
return []string{
|
||||||
"model.embed_tokens", "token_embd",
|
"model.embed_tokens", "token_embd",
|
||||||
"model.norm", "output_norm",
|
"model.norm", "output_norm",
|
||||||
|
@ -77,7 +77,7 @@ func (p *gemma) Replacements() []string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
||||||
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
||||||
|
|
||||||
|
|
|
@ -4,15 +4,15 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type gemma2 struct {
|
type gemma2Model struct {
|
||||||
gemma
|
gemmaModel
|
||||||
SlidingWindow uint32 `json:"sliding_window"`
|
SlidingWindow uint32 `json:"sliding_window"`
|
||||||
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
|
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
|
||||||
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *gemma2) KV(t *Tokenizer) llm.KV {
|
func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.ModelParameters.KV(t)
|
||||||
kv["general.architecture"] = "gemma2"
|
kv["general.architecture"] = "gemma2"
|
||||||
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["gemma2.embedding_length"] = p.HiddenSize
|
kv["gemma2.embedding_length"] = p.HiddenSize
|
||||||
|
@ -33,9 +33,9 @@ func (p *gemma2) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *gemma2) Replacements() []string {
|
func (p *gemma2Model) Replacements() []string {
|
||||||
return append(
|
return append(
|
||||||
p.gemma.Replacements(),
|
p.gemmaModel.Replacements(),
|
||||||
"post_attention_layernorm", "post_attention_norm",
|
"post_attention_layernorm", "post_attention_norm",
|
||||||
"pre_feedforward_layernorm", "ffn_norm",
|
"pre_feedforward_layernorm", "ffn_norm",
|
||||||
"post_feedforward_layernorm", "post_ffw_norm",
|
"post_feedforward_layernorm", "post_ffw_norm",
|
||||||
|
|
91
convert/convert_gemma2_adapter.go
Normal file
91
convert/convert_gemma2_adapter.go
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pdevine/tensor"
|
||||||
|
"github.com/pdevine/tensor/native"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type gemma2Adapter struct {
|
||||||
|
AdapterParameters
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ AdapterConverter = (*gemma2Adapter)(nil)
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
|
||||||
|
kv := p.AdapterParameters.KV()
|
||||||
|
kv["general.architecture"] = "gemma2"
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
for _, t := range ts {
|
||||||
|
shape := t.Shape()
|
||||||
|
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||||
|
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||||
|
shape[0], shape[1] = shape[1], shape[0]
|
||||||
|
t.SetRepacker(p.repack)
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: t.Shape(),
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"base_model.model.", "",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"self_attn.q_proj", "attn_q",
|
||||||
|
"self_attn.k_proj", "attn_k",
|
||||||
|
"self_attn.v_proj", "attn_v",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.gate_proj", "ffn_gate",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.up_proj", "ffn_up",
|
||||||
|
"lora_A.weight", "weight.lora_a",
|
||||||
|
"lora_B.weight", "weight.lora_b",
|
||||||
|
"lora_a", "weight.lora_a",
|
||||||
|
"lora_b", "weight.lora_b",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
dims := []int{int(shape[1]), int(shape[0])}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
|
||||||
|
if err := n.T(1, 0); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
|
@ -12,8 +12,8 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type llama struct {
|
type llamaModel struct {
|
||||||
Parameters
|
ModelParameters
|
||||||
NLayers uint32 `json:"n_layers"`
|
NLayers uint32 `json:"n_layers"`
|
||||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
NLayer uint32 `json:"n_layer"`
|
NLayer uint32 `json:"n_layer"`
|
||||||
|
@ -44,10 +44,10 @@ type llama struct {
|
||||||
HeadDim uint32 `json:"head_dim"`
|
HeadDim uint32 `json:"head_dim"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ Converter = (*llama)(nil)
|
var _ ModelConverter = (*llamaModel)(nil)
|
||||||
|
|
||||||
func (p *llama) KV(t *Tokenizer) llm.KV {
|
func (p *llamaModel) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.ModelParameters.KV(t)
|
||||||
kv["general.architecture"] = "llama"
|
kv["general.architecture"] = "llama"
|
||||||
kv["llama.vocab_size"] = p.VocabSize
|
kv["llama.vocab_size"] = p.VocabSize
|
||||||
|
|
||||||
|
@ -120,7 +120,7 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
var out []llm.Tensor
|
var out []llm.Tensor
|
||||||
|
|
||||||
if p.RopeScaling.factors != nil {
|
if p.RopeScaling.factors != nil {
|
||||||
|
@ -149,7 +149,7 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *llama) Replacements() []string {
|
func (p *llamaModel) Replacements() []string {
|
||||||
return []string{
|
return []string{
|
||||||
"lm_head", "output",
|
"lm_head", "output",
|
||||||
"model.embed_tokens", "token_embd",
|
"model.embed_tokens", "token_embd",
|
||||||
|
@ -167,7 +167,7 @@ func (p *llama) Replacements() []string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
var dims []int
|
var dims []int
|
||||||
for _, dim := range shape {
|
for _, dim := range shape {
|
||||||
dims = append(dims, int(dim))
|
dims = append(dims, int(dim))
|
||||||
|
|
169
convert/convert_llama_adapter.go
Normal file
169
convert/convert_llama_adapter.go
Normal file
|
@ -0,0 +1,169 @@
|
||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pdevine/tensor"
|
||||||
|
"github.com/pdevine/tensor/native"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type llamaAdapter struct {
|
||||||
|
AdapterParameters
|
||||||
|
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||||
|
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ AdapterConverter = (*llamaAdapter)(nil)
|
||||||
|
|
||||||
|
func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
|
||||||
|
kv := p.AdapterParameters.KV()
|
||||||
|
kv["general.architecture"] = "llama"
|
||||||
|
kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
|
||||||
|
kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
|
||||||
|
|
||||||
|
p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
for _, t := range ts {
|
||||||
|
shape := t.Shape()
|
||||||
|
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||||
|
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||||
|
shape[0], shape[1] = shape[1], shape[0]
|
||||||
|
t.SetRepacker(p.repackAndTranspose)
|
||||||
|
} else {
|
||||||
|
t.SetRepacker(p.repack)
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: shape,
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"base_model.model.", "",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"self_attn.q_proj", "attn_q",
|
||||||
|
"self_attn.k_proj", "attn_k",
|
||||||
|
"self_attn.v_proj", "attn_v",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.gate_proj", "ffn_gate",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.up_proj", "ffn_up",
|
||||||
|
"lora_A.weight", "weight.lora_a",
|
||||||
|
"lora_B.weight", "weight.lora_b",
|
||||||
|
"lora_a", "weight.lora_a",
|
||||||
|
"lora_b", "weight.lora_b",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
dims := []int{int(shape[1]), int(shape[0])}
|
||||||
|
|
||||||
|
var heads uint32
|
||||||
|
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||||
|
heads = p.NumAttentionHeads
|
||||||
|
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||||
|
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||||
|
} else {
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
|
||||||
|
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(0, 2, 1, 3); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
dims := []int{int(shape[1]), int(shape[0])}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
|
||||||
|
var heads uint32
|
||||||
|
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||||
|
heads = p.NumAttentionHeads
|
||||||
|
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||||
|
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||||
|
}
|
||||||
|
|
||||||
|
if heads > 0 {
|
||||||
|
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(0, 2, 1, 3); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(1, 0); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
|
@ -9,14 +9,14 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type mixtral struct {
|
type mixtralModel struct {
|
||||||
llama
|
llamaModel
|
||||||
NumLocalExperts uint32 `json:"num_local_experts"`
|
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *mixtral) KV(t *Tokenizer) llm.KV {
|
func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.llama.KV(t)
|
kv := p.llamaModel.KV(t)
|
||||||
|
|
||||||
if p.NumLocalExperts > 0 {
|
if p.NumLocalExperts > 0 {
|
||||||
kv["llama.expert_count"] = p.NumLocalExperts
|
kv["llama.expert_count"] = p.NumLocalExperts
|
||||||
|
@ -29,7 +29,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
|
func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
oldnew := []string{
|
oldnew := []string{
|
||||||
"model.layers", "blk",
|
"model.layers", "blk",
|
||||||
"w1", "ffn_gate_exps",
|
"w1", "ffn_gate_exps",
|
||||||
|
@ -67,12 +67,12 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return append(out, p.llama.Tensors(ts)...)
|
return append(out, p.llamaModel.Tensors(ts)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *mixtral) Replacements() []string {
|
func (p *mixtralModel) Replacements() []string {
|
||||||
return append(
|
return append(
|
||||||
p.llama.Replacements(),
|
p.llamaModel.Replacements(),
|
||||||
"block_sparse_moe.gate", "ffn_gate_inp",
|
"block_sparse_moe.gate", "ffn_gate_inp",
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,8 +11,8 @@ import (
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type phi3 struct {
|
type phi3Model struct {
|
||||||
Parameters
|
ModelParameters
|
||||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
NLayers uint32 `json:"n_layers"`
|
NLayers uint32 `json:"n_layers"`
|
||||||
HiddenSize uint32 `json:"hidden_size"`
|
HiddenSize uint32 `json:"hidden_size"`
|
||||||
|
@ -35,10 +35,10 @@ type phi3 struct {
|
||||||
SlidingWindow uint32 `json:"sliding_window"`
|
SlidingWindow uint32 `json:"sliding_window"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ Converter = (*phi3)(nil)
|
var _ ModelConverter = (*phi3Model)(nil)
|
||||||
|
|
||||||
func (p *phi3) KV(t *Tokenizer) llm.KV {
|
func (p *phi3Model) KV(t *Tokenizer) llm.KV {
|
||||||
kv := p.Parameters.KV(t)
|
kv := p.ModelParameters.KV(t)
|
||||||
kv["general.architecture"] = "phi3"
|
kv["general.architecture"] = "phi3"
|
||||||
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||||
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||||
|
@ -68,7 +68,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
|
||||||
return kv
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
|
func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
var addRopeFactors sync.Once
|
var addRopeFactors sync.Once
|
||||||
|
|
||||||
out := make([]llm.Tensor, 0, len(ts)+2)
|
out := make([]llm.Tensor, 0, len(ts)+2)
|
||||||
|
@ -100,7 +100,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *phi3) Replacements() []string {
|
func (p *phi3Model) Replacements() []string {
|
||||||
return []string{
|
return []string{
|
||||||
"lm_head", "output",
|
"lm_head", "output",
|
||||||
"model.embed_tokens", "token_embd",
|
"model.embed_tokens", "token_embd",
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
package convert
|
package convert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
|
"encoding/binary"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
|
@ -29,7 +31,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
if err := Convert(fsys, f); err != nil {
|
if err := ConvertModel(fsys, f); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,6 +53,34 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
||||||
return r, m.KV(), m.Tensors()
|
return r, m.KV(), m.Tensors()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
|
||||||
|
actual := make(map[string]string)
|
||||||
|
for k, v := range kv {
|
||||||
|
if s, ok := v.(json.Marshaler); !ok {
|
||||||
|
actual[k] = fmt.Sprintf("%v", v)
|
||||||
|
} else {
|
||||||
|
bts, err := json.Marshal(s)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tensor := range tensors.Items {
|
||||||
|
sha256sum := sha256.New()
|
||||||
|
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||||
|
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
return actual
|
||||||
|
}
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
var level slog.Level
|
var level slog.Level
|
||||||
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
|
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
|
||||||
|
@ -85,29 +115,7 @@ func TestConvertFull(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
f, kv, tensors := convertFull(t, os.DirFS(p))
|
f, kv, tensors := convertFull(t, os.DirFS(p))
|
||||||
actual := make(map[string]string)
|
actual := generateResultsJSON(t, f, kv, tensors)
|
||||||
for k, v := range kv {
|
|
||||||
if s, ok := v.(json.Marshaler); !ok {
|
|
||||||
actual[k] = fmt.Sprintf("%v", v)
|
|
||||||
} else {
|
|
||||||
bts, err := json.Marshal(s)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tensor := range tensors.Items {
|
|
||||||
sha256sum := sha256.New()
|
|
||||||
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
|
||||||
if _, err := io.Copy(sha256sum, sr); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
|
||||||
}
|
|
||||||
|
|
||||||
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -131,3 +139,209 @@ func TestConvertFull(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertAdapter(t *testing.T) {
|
||||||
|
type AdapterCase struct {
|
||||||
|
Name string
|
||||||
|
BaseKV map[string]any
|
||||||
|
Expected map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []AdapterCase{
|
||||||
|
{
|
||||||
|
Name: "discollama",
|
||||||
|
BaseKV: map[string]any{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"llama.attention.head_count": uint32(32),
|
||||||
|
"llama.attention.head_count_kv": uint32(8),
|
||||||
|
},
|
||||||
|
Expected: map[string]string{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.parameter_count": "106496",
|
||||||
|
"general.type": "adapter",
|
||||||
|
"general.version": "v0.2",
|
||||||
|
"adapter.lora.alpha": "16",
|
||||||
|
"adapter.type": "lora",
|
||||||
|
"llama.attention.head_count": "32",
|
||||||
|
"llama.attention.head_count_kv": "8",
|
||||||
|
"blk.31.attn_q.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||||
|
"blk.31.attn_q.weight.lora_b": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||||
|
"blk.31.attn_v.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||||
|
"blk.31.attn_v.weight.lora_b": "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.Name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
generateLoraTestData(t, tempDir)
|
||||||
|
|
||||||
|
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
r, err := os.Open(f.Name())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
|
||||||
|
|
||||||
|
keys := maps.Keys(c.Expected)
|
||||||
|
slices.Sort(keys)
|
||||||
|
for _, k := range keys {
|
||||||
|
if v, ok := actual[k]; !ok {
|
||||||
|
t.Errorf("missing %s", k)
|
||||||
|
} else if v != c.Expected[k] {
|
||||||
|
t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateLoraTestData(t *testing.T, tempDir string) {
|
||||||
|
type tensorData struct {
|
||||||
|
Offsets []int `json:"data_offsets"`
|
||||||
|
Type string `json:"dtype"`
|
||||||
|
Shape []int `json:"shape"`
|
||||||
|
}
|
||||||
|
offset := 4096 * 8 * 4
|
||||||
|
|
||||||
|
td := map[string]*tensorData{"__metadata__": nil}
|
||||||
|
td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
|
||||||
|
Offsets: []int{0, offset},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{4096, 8},
|
||||||
|
}
|
||||||
|
td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
|
||||||
|
Offsets: []int{offset, offset * 2},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{8, 4096},
|
||||||
|
}
|
||||||
|
td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
|
||||||
|
Offsets: []int{offset * 2, offset * 3},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{4096, 8},
|
||||||
|
}
|
||||||
|
td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
|
||||||
|
Offsets: []int{offset * 3, offset*3 + 8*1024*4},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{8, 1024},
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := json.Marshal(td)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
l := int64(len(data))
|
||||||
|
err = binary.Write(&buf, binary.LittleEndian, l)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = buf.Write(data)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// write some data for the tensors
|
||||||
|
|
||||||
|
ones := make([]float32, 4096*8)
|
||||||
|
for i := range ones {
|
||||||
|
ones[i] = float32(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
for range 3 {
|
||||||
|
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ones = make([]float32, 1024*8)
|
||||||
|
for i := range ones {
|
||||||
|
ones[i] = float32(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer fdata.Close()
|
||||||
|
|
||||||
|
_, err = fdata.Write(buf.Bytes())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configData := `
|
||||||
|
{
|
||||||
|
"adapter_path": "adapters-test",
|
||||||
|
"batch_size": 8,
|
||||||
|
"config": "config-tiny.json",
|
||||||
|
"data": "../discollama-completion",
|
||||||
|
"grad_checkpoint": null,
|
||||||
|
"iters": 1000,
|
||||||
|
"learning_rate": 1e-05,
|
||||||
|
"lora_layers": 1,
|
||||||
|
"lora_parameters": {
|
||||||
|
"rank": 8,
|
||||||
|
"alpha": 16,
|
||||||
|
"dropout": 0.0,
|
||||||
|
"scale": 2.0
|
||||||
|
},
|
||||||
|
"lr_schedule": null,
|
||||||
|
"max_seq_length": 2048,
|
||||||
|
"model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
|
||||||
|
"resume_adapter_file": null,
|
||||||
|
"save_every": 100,
|
||||||
|
"seed": 0,
|
||||||
|
"steps_per_eval": 200,
|
||||||
|
"steps_per_report": 10,
|
||||||
|
"test": false,
|
||||||
|
"test_batches": 500,
|
||||||
|
"train": true,
|
||||||
|
"use_dora": false,
|
||||||
|
"val_batches": 25
|
||||||
|
}
|
||||||
|
`
|
||||||
|
f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
_, err = f.WriteString(configData)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -64,6 +64,8 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
|
||||||
}{
|
}{
|
||||||
{"model-*-of-*.safetensors", parseSafetensors},
|
{"model-*-of-*.safetensors", parseSafetensors},
|
||||||
{"model.safetensors", parseSafetensors},
|
{"model.safetensors", parseSafetensors},
|
||||||
|
{"adapters.safetensors", parseSafetensors},
|
||||||
|
{"adapter_model.safetensors", parseSafetensors},
|
||||||
{"pytorch_model-*-of-*.bin", parseTorch},
|
{"pytorch_model-*-of-*.bin", parseTorch},
|
||||||
{"pytorch_model.bin", parseTorch},
|
{"pytorch_model.bin", parseTorch},
|
||||||
{"consolidated.*.pth", parseTorch},
|
{"consolidated.*.pth", parseTorch},
|
||||||
|
|
|
@ -43,6 +43,14 @@ func (kv KV) Architecture() string {
|
||||||
return "unknown"
|
return "unknown"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (kv KV) Kind() string {
|
||||||
|
if s, ok := kv["general.type"].(string); ok {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
return "unknown"
|
||||||
|
}
|
||||||
|
|
||||||
func (kv KV) ParameterCount() uint64 {
|
func (kv KV) ParameterCount() uint64 {
|
||||||
return kv.u64("general.parameter_count")
|
return kv.u64("general.parameter_count")
|
||||||
}
|
}
|
||||||
|
|
|
@ -369,13 +369,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||||
parameters := make(map[string]any)
|
parameters := make(map[string]any)
|
||||||
|
|
||||||
var layers []Layer
|
var layers []Layer
|
||||||
|
var baseLayers []*layerGGML
|
||||||
for _, c := range modelfile.Commands {
|
for _, c := range modelfile.Commands {
|
||||||
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
|
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
|
||||||
|
command := c.Name
|
||||||
|
|
||||||
switch c.Name {
|
switch command {
|
||||||
case "model", "adapter":
|
case "model", "adapter":
|
||||||
var baseLayers []*layerGGML
|
if name := model.ParseName(c.Args); name.IsValid() && command == "model" {
|
||||||
if name := model.ParseName(c.Args); name.IsValid() {
|
|
||||||
baseLayers, err = parseFromModel(ctx, name, fn)
|
baseLayers, err = parseFromModel(ctx, name, fn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -409,14 +410,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||||
}
|
}
|
||||||
defer blob.Close()
|
defer blob.Close()
|
||||||
|
|
||||||
baseLayers, err = parseFromFile(ctx, blob, digest, fn)
|
baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
|
} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
baseLayers, err = parseFromFile(ctx, file, "", fn)
|
baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,7 +81,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
||||||
return layers, nil
|
return layers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||||
fi, err := f.Stat()
|
fi, err := f.Stat()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -108,16 +108,38 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
|
||||||
defer t.Close()
|
defer t.Close()
|
||||||
defer os.Remove(t.Name())
|
defer os.Remove(t.Name())
|
||||||
|
|
||||||
fn(api.ProgressResponse{Status: "converting model"})
|
var layerType string
|
||||||
if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
|
|
||||||
return nil, err
|
switch command {
|
||||||
|
case "adapter":
|
||||||
|
var baseModel *llm.GGML
|
||||||
|
for _, l := range baseLayers {
|
||||||
|
if l.GGML != nil {
|
||||||
|
baseModel = l.GGML
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if baseModel == nil {
|
||||||
|
return nil, fmt.Errorf("no base model specified for the adapter")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
layerType = "application/vnd.ollama.image.adapter"
|
||||||
|
case "model":
|
||||||
|
if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
layerType = "application/vnd.ollama.image.model"
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := t.Seek(0, io.SeekStart); err != nil {
|
if _, err := t.Seek(0, io.SeekStart); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
layer, err := NewLayer(t, "application/vnd.ollama.image.model")
|
layer, err := NewLayer(t, layerType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -139,7 +161,7 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
|
||||||
return detectChatTemplate(layers)
|
return detectChatTemplate(layers)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||||
sr := io.NewSectionReader(file, 0, 512)
|
sr := io.NewSectionReader(file, 0, 512)
|
||||||
contentType, err := detectContentType(sr)
|
contentType, err := detectContentType(sr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -150,7 +172,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
||||||
case "gguf", "ggla":
|
case "gguf", "ggla":
|
||||||
// noop
|
// noop
|
||||||
case "application/zip":
|
case "application/zip":
|
||||||
return parseFromZipFile(ctx, file, digest, fn)
|
return parseFromZipFile(ctx, command, baseLayers, file, digest, fn)
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupported content type: %s", contentType)
|
return nil, fmt.Errorf("unsupported content type: %s", contentType)
|
||||||
}
|
}
|
||||||
|
@ -170,7 +192,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
||||||
}
|
}
|
||||||
|
|
||||||
mediatype := "application/vnd.ollama.image.model"
|
mediatype := "application/vnd.ollama.image.model"
|
||||||
if ggml.Name() == "ggla" {
|
if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" {
|
||||||
mediatype = "application/vnd.ollama.image.adapter"
|
mediatype = "application/vnd.ollama.image.adapter"
|
||||||
} else if ggml.KV().Architecture() == "clip" {
|
} else if ggml.KV().Architecture() == "clip" {
|
||||||
mediatype = "application/vnd.ollama.image.projector"
|
mediatype = "application/vnd.ollama.image.projector"
|
||||||
|
|
|
@ -153,7 +153,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
|
||||||
t.Fatalf("failed to seek to start: %v", err)
|
t.Fatalf("failed to seek to start: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
|
layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to parse from file: %v", err)
|
t.Fatalf("failed to parse from file: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -166,7 +166,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
|
||||||
t.Fatalf("failed to seek to start: %v", err)
|
t.Fatalf("failed to seek to start: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
|
layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to parse from file: %v", err)
|
t.Fatalf("failed to parse from file: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -206,7 +206,7 @@ func TestParseLayerFromCopy(t *testing.T) {
|
||||||
t.Fatalf("failed to seek to start: %v", err)
|
t.Fatalf("failed to seek to start: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
|
layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to parse from file: %v", err)
|
t.Fatalf("failed to parse from file: %v", err)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue