convert safetensor adapters into GGUF (#6327)
This commit is contained in:
parent
7a1e1c1caf
commit
0c819e167b
16 changed files with 697 additions and 101 deletions
|
@ -204,6 +204,12 @@ func tempZipFiles(path string) (string, error) {
|
|||
// safetensors files might be unresolved git lfs references; skip if they are
|
||||
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
||||
files = append(files, st...)
|
||||
} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||
// covers adapters.safetensors
|
||||
files = append(files, st...)
|
||||
} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||
// covers adapter_model.safetensors
|
||||
files = append(files, st...)
|
||||
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
||||
// pytorch files might also be unresolved git lfs references; skip if they are
|
||||
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
||||
|
|
|
@ -12,12 +12,22 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type Parameters struct {
|
||||
type ModelParameters struct {
|
||||
Architectures []string `json:"architectures"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
}
|
||||
|
||||
func (Parameters) KV(t *Tokenizer) llm.KV {
|
||||
type AdapterParameters struct {
|
||||
Alpha uint32 `json:"lora_alpha"`
|
||||
LoraLayers uint32 `json:"lora_layers"`
|
||||
LoraParameters struct {
|
||||
Rank uint32 `json:"rank"`
|
||||
Alpha float32 `json:"alpha"`
|
||||
Scale float32 `json:"scale"`
|
||||
} `json:"lora_parameters"`
|
||||
}
|
||||
|
||||
func (ModelParameters) KV(t *Tokenizer) llm.KV {
|
||||
kv := llm.KV{
|
||||
"general.file_type": uint32(1),
|
||||
"general.quantization_version": uint32(2),
|
||||
|
@ -44,17 +54,40 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (Parameters) specialTokenTypes() []string {
|
||||
func (p AdapterParameters) KV() llm.KV {
|
||||
var alpha float32
|
||||
if p.LoraParameters.Alpha == 0 {
|
||||
alpha = float32(p.Alpha)
|
||||
} else {
|
||||
alpha = p.LoraParameters.Alpha
|
||||
}
|
||||
|
||||
kv := llm.KV{
|
||||
"adapter.lora.alpha": alpha,
|
||||
"adapter.type": "lora",
|
||||
"general.file_type": uint32(1),
|
||||
"general.type": "adapter",
|
||||
"general.version": "v0.2",
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (ModelParameters) specialTokenTypes() []string {
|
||||
return []string{
|
||||
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
|
||||
}
|
||||
}
|
||||
|
||||
func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||
return llm.WriteGGUF(ws, kv, ts)
|
||||
}
|
||||
|
||||
type Converter interface {
|
||||
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||
return llm.WriteGGUF(ws, kv, ts)
|
||||
}
|
||||
|
||||
type ModelConverter interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(*Tokenizer) llm.KV
|
||||
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
||||
|
@ -73,17 +106,67 @@ type moreParser interface {
|
|||
parseMore(fs.FS) error
|
||||
}
|
||||
|
||||
type AdapterConverter interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(llm.KV) llm.KV
|
||||
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
|
||||
Tensors([]Tensor) []llm.Tensor
|
||||
// Replacements returns a list of string pairs to replace in tensor names.
|
||||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||
Replacements() []string
|
||||
|
||||
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||
}
|
||||
|
||||
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
|
||||
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var p AdapterParameters
|
||||
if err := json.Unmarshal(bts, &p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
arch, ok := baseKV["general.architecture"]
|
||||
if !ok {
|
||||
return errors.New("architecture not set for the base model")
|
||||
}
|
||||
|
||||
var conv AdapterConverter
|
||||
switch arch {
|
||||
case "llama":
|
||||
conv = &llamaAdapter{}
|
||||
case "gemma2":
|
||||
conv = &gemma2Adapter{}
|
||||
default:
|
||||
return errors.New("unsupported architecture")
|
||||
}
|
||||
|
||||
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, conv); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
|
||||
}
|
||||
|
||||
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
||||
// and files it finds in the input path.
|
||||
// Supported input model formats include safetensors.
|
||||
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||
func Convert(fsys fs.FS, ws io.WriteSeeker) error {
|
||||
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
||||
bts, err := fs.ReadFile(fsys, "config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var p Parameters
|
||||
var p ModelParameters
|
||||
if err := json.Unmarshal(bts, &p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -92,20 +175,20 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
|
|||
return errors.New("unknown architecture")
|
||||
}
|
||||
|
||||
var conv Converter
|
||||
var conv ModelConverter
|
||||
switch p.Architectures[0] {
|
||||
case "LlamaForCausalLM", "MistralForCausalLM":
|
||||
conv = &llama{}
|
||||
conv = &llamaModel{}
|
||||
case "MixtralForCausalLM":
|
||||
conv = &mixtral{}
|
||||
conv = &mixtralModel{}
|
||||
case "GemmaForCausalLM":
|
||||
conv = &gemma{}
|
||||
conv = &gemmaModel{}
|
||||
case "Gemma2ForCausalLM":
|
||||
conv = &gemma2{}
|
||||
conv = &gemma2Model{}
|
||||
case "Phi3ForCausalLM":
|
||||
conv = &phi3{}
|
||||
conv = &phi3Model{}
|
||||
case "BertModel":
|
||||
conv = &bert{}
|
||||
conv = &bertModel{}
|
||||
default:
|
||||
return errors.New("unsupported architecture")
|
||||
}
|
||||
|
|
|
@ -11,8 +11,8 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type bert struct {
|
||||
Parameters
|
||||
type bertModel struct {
|
||||
ModelParameters
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayer uint32 `json:"n_layer"`
|
||||
|
@ -33,11 +33,11 @@ type bert struct {
|
|||
}
|
||||
|
||||
var (
|
||||
_ Converter = (*bert)(nil)
|
||||
_ moreParser = (*bert)(nil)
|
||||
_ ModelConverter = (*bertModel)(nil)
|
||||
_ moreParser = (*bertModel)(nil)
|
||||
)
|
||||
|
||||
func (p *bert) parseMore(fsys fs.FS) error {
|
||||
func (p *bertModel) parseMore(fsys fs.FS) error {
|
||||
bts, err := fs.ReadFile(fsys, "modules.json")
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -85,8 +85,8 @@ func (p *bert) parseMore(fsys fs.FS) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (p *bert) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
func (p *bertModel) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "bert"
|
||||
kv["bert.attention.causal"] = false
|
||||
kv["bert.pooling_type"] = p.PoolingType
|
||||
|
@ -132,7 +132,7 @@ func (p *bert) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
|
||||
func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
for _, t := range ts {
|
||||
if slices.Contains([]string{
|
||||
|
@ -154,7 +154,7 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
|
|||
return out
|
||||
}
|
||||
|
||||
func (bert) Replacements() []string {
|
||||
func (bertModel) Replacements() []string {
|
||||
return []string{
|
||||
"encoder.layer", "blk",
|
||||
"encoder.layers", "blk",
|
||||
|
|
|
@ -9,8 +9,8 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type gemma struct {
|
||||
Parameters
|
||||
type gemmaModel struct {
|
||||
ModelParameters
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
|
@ -21,10 +21,10 @@ type gemma struct {
|
|||
HeadDim uint32 `json:"head_dim"`
|
||||
}
|
||||
|
||||
var _ Converter = (*gemma)(nil)
|
||||
var _ ModelConverter = (*gemmaModel)(nil)
|
||||
|
||||
func (p *gemma) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma"
|
||||
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["gemma.embedding_length"] = p.HiddenSize
|
||||
|
@ -42,8 +42,8 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
|
||||
out := make([]llm.Tensor, 0, len(ts))
|
||||
func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
for _, t := range ts {
|
||||
if strings.HasSuffix(t.Name(), "_norm.weight") {
|
||||
t.SetRepacker(p.addOne)
|
||||
|
@ -60,7 +60,7 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
|
|||
return out
|
||||
}
|
||||
|
||||
func (p *gemma) Replacements() []string {
|
||||
func (p *gemmaModel) Replacements() []string {
|
||||
return []string{
|
||||
"model.embed_tokens", "token_embd",
|
||||
"model.norm", "output_norm",
|
||||
|
@ -77,7 +77,7 @@ func (p *gemma) Replacements() []string {
|
|||
}
|
||||
}
|
||||
|
||||
func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
||||
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
||||
|
||||
|
|
|
@ -4,15 +4,15 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type gemma2 struct {
|
||||
gemma
|
||||
type gemma2Model struct {
|
||||
gemmaModel
|
||||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
|
||||
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
||||
}
|
||||
|
||||
func (p *gemma2) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma2"
|
||||
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["gemma2.embedding_length"] = p.HiddenSize
|
||||
|
@ -33,9 +33,9 @@ func (p *gemma2) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma2) Replacements() []string {
|
||||
func (p *gemma2Model) Replacements() []string {
|
||||
return append(
|
||||
p.gemma.Replacements(),
|
||||
p.gemmaModel.Replacements(),
|
||||
"post_attention_layernorm", "post_attention_norm",
|
||||
"pre_feedforward_layernorm", "ffn_norm",
|
||||
"post_feedforward_layernorm", "post_ffw_norm",
|
||||
|
|
91
convert/convert_gemma2_adapter.go
Normal file
91
convert/convert_gemma2_adapter.go
Normal file
|
@ -0,0 +1,91 @@
|
|||
package convert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type gemma2Adapter struct {
|
||||
AdapterParameters
|
||||
}
|
||||
|
||||
var _ AdapterConverter = (*gemma2Adapter)(nil)
|
||||
|
||||
func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
|
||||
kv := p.AdapterParameters.KV()
|
||||
kv["general.architecture"] = "gemma2"
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||
shape[0], shape[1] = shape[1], shape[0]
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) Replacements() []string {
|
||||
return []string{
|
||||
"base_model.model.", "",
|
||||
"model.layers", "blk",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"lora_A.weight", "weight.lora_a",
|
||||
"lora_B.weight", "weight.lora_b",
|
||||
"lora_a", "weight.lora_a",
|
||||
"lora_b", "weight.lora_b",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := []int{int(shape[1]), int(shape[0])}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
if err := n.T(1, 0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
|
@ -12,8 +12,8 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type llama struct {
|
||||
Parameters
|
||||
type llamaModel struct {
|
||||
ModelParameters
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayer uint32 `json:"n_layer"`
|
||||
|
@ -44,10 +44,10 @@ type llama struct {
|
|||
HeadDim uint32 `json:"head_dim"`
|
||||
}
|
||||
|
||||
var _ Converter = (*llama)(nil)
|
||||
var _ ModelConverter = (*llamaModel)(nil)
|
||||
|
||||
func (p *llama) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
func (p *llamaModel) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["llama.vocab_size"] = p.VocabSize
|
||||
|
||||
|
@ -120,7 +120,7 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
||||
func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
|
||||
if p.RopeScaling.factors != nil {
|
||||
|
@ -149,7 +149,7 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
|
|||
return out
|
||||
}
|
||||
|
||||
func (p *llama) Replacements() []string {
|
||||
func (p *llamaModel) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
|
@ -167,7 +167,7 @@ func (p *llama) Replacements() []string {
|
|||
}
|
||||
}
|
||||
|
||||
func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
dims = append(dims, int(dim))
|
||||
|
|
169
convert/convert_llama_adapter.go
Normal file
169
convert/convert_llama_adapter.go
Normal file
|
@ -0,0 +1,169 @@
|
|||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type llamaAdapter struct {
|
||||
AdapterParameters
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
}
|
||||
|
||||
var _ AdapterConverter = (*llamaAdapter)(nil)
|
||||
|
||||
func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
|
||||
kv := p.AdapterParameters.KV()
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
|
||||
kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
|
||||
|
||||
p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||
shape[0], shape[1] = shape[1], shape[0]
|
||||
t.SetRepacker(p.repackAndTranspose)
|
||||
} else {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) Replacements() []string {
|
||||
return []string{
|
||||
"base_model.model.", "",
|
||||
"model.layers", "blk",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"lora_A.weight", "weight.lora_a",
|
||||
"lora_B.weight", "weight.lora_b",
|
||||
"lora_a", "weight.lora_a",
|
||||
"lora_b", "weight.lora_b",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := []int{int(shape[1]), int(shape[0])}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
} else {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
dims := []int{int(shape[1]), int(shape[0])}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||
heads = p.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||
}
|
||||
|
||||
if heads > 0 {
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := n.T(1, 0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
|
@ -9,14 +9,14 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type mixtral struct {
|
||||
llama
|
||||
type mixtralModel struct {
|
||||
llamaModel
|
||||
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
}
|
||||
|
||||
func (p *mixtral) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.llama.KV(t)
|
||||
func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.llamaModel.KV(t)
|
||||
|
||||
if p.NumLocalExperts > 0 {
|
||||
kv["llama.expert_count"] = p.NumLocalExperts
|
||||
|
@ -29,7 +29,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
|
||||
func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
oldnew := []string{
|
||||
"model.layers", "blk",
|
||||
"w1", "ffn_gate_exps",
|
||||
|
@ -67,12 +67,12 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
|
|||
})
|
||||
}
|
||||
|
||||
return append(out, p.llama.Tensors(ts)...)
|
||||
return append(out, p.llamaModel.Tensors(ts)...)
|
||||
}
|
||||
|
||||
func (p *mixtral) Replacements() []string {
|
||||
func (p *mixtralModel) Replacements() []string {
|
||||
return append(
|
||||
p.llama.Replacements(),
|
||||
p.llamaModel.Replacements(),
|
||||
"block_sparse_moe.gate", "ffn_gate_inp",
|
||||
)
|
||||
}
|
||||
|
|
|
@ -11,8 +11,8 @@ import (
|
|||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type phi3 struct {
|
||||
Parameters
|
||||
type phi3Model struct {
|
||||
ModelParameters
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
NLayers uint32 `json:"n_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
|
@ -35,10 +35,10 @@ type phi3 struct {
|
|||
SlidingWindow uint32 `json:"sliding_window"`
|
||||
}
|
||||
|
||||
var _ Converter = (*phi3)(nil)
|
||||
var _ ModelConverter = (*phi3Model)(nil)
|
||||
|
||||
func (p *phi3) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.Parameters.KV(t)
|
||||
func (p *phi3Model) KV(t *Tokenizer) llm.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "phi3"
|
||||
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||
|
@ -68,7 +68,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
|
|||
return kv
|
||||
}
|
||||
|
||||
func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
|
||||
func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var addRopeFactors sync.Once
|
||||
|
||||
out := make([]llm.Tensor, 0, len(ts)+2)
|
||||
|
@ -100,7 +100,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
|
|||
return out
|
||||
}
|
||||
|
||||
func (p *phi3) Replacements() []string {
|
||||
func (p *phi3Model) Replacements() []string {
|
||||
return []string{
|
||||
"lm_head", "output",
|
||||
"model.embed_tokens", "token_embd",
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
|
@ -29,7 +31,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
|||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := Convert(fsys, f); err != nil {
|
||||
if err := ConvertModel(fsys, f); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
@ -51,6 +53,34 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
|||
return r, m.KV(), m.Tensors()
|
||||
}
|
||||
|
||||
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
|
||||
actual := make(map[string]string)
|
||||
for k, v := range kv {
|
||||
if s, ok := v.(json.Marshaler); !ok {
|
||||
actual[k] = fmt.Sprintf("%v", v)
|
||||
} else {
|
||||
bts, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
||||
}
|
||||
}
|
||||
|
||||
for _, tensor := range tensors.Items {
|
||||
sha256sum := sha256.New()
|
||||
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||
}
|
||||
|
||||
return actual
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
var level slog.Level
|
||||
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
|
||||
|
@ -85,29 +115,7 @@ func TestConvertFull(t *testing.T) {
|
|||
}
|
||||
|
||||
f, kv, tensors := convertFull(t, os.DirFS(p))
|
||||
actual := make(map[string]string)
|
||||
for k, v := range kv {
|
||||
if s, ok := v.(json.Marshaler); !ok {
|
||||
actual[k] = fmt.Sprintf("%v", v)
|
||||
} else {
|
||||
bts, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
||||
}
|
||||
}
|
||||
|
||||
for _, tensor := range tensors.Items {
|
||||
sha256sum := sha256.New()
|
||||
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||
}
|
||||
actual := generateResultsJSON(t, f, kv, tensors)
|
||||
|
||||
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
||||
if err != nil {
|
||||
|
@ -131,3 +139,209 @@ func TestConvertFull(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertAdapter(t *testing.T) {
|
||||
type AdapterCase struct {
|
||||
Name string
|
||||
BaseKV map[string]any
|
||||
Expected map[string]string
|
||||
}
|
||||
|
||||
cases := []AdapterCase{
|
||||
{
|
||||
Name: "discollama",
|
||||
BaseKV: map[string]any{
|
||||
"general.architecture": "llama",
|
||||
"llama.attention.head_count": uint32(32),
|
||||
"llama.attention.head_count_kv": uint32(8),
|
||||
},
|
||||
Expected: map[string]string{
|
||||
"general.architecture": "llama",
|
||||
"general.file_type": "1",
|
||||
"general.parameter_count": "106496",
|
||||
"general.type": "adapter",
|
||||
"general.version": "v0.2",
|
||||
"adapter.lora.alpha": "16",
|
||||
"adapter.type": "lora",
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.head_count_kv": "8",
|
||||
"blk.31.attn_q.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||
"blk.31.attn_q.weight.lora_b": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||
"blk.31.attn_v.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||
"blk.31.attn_v.weight.lora_b": "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
tempDir := t.TempDir()
|
||||
generateLoraTestData(t, tempDir)
|
||||
|
||||
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
r, err := os.Open(f.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
|
||||
|
||||
keys := maps.Keys(c.Expected)
|
||||
slices.Sort(keys)
|
||||
for _, k := range keys {
|
||||
if v, ok := actual[k]; !ok {
|
||||
t.Errorf("missing %s", k)
|
||||
} else if v != c.Expected[k] {
|
||||
t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func generateLoraTestData(t *testing.T, tempDir string) {
|
||||
type tensorData struct {
|
||||
Offsets []int `json:"data_offsets"`
|
||||
Type string `json:"dtype"`
|
||||
Shape []int `json:"shape"`
|
||||
}
|
||||
offset := 4096 * 8 * 4
|
||||
|
||||
td := map[string]*tensorData{"__metadata__": nil}
|
||||
td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
|
||||
Offsets: []int{0, offset},
|
||||
Type: "F32",
|
||||
Shape: []int{4096, 8},
|
||||
}
|
||||
td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
|
||||
Offsets: []int{offset, offset * 2},
|
||||
Type: "F32",
|
||||
Shape: []int{8, 4096},
|
||||
}
|
||||
td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
|
||||
Offsets: []int{offset * 2, offset * 3},
|
||||
Type: "F32",
|
||||
Shape: []int{4096, 8},
|
||||
}
|
||||
td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
|
||||
Offsets: []int{offset * 3, offset*3 + 8*1024*4},
|
||||
Type: "F32",
|
||||
Shape: []int{8, 1024},
|
||||
}
|
||||
|
||||
data, err := json.Marshal(td)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
|
||||
l := int64(len(data))
|
||||
err = binary.Write(&buf, binary.LittleEndian, l)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = buf.Write(data)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// write some data for the tensors
|
||||
|
||||
ones := make([]float32, 4096*8)
|
||||
for i := range ones {
|
||||
ones[i] = float32(1)
|
||||
}
|
||||
|
||||
for range 3 {
|
||||
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
ones = make([]float32, 1024*8)
|
||||
for i := range ones {
|
||||
ones[i] = float32(1)
|
||||
}
|
||||
|
||||
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fdata.Close()
|
||||
|
||||
_, err = fdata.Write(buf.Bytes())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
configData := `
|
||||
{
|
||||
"adapter_path": "adapters-test",
|
||||
"batch_size": 8,
|
||||
"config": "config-tiny.json",
|
||||
"data": "../discollama-completion",
|
||||
"grad_checkpoint": null,
|
||||
"iters": 1000,
|
||||
"learning_rate": 1e-05,
|
||||
"lora_layers": 1,
|
||||
"lora_parameters": {
|
||||
"rank": 8,
|
||||
"alpha": 16,
|
||||
"dropout": 0.0,
|
||||
"scale": 2.0
|
||||
},
|
||||
"lr_schedule": null,
|
||||
"max_seq_length": 2048,
|
||||
"model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
|
||||
"resume_adapter_file": null,
|
||||
"save_every": 100,
|
||||
"seed": 0,
|
||||
"steps_per_eval": 200,
|
||||
"steps_per_report": 10,
|
||||
"test": false,
|
||||
"test_batches": 500,
|
||||
"train": true,
|
||||
"use_dora": false,
|
||||
"val_batches": 25
|
||||
}
|
||||
`
|
||||
f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(configData)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,8 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
|
|||
}{
|
||||
{"model-*-of-*.safetensors", parseSafetensors},
|
||||
{"model.safetensors", parseSafetensors},
|
||||
{"adapters.safetensors", parseSafetensors},
|
||||
{"adapter_model.safetensors", parseSafetensors},
|
||||
{"pytorch_model-*-of-*.bin", parseTorch},
|
||||
{"pytorch_model.bin", parseTorch},
|
||||
{"consolidated.*.pth", parseTorch},
|
||||
|
|
|
@ -43,6 +43,14 @@ func (kv KV) Architecture() string {
|
|||
return "unknown"
|
||||
}
|
||||
|
||||
func (kv KV) Kind() string {
|
||||
if s, ok := kv["general.type"].(string); ok {
|
||||
return s
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
func (kv KV) ParameterCount() uint64 {
|
||||
return kv.u64("general.parameter_count")
|
||||
}
|
||||
|
|
|
@ -369,13 +369,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||
parameters := make(map[string]any)
|
||||
|
||||
var layers []Layer
|
||||
var baseLayers []*layerGGML
|
||||
for _, c := range modelfile.Commands {
|
||||
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
|
||||
command := c.Name
|
||||
|
||||
switch c.Name {
|
||||
switch command {
|
||||
case "model", "adapter":
|
||||
var baseLayers []*layerGGML
|
||||
if name := model.ParseName(c.Args); name.IsValid() {
|
||||
if name := model.ParseName(c.Args); name.IsValid() && command == "model" {
|
||||
baseLayers, err = parseFromModel(ctx, name, fn)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -409,14 +410,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||
}
|
||||
defer blob.Close()
|
||||
|
||||
baseLayers, err = parseFromFile(ctx, blob, digest, fn)
|
||||
baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
|
||||
defer file.Close()
|
||||
|
||||
baseLayers, err = parseFromFile(ctx, file, "", fn)
|
||||
baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -81,7 +81,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
|||
return layers, nil
|
||||
}
|
||||
|
||||
func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||
func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -108,16 +108,38 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
|
|||
defer t.Close()
|
||||
defer os.Remove(t.Name())
|
||||
|
||||
fn(api.ProgressResponse{Status: "converting model"})
|
||||
if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
|
||||
return nil, err
|
||||
var layerType string
|
||||
|
||||
switch command {
|
||||
case "adapter":
|
||||
var baseModel *llm.GGML
|
||||
for _, l := range baseLayers {
|
||||
if l.GGML != nil {
|
||||
baseModel = l.GGML
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if baseModel == nil {
|
||||
return nil, fmt.Errorf("no base model specified for the adapter")
|
||||
}
|
||||
|
||||
if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
layerType = "application/vnd.ollama.image.adapter"
|
||||
case "model":
|
||||
if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
layerType = "application/vnd.ollama.image.model"
|
||||
}
|
||||
|
||||
if _, err := t.Seek(0, io.SeekStart); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layer, err := NewLayer(t, "application/vnd.ollama.image.model")
|
||||
layer, err := NewLayer(t, layerType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -139,7 +161,7 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
|
|||
return detectChatTemplate(layers)
|
||||
}
|
||||
|
||||
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||
func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||
sr := io.NewSectionReader(file, 0, 512)
|
||||
contentType, err := detectContentType(sr)
|
||||
if err != nil {
|
||||
|
@ -150,7 +172,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
|||
case "gguf", "ggla":
|
||||
// noop
|
||||
case "application/zip":
|
||||
return parseFromZipFile(ctx, file, digest, fn)
|
||||
return parseFromZipFile(ctx, command, baseLayers, file, digest, fn)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported content type: %s", contentType)
|
||||
}
|
||||
|
@ -170,7 +192,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
|||
}
|
||||
|
||||
mediatype := "application/vnd.ollama.image.model"
|
||||
if ggml.Name() == "ggla" {
|
||||
if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" {
|
||||
mediatype = "application/vnd.ollama.image.adapter"
|
||||
} else if ggml.KV().Architecture() == "clip" {
|
||||
mediatype = "application/vnd.ollama.image.projector"
|
||||
|
|
|
@ -153,7 +153,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
|
|||
t.Fatalf("failed to seek to start: %v", err)
|
||||
}
|
||||
|
||||
layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
|
||||
layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse from file: %v", err)
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
|
|||
t.Fatalf("failed to seek to start: %v", err)
|
||||
}
|
||||
|
||||
layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
|
||||
layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse from file: %v", err)
|
||||
}
|
||||
|
@ -206,7 +206,7 @@ func TestParseLayerFromCopy(t *testing.T) {
|
|||
t.Fatalf("failed to seek to start: %v", err)
|
||||
}
|
||||
|
||||
layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
|
||||
layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse from file: %v", err)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue