309 lines
6.9 KiB
Go
309 lines
6.9 KiB
Go
package convert
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/d4l3k/go-bfloat16"
|
|
"github.com/x448/float16"
|
|
|
|
"github.com/ollama/ollama/llm"
|
|
)
|
|
|
|
type safetensorWriterTo struct {
|
|
t *llm.Tensor
|
|
|
|
params *Params
|
|
bo ByteOrder
|
|
|
|
filename string
|
|
dtype string
|
|
|
|
offset, size int64
|
|
repacker func(string, []float32, []uint64) ([]float32, error)
|
|
}
|
|
|
|
// safetensorMetadata is a single entry of the JSON header found at the start
// of a .safetensors file.
type safetensorMetadata struct {
	// Type is the element type string, e.g. "F32", "F16" or "BF16".
	Type string `json:"dtype"`
	// Shape lists the tensor dimensions; entries that decode with no shape
	// are skipped by readTensors.
	Shape []uint64 `json:"shape"`
	// Offsets holds the begin and end byte positions of the data, measured
	// from the first byte after the header.
	Offsets []int64 `json:"data_offsets"`
}
|
|
|
|
// SafetensorFormat loads tensors and parameters from a model directory that
// stores its weights in *.safetensors files alongside a config.json. It
// carries no state of its own.
type SafetensorFormat struct{}
|
|
|
|
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
|
|
var tensors []llm.Tensor
|
|
matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var offset uint64
|
|
for _, f := range matches {
|
|
var t []llm.Tensor
|
|
var err error
|
|
t, offset, err = m.readTensors(f, offset, params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
tensors = append(tensors, t...)
|
|
}
|
|
return tensors, nil
|
|
}
|
|
|
|
func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
|
|
f, err := os.Open(fn)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
defer f.Close()
|
|
|
|
var n int64
|
|
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
b := bytes.NewBuffer(make([]byte, 0, n))
|
|
if _, err = io.CopyN(b, f, n); err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
var headers map[string]safetensorMetadata
|
|
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
var keys []string
|
|
for key := range headers {
|
|
if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
|
|
keys = append(keys, key)
|
|
}
|
|
}
|
|
|
|
slices.Sort(keys)
|
|
|
|
var tensors []llm.Tensor
|
|
for _, key := range keys {
|
|
value := headers[key]
|
|
|
|
var kind uint32
|
|
switch len(value.Shape) {
|
|
case 0:
|
|
// valuedata
|
|
continue
|
|
case 2:
|
|
kind = 1
|
|
}
|
|
|
|
name, err := m.GetLayerName(key)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
shape := make([]uint64, len(value.Shape))
|
|
copy(shape, value.Shape)
|
|
|
|
pad := func(s int64) int64 {
|
|
return 8 + n + s
|
|
}
|
|
|
|
t := llm.Tensor{
|
|
Name: name,
|
|
Kind: kind,
|
|
Offset: offset,
|
|
Shape: shape[:],
|
|
}
|
|
|
|
t.WriterTo = safetensorWriterTo{
|
|
t: &t,
|
|
params: params,
|
|
bo: params.ByteOrder,
|
|
filename: fn,
|
|
dtype: value.Type,
|
|
offset: pad(value.Offsets[0]),
|
|
size: pad(value.Offsets[1]) - pad(value.Offsets[0]),
|
|
}
|
|
|
|
offset += t.Size()
|
|
tensors = append(tensors, t)
|
|
}
|
|
|
|
return tensors, offset, nil
|
|
}
|
|
|
|
func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
|
|
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
|
|
var params Params
|
|
|
|
if err := json.NewDecoder(f).Decode(¶ms); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
params.ByteOrder = binary.LittleEndian
|
|
return ¶ms, nil
|
|
}
|
|
|
|
func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
|
|
directMap := map[string]string{
|
|
"model.embed_tokens.weight": "token_embd.weight",
|
|
"lm_head.weight": "output.weight",
|
|
"model.norm.weight": "output_norm.weight",
|
|
}
|
|
|
|
tMap := map[string]string{
|
|
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
|
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
|
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
|
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
|
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
|
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
|
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
|
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
|
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
|
"model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight",
|
|
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
|
|
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
|
|
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
|
|
}
|
|
|
|
v, ok := directMap[n]
|
|
if ok {
|
|
return v, nil
|
|
}
|
|
|
|
// quick hack to rename the layers to gguf format
|
|
for k, v := range tMap {
|
|
re := regexp.MustCompile(k)
|
|
newName := re.ReplaceAllString(n, v)
|
|
if newName != n {
|
|
return newName, nil
|
|
}
|
|
}
|
|
|
|
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
|
}
|
|
|
|
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
|
|
f, err := os.Open(r.filename)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
defer f.Close()
|
|
|
|
if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
var f32s []float32
|
|
switch r.dtype {
|
|
case "F32":
|
|
f32s = make([]float32, r.size/4)
|
|
if err = binary.Read(f, r.bo, f32s); err != nil {
|
|
return 0, err
|
|
}
|
|
case "F16":
|
|
u16s := make([]uint16, r.size/2)
|
|
if err = binary.Read(f, r.bo, u16s); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
for _, b := range u16s {
|
|
f32s = append(f32s, float16.Frombits(b).Float32())
|
|
}
|
|
|
|
case "BF16":
|
|
u8s := make([]uint8, r.size)
|
|
if err = binary.Read(f, r.bo, u8s); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
f32s = bfloat16.DecodeFloat32(u8s)
|
|
default:
|
|
return 0, fmt.Errorf("unknown data type: %s", r.dtype)
|
|
}
|
|
|
|
if r.repacker != nil {
|
|
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
|
|
switch r.t.Kind {
|
|
case 0:
|
|
return 0, binary.Write(w, r.bo, f32s)
|
|
case 1:
|
|
f16s := make([]uint16, len(f32s))
|
|
for i := range f32s {
|
|
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
|
}
|
|
|
|
return 0, binary.Write(w, r.bo, f16s)
|
|
default:
|
|
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
|
|
}
|
|
}
|
|
|
|
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
|
|
switch len(params.Architectures) {
|
|
case 0:
|
|
return nil, fmt.Errorf("No architecture specified to convert")
|
|
case 1:
|
|
switch params.Architectures[0] {
|
|
case "LlamaForCausalLM":
|
|
return &LlamaModel{
|
|
ModelData{
|
|
Name: name,
|
|
Path: dirPath,
|
|
Params: params,
|
|
Format: m,
|
|
},
|
|
}, nil
|
|
case "MistralForCausalLM":
|
|
return &MistralModel{
|
|
ModelData{
|
|
Name: name,
|
|
Path: dirPath,
|
|
Params: params,
|
|
Format: m,
|
|
},
|
|
}, nil
|
|
case "MixtralForCausalLM":
|
|
return &MixtralModel{
|
|
ModelData{
|
|
Name: name,
|
|
Path: dirPath,
|
|
Params: params,
|
|
Format: m,
|
|
},
|
|
}, nil
|
|
case "GemmaForCausalLM":
|
|
return &GemmaModel{
|
|
ModelData{
|
|
Name: name,
|
|
Path: dirPath,
|
|
Params: params,
|
|
Format: m,
|
|
},
|
|
}, nil
|
|
default:
|
|
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
|
|
}
|
|
}
|
|
|
|
return nil, fmt.Errorf("Unknown error")
|
|
}
|