960 lines
19 KiB
Go
960 lines
19 KiB
Go
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"os"
|
|
"regexp"
|
|
|
|
"github.com/d4l3k/go-bfloat16"
|
|
"github.com/pdevine/tensor"
|
|
"github.com/pdevine/tensor/native"
|
|
"github.com/x448/float16"
|
|
|
|
"github.com/jmorganca/ollama/format"
|
|
)
|
|
|
|
type ContainerGGUF struct {
|
|
ByteOrder binary.ByteOrder
|
|
|
|
Version uint32
|
|
|
|
V1 struct {
|
|
NumTensor uint32
|
|
NumKV uint32
|
|
}
|
|
|
|
V2 struct {
|
|
NumTensor uint64
|
|
NumKV uint64
|
|
}
|
|
|
|
V3 struct {
|
|
NumTensor uint64
|
|
NumKV uint64
|
|
}
|
|
}
|
|
|
|
func (c *ContainerGGUF) Name() string {
|
|
return "gguf"
|
|
}
|
|
|
|
func (c *ContainerGGUF) Decode(rso *readSeekOffset) (model, error) {
|
|
binary.Read(rso, c.ByteOrder, &c.Version)
|
|
|
|
switch c.Version {
|
|
case 1:
|
|
binary.Read(rso, c.ByteOrder, &c.V1)
|
|
default:
|
|
binary.Read(rso, c.ByteOrder, &c.V2)
|
|
}
|
|
|
|
model := NewGGUFModel(c)
|
|
if err := model.Decode(rso); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return model, nil
|
|
}
|
|
|
|
const (
|
|
_ uint32 = iota
|
|
GGUFTokenNormal
|
|
GGUFTokenUnknown
|
|
GGUFTokenControl
|
|
GGUFTokenUserDefined
|
|
GGUFTokenUnused
|
|
GGUFTokenByte
|
|
)
|
|
|
|
const (
|
|
GGUFTypeUint8 uint32 = iota
|
|
GGUFTypeInt8
|
|
GGUFTypeUint16
|
|
GGUFTypeInt16
|
|
GGUFTypeUint32
|
|
GGUFTypeInt32
|
|
GGUFTypeFloat32
|
|
GGUFTypeBool
|
|
GGUFTypeString
|
|
GGUFTypeArray
|
|
GGUFTypeUint64
|
|
GGUFTypeInt64
|
|
GGUFTypeFloat64
|
|
)
|
|
|
|
type KV map[string]any
|
|
|
|
type Tensor struct {
|
|
Name string
|
|
Kind uint32
|
|
Offset uint64
|
|
|
|
// shape is the number of elements in each dimension
|
|
Shape [4]uint64
|
|
|
|
FileName string
|
|
OffsetPadding uint64
|
|
FileOffsets []uint64
|
|
}
|
|
|
|
func (t Tensor) BlockSize() uint64 {
|
|
switch {
|
|
case t.Kind < 2:
|
|
return 1
|
|
case t.Kind < 10:
|
|
return 32
|
|
default:
|
|
return 256
|
|
}
|
|
}
|
|
|
|
func (t Tensor) TypeSize() uint64 {
|
|
blockSize := t.BlockSize()
|
|
|
|
switch t.Kind {
|
|
case 0: // FP32
|
|
return 4
|
|
case 1: // FP16
|
|
return 2
|
|
case 2: // Q4_0
|
|
return 2 + blockSize/2
|
|
case 3: // Q4_1
|
|
return 2 + 2 + blockSize/2
|
|
case 6: // Q5_0
|
|
return 2 + 4 + blockSize/2
|
|
case 7: // Q5_1
|
|
return 2 + 2 + 4 + blockSize/2
|
|
case 8: // Q8_0
|
|
return 2 + blockSize
|
|
case 9: // Q8_1
|
|
return 4 + 4 + blockSize
|
|
case 10: // Q2_K
|
|
return blockSize/16 + blockSize/4 + 2 + 2
|
|
case 11: // Q3_K
|
|
return blockSize/8 + blockSize/4 + 12 + 2
|
|
case 12: // Q4_K
|
|
return 2 + 2 + 12 + blockSize/2
|
|
case 13: // Q5_K
|
|
return 2 + 2 + 12 + blockSize/8 + blockSize/2
|
|
case 14: // Q6_K
|
|
return blockSize/2 + blockSize/4 + blockSize/16 + 2
|
|
case 15: // Q8_K
|
|
return 2 + blockSize + 2*blockSize/16
|
|
case 16: // IQ2_XXS
|
|
return 2 + 2*blockSize/8
|
|
case 17: // IQ2_XS
|
|
return 2 + 2*blockSize/8 + blockSize/32
|
|
case 18: // IQ3_XXS
|
|
return 2 + 3*blockSize/8
|
|
default:
|
|
return 0
|
|
}
|
|
}
|
|
|
|
func (t Tensor) Parameters() uint64 {
|
|
return t.Shape[0] * t.Shape[1] * t.Shape[2] * t.Shape[3]
|
|
}
|
|
|
|
func (t Tensor) Size() uint64 {
|
|
return t.Parameters() * t.TypeSize() / t.BlockSize()
|
|
}
|
|
|
|
func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
|
|
n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
|
|
origShape := n.Shape().Clone()
|
|
|
|
// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
|
|
if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
|
|
return []uint16{}, err
|
|
}
|
|
|
|
if err := n.T(0, 2, 1, 3); err != nil {
|
|
return []uint16{}, err
|
|
}
|
|
|
|
if err := n.Reshape(origShape...); err != nil {
|
|
return []uint16{}, err
|
|
}
|
|
|
|
if err := n.Transpose(); err != nil {
|
|
return []uint16{}, err
|
|
}
|
|
newN, err := native.SelectU16(n, 1)
|
|
if err != nil {
|
|
return []uint16{}, err
|
|
}
|
|
|
|
var fullTensor []uint16
|
|
for _, v := range newN {
|
|
fullTensor = append(fullTensor, v...)
|
|
}
|
|
return fullTensor, nil
|
|
}
|
|
|
|
type GGUFModel struct {
|
|
*ContainerGGUF
|
|
|
|
KV
|
|
Tensors []Tensor
|
|
|
|
parameters uint64
|
|
}
|
|
|
|
func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
|
|
return &GGUFModel{
|
|
ContainerGGUF: container,
|
|
KV: make(KV),
|
|
}
|
|
}
|
|
|
|
func (llm *GGUFModel) NumTensor() uint64 {
|
|
if llm.Version == 1 {
|
|
return uint64(llm.V1.NumTensor)
|
|
}
|
|
|
|
return llm.V2.NumTensor
|
|
}
|
|
|
|
func (llm *GGUFModel) NumKV() uint64 {
|
|
if llm.Version == 1 {
|
|
return uint64(llm.V1.NumKV)
|
|
}
|
|
|
|
return llm.V2.NumKV
|
|
}
|
|
|
|
func (llm *GGUFModel) ModelFamily() string {
|
|
if t, ok := llm.KV["general.architecture"].(string); ok {
|
|
return t
|
|
}
|
|
|
|
return "unknown"
|
|
}
|
|
|
|
func (llm *GGUFModel) ModelType() string {
|
|
if llm.parameters > 0 {
|
|
return format.HumanNumber(llm.parameters)
|
|
}
|
|
|
|
return "unknown"
|
|
}
|
|
|
|
func (llm *GGUFModel) FileType() string {
|
|
if t, ok := llm.KV["general.file_type"].(uint32); ok {
|
|
return fileType(t)
|
|
}
|
|
|
|
return "unknown"
|
|
}
|
|
|
|
func (llm *GGUFModel) Encode(f *os.File) error {
|
|
// this mimics the order of the llama.cpp convert script
|
|
kOrder := []string{
|
|
"general.architecture",
|
|
"general.name",
|
|
"llama.context_length",
|
|
"llama.embedding_length",
|
|
"llama.block_count",
|
|
"llama.feed_forward_length",
|
|
"llama.rope.dimension_count",
|
|
"llama.attention.head_count",
|
|
"llama.attention.head_count_kv",
|
|
"llama.attention.layer_norm_rms_epsilon",
|
|
"llama.rope.freq_base",
|
|
"general.file_type",
|
|
"tokenizer.ggml.model",
|
|
"tokenizer.ggml.tokens",
|
|
"tokenizer.ggml.scores",
|
|
"tokenizer.ggml.token_type",
|
|
"tokenizer.ggml.bos_token_id",
|
|
"tokenizer.ggml.eos_token_id",
|
|
"tokenizer.ggml.unknown_token_id",
|
|
"tokenizer.ggml.add_bos_token",
|
|
"tokenizer.ggml.add_eos_token",
|
|
"tokenizer.chat_template",
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, k := range kOrder {
|
|
val, ok := llm.KV[k]
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
|
|
return err
|
|
}
|
|
if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
|
|
return err
|
|
}
|
|
|
|
switch v := val.(type) {
|
|
case uint32:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := llm.writeUint32(f, v); err != nil {
|
|
return err
|
|
}
|
|
case float32:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := llm.writeF32(f, v); err != nil {
|
|
return err
|
|
}
|
|
case bool:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := llm.writeBool(f, v); err != nil {
|
|
return err
|
|
}
|
|
case string:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := llm.writeString(f, v); err != nil {
|
|
return err
|
|
}
|
|
case []int32:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
|
return err
|
|
}
|
|
for _, i := range v {
|
|
if err := llm.writeInt32(f, i); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
case []uint32:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
|
return err
|
|
}
|
|
for _, i := range v {
|
|
if err := llm.writeUint32(f, i); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
case []float32:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
|
return err
|
|
}
|
|
for _, fl := range v {
|
|
if err := llm.writeF32(f, fl); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
case []string:
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, s := range v {
|
|
if err := llm.writeString(f, s); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// write layer metadata
|
|
for _, t := range llm.Tensors {
|
|
if err := llm.writeString(f, t.Name); err != nil {
|
|
return err
|
|
}
|
|
|
|
// the dimensions of the tensor
|
|
dims := 1
|
|
if t.Shape[1] > 0 {
|
|
dims = 2
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
|
|
return err
|
|
}
|
|
|
|
for i := 0; i < dims; i++ {
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
offset, terr := f.Seek(0, io.SeekCurrent)
|
|
if terr != nil {
|
|
return terr
|
|
}
|
|
slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
|
|
|
|
if err := llm.writePadding(f, 32); err != nil {
|
|
return err
|
|
}
|
|
|
|
var dataFile *os.File
|
|
var currentFile string
|
|
var err error
|
|
for _, t := range llm.Tensors {
|
|
if currentFile != t.FileName {
|
|
if f != nil {
|
|
dataFile.Close()
|
|
}
|
|
currentFile = t.FileName
|
|
dataFile, err = os.Open(t.FileName)
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
return err
|
|
}
|
|
}
|
|
|
|
dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), 0)
|
|
|
|
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
|
re, err := regexp.Compile(pattern)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
matches := re.FindAllStringSubmatch(t.Name, -1)
|
|
if len(matches) > 0 {
|
|
layerSize := t.FileOffsets[1] - t.FileOffsets[0]
|
|
|
|
var err error
|
|
tData := make([]uint16, layerSize/2)
|
|
if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
|
|
return err
|
|
}
|
|
|
|
layerType := matches[0][re.SubexpIndex("layer")]
|
|
var heads uint32
|
|
switch layerType {
|
|
case "q":
|
|
heads = llm.KV["llama.attention.head_count"].(uint32)
|
|
case "k":
|
|
heads = llm.KV["llama.attention.head_count_kv"].(uint32)
|
|
if heads == 0 {
|
|
heads = llm.KV["llama.attention.head_count"].(uint32)
|
|
}
|
|
}
|
|
|
|
tData, err = t.Repack(tData, int(heads))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var buf []byte
|
|
for _, n := range tData {
|
|
buf = binary.LittleEndian.AppendUint16(buf, n)
|
|
}
|
|
|
|
tempBuf := make([]uint16, len(tData))
|
|
tDataF32 := bfloat16.DecodeFloat32(buf)
|
|
for cnt, v := range tDataF32 {
|
|
tDataF16 := float16.Fromfloat32(v)
|
|
tempBuf[cnt] = uint16(tDataF16)
|
|
}
|
|
|
|
if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := llm.writePadding(f, 32); err != nil {
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
|
|
remaining := t.FileOffsets[1] - t.FileOffsets[0]
|
|
|
|
bufSize := uint64(10240)
|
|
var finished bool
|
|
for {
|
|
data := make([]byte, min(bufSize, remaining))
|
|
|
|
b, err := io.ReadFull(dataFile, data)
|
|
remaining -= uint64(b)
|
|
|
|
if err == io.EOF || remaining <= 0 {
|
|
finished = true
|
|
} else if err != nil {
|
|
return err
|
|
}
|
|
|
|
// convert bfloat16 -> ieee float32
|
|
tDataF32 := bfloat16.DecodeFloat32(data)
|
|
|
|
switch t.Kind {
|
|
case 0:
|
|
if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
|
|
return err
|
|
}
|
|
case 1:
|
|
// convert float32 -> float16
|
|
tempBuf := make([]uint16, len(data)/2)
|
|
for cnt, v := range tDataF32 {
|
|
tDataF16 := float16.Fromfloat32(v)
|
|
tempBuf[cnt] = uint16(tDataF16)
|
|
}
|
|
if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if finished {
|
|
break
|
|
}
|
|
}
|
|
|
|
if err := llm.writePadding(f, 32); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
f.Close()
|
|
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
|
|
// gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
|
|
offset, err := f.Seek(0, io.SeekCurrent)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
padding := ((offset + align - 1) / align) * align
|
|
buf := make([]byte, padding-offset)
|
|
if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
|
|
if err := binary.Write(f, llm.ByteOrder, v); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
|
|
if err := binary.Write(f, llm.ByteOrder, v); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
|
|
if err := binary.Write(f, llm.ByteOrder, v); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
|
|
if err := binary.Write(f, llm.ByteOrder, b); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) writeString(f *os.File, s string) error {
|
|
if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) Decode(rso *readSeekOffset) error {
|
|
// decode key-values
|
|
for i := 0; uint64(i) < llm.NumKV(); i++ {
|
|
k, err := llm.readString(rso)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
vtype := llm.readU32(rso)
|
|
|
|
var v any
|
|
switch vtype {
|
|
case GGUFTypeUint8:
|
|
v = llm.readU8(rso)
|
|
case GGUFTypeInt8:
|
|
v = llm.readI8(rso)
|
|
case GGUFTypeUint16:
|
|
v = llm.readU16(rso)
|
|
case GGUFTypeInt16:
|
|
v = llm.readI16(rso)
|
|
case GGUFTypeUint32:
|
|
v = llm.readU32(rso)
|
|
case GGUFTypeInt32:
|
|
v = llm.readI32(rso)
|
|
case GGUFTypeUint64:
|
|
v = llm.readU64(rso)
|
|
case GGUFTypeInt64:
|
|
v = llm.readI64(rso)
|
|
case GGUFTypeFloat32:
|
|
v = llm.readF32(rso)
|
|
case GGUFTypeFloat64:
|
|
v = llm.readF64(rso)
|
|
case GGUFTypeBool:
|
|
v = llm.readBool(rso)
|
|
case GGUFTypeString:
|
|
s, err := llm.readString(rso)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
v = s
|
|
case GGUFTypeArray:
|
|
a, err := llm.readArray(rso)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
v = a
|
|
default:
|
|
return fmt.Errorf("invalid type: %d", vtype)
|
|
}
|
|
|
|
llm.KV[k] = v
|
|
}
|
|
|
|
// decode tensors
|
|
for i := 0; uint64(i) < llm.NumTensor(); i++ {
|
|
name, err := llm.readString(rso)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// dims is the number of dimensions in the tensor
|
|
dims := llm.readU32(rso)
|
|
|
|
shape := [4]uint64{1, 1, 1, 1}
|
|
for i := 0; uint32(i) < dims; i++ {
|
|
shape[i] = llm.readU64(rso)
|
|
}
|
|
|
|
tensor := Tensor{
|
|
Name: name,
|
|
Kind: llm.readU32(rso),
|
|
Offset: llm.readU64(rso),
|
|
Shape: shape,
|
|
}
|
|
|
|
llm.Tensors = append(llm.Tensors, tensor)
|
|
llm.parameters += tensor.Parameters()
|
|
}
|
|
|
|
alignment, ok := llm.KV["general.alignment"].(uint32)
|
|
if !ok {
|
|
alignment = 32
|
|
}
|
|
|
|
rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
|
|
for _, tensor := range llm.Tensors {
|
|
padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
|
|
rso.Seek(padded, io.SeekCurrent)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (llm *GGUFModel) NumLayers() uint32 {
|
|
value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
|
|
if !exists {
|
|
return 0
|
|
}
|
|
|
|
return value.(uint32)
|
|
}
|
|
|
|
func (llm *GGUFModel) NumHead() uint32 {
|
|
value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
|
|
if !exists {
|
|
return 0
|
|
}
|
|
|
|
return value.(uint32)
|
|
}
|
|
|
|
func (llm *GGUFModel) NumEmbed() uint32 {
|
|
value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
|
|
if !exists {
|
|
return 0
|
|
}
|
|
|
|
return value.(uint32)
|
|
}
|
|
|
|
func (llm *GGUFModel) NumHeadKv() uint32 {
|
|
value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
|
|
if !exists {
|
|
return 0
|
|
}
|
|
|
|
return value.(uint32)
|
|
}
|
|
|
|
func (llm *GGUFModel) NumCtx() uint32 {
|
|
value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
|
|
if !exists {
|
|
return 0
|
|
}
|
|
|
|
return value.(uint32)
|
|
}
|
|
|
|
func (llm *GGUFModel) NumGQA() uint32 {
|
|
numHeadKv := llm.NumHeadKv()
|
|
if numHeadKv == 0 {
|
|
return 0
|
|
}
|
|
|
|
return llm.NumHead() / numHeadKv
|
|
}
|
|
|
|
func (llm GGUFModel) readU8(r io.Reader) uint8 {
|
|
var u8 uint8
|
|
binary.Read(r, llm.ByteOrder, &u8)
|
|
return u8
|
|
}
|
|
|
|
func (llm GGUFModel) readI8(r io.Reader) int8 {
|
|
var i8 int8
|
|
binary.Read(r, llm.ByteOrder, &i8)
|
|
return i8
|
|
}
|
|
|
|
func (llm GGUFModel) readU16(r io.Reader) uint16 {
|
|
var u16 uint16
|
|
binary.Read(r, llm.ByteOrder, &u16)
|
|
return u16
|
|
}
|
|
|
|
func (llm GGUFModel) readI16(r io.Reader) int16 {
|
|
var i16 int16
|
|
binary.Read(r, llm.ByteOrder, &i16)
|
|
return i16
|
|
}
|
|
|
|
func (llm GGUFModel) readU32(r io.Reader) uint32 {
|
|
var u32 uint32
|
|
binary.Read(r, llm.ByteOrder, &u32)
|
|
return u32
|
|
}
|
|
|
|
func (llm GGUFModel) readI32(r io.Reader) int32 {
|
|
var i32 int32
|
|
binary.Read(r, llm.ByteOrder, &i32)
|
|
return i32
|
|
}
|
|
|
|
func (llm GGUFModel) readU64(r io.Reader) uint64 {
|
|
var u64 uint64
|
|
binary.Read(r, llm.ByteOrder, &u64)
|
|
return u64
|
|
}
|
|
|
|
func (llm GGUFModel) readI64(r io.Reader) int64 {
|
|
var i64 int64
|
|
binary.Read(r, llm.ByteOrder, &i64)
|
|
return i64
|
|
}
|
|
|
|
func (llm GGUFModel) readF32(r io.Reader) float32 {
|
|
var f32 float32
|
|
binary.Read(r, llm.ByteOrder, &f32)
|
|
return f32
|
|
}
|
|
|
|
func (llm GGUFModel) readF64(r io.Reader) float64 {
|
|
var f64 float64
|
|
binary.Read(r, llm.ByteOrder, &f64)
|
|
return f64
|
|
}
|
|
|
|
func (llm GGUFModel) readBool(r io.Reader) bool {
|
|
var b bool
|
|
binary.Read(r, llm.ByteOrder, &b)
|
|
return b
|
|
}
|
|
|
|
func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
|
|
var nameLength uint32
|
|
binary.Read(r, llm.ByteOrder, &nameLength)
|
|
|
|
var b bytes.Buffer
|
|
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// gguf v1 strings are null-terminated
|
|
b.Truncate(b.Len() - 1)
|
|
|
|
return b.String(), nil
|
|
}
|
|
|
|
func (llm GGUFModel) readString(r io.Reader) (string, error) {
|
|
if llm.Version == 1 {
|
|
return llm.readStringV1(r)
|
|
}
|
|
|
|
var nameLength uint64
|
|
binary.Read(r, llm.ByteOrder, &nameLength)
|
|
|
|
var b bytes.Buffer
|
|
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return b.String(), nil
|
|
}
|
|
|
|
func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
|
|
atype := llm.readU32(r)
|
|
n := llm.readU32(r)
|
|
|
|
for i := 0; uint32(i) < n; i++ {
|
|
switch atype {
|
|
case GGUFTypeUint8:
|
|
arr = append(arr, llm.readU8(r))
|
|
case GGUFTypeInt8:
|
|
arr = append(arr, llm.readI8(r))
|
|
case GGUFTypeUint16:
|
|
arr = append(arr, llm.readU16(r))
|
|
case GGUFTypeInt16:
|
|
arr = append(arr, llm.readI16(r))
|
|
case GGUFTypeUint32:
|
|
arr = append(arr, llm.readU32(r))
|
|
case GGUFTypeInt32:
|
|
arr = append(arr, llm.readI32(r))
|
|
case GGUFTypeFloat32:
|
|
arr = append(arr, llm.readF32(r))
|
|
case GGUFTypeBool:
|
|
arr = append(arr, llm.readBool(r))
|
|
case GGUFTypeString:
|
|
s, err := llm.readStringV1(r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
arr = append(arr, s)
|
|
default:
|
|
return nil, fmt.Errorf("invalid array type: %d", atype)
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
|
|
if llm.Version == 1 {
|
|
return llm.readArrayV1(r)
|
|
}
|
|
|
|
atype := llm.readU32(r)
|
|
n := llm.readU64(r)
|
|
|
|
for i := 0; uint64(i) < n; i++ {
|
|
switch atype {
|
|
case GGUFTypeUint8:
|
|
arr = append(arr, llm.readU8(r))
|
|
case GGUFTypeInt8:
|
|
arr = append(arr, llm.readI8(r))
|
|
case GGUFTypeUint16:
|
|
arr = append(arr, llm.readU16(r))
|
|
case GGUFTypeInt16:
|
|
arr = append(arr, llm.readI16(r))
|
|
case GGUFTypeUint32:
|
|
arr = append(arr, llm.readU32(r))
|
|
case GGUFTypeInt32:
|
|
arr = append(arr, llm.readI32(r))
|
|
case GGUFTypeUint64:
|
|
arr = append(arr, llm.readU64(r))
|
|
case GGUFTypeInt64:
|
|
arr = append(arr, llm.readI64(r))
|
|
case GGUFTypeFloat32:
|
|
arr = append(arr, llm.readF32(r))
|
|
case GGUFTypeFloat64:
|
|
arr = append(arr, llm.readF64(r))
|
|
case GGUFTypeBool:
|
|
arr = append(arr, llm.readBool(r))
|
|
case GGUFTypeString:
|
|
s, err := llm.readString(r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
arr = append(arr, s)
|
|
default:
|
|
return nil, fmt.Errorf("invalid array type: %d", atype)
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|