662 lines
13 KiB
Go
662 lines
13 KiB
Go
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"cmp"
|
|
"encoding/binary"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"slices"
|
|
"strings"
|
|
|
|
"golang.org/x/exp/maps"
|
|
)
|
|
|
|
// containerGGUF holds the GGUF file header: the byte order used to decode the
// file, the format version, and the version-specific tensor and key-value
// counts. Decode fills in only the count struct selected by Version.
type containerGGUF struct {
	// ByteOrder is the byte order used for every binary read of the file.
	ByteOrder binary.ByteOrder

	// Version is the format version read from the header.
	Version uint32

	// V1 holds the 32-bit counts used when Version is 1.
	V1 struct {
		NumTensor, NumKV uint32
	}

	// V2 holds the 64-bit counts used when Version is 2.
	V2 struct {
		NumTensor, NumKV uint64
	}

	// V3 holds the 64-bit counts used for every other version.
	V3 struct {
		NumTensor, NumKV uint64
	}

	// maxArraySize caps the length of arrays whose values are collected while
	// decoding; a negative value disables the cap (see canCollectArray).
	maxArraySize int
}
|
|
|
|
func (c *containerGGUF) canCollectArray(size int) bool {
|
|
return c.maxArraySize < 0 || size <= c.maxArraySize
|
|
}
|
|
|
|
func (c *containerGGUF) Name() string {
|
|
return "gguf"
|
|
}
|
|
|
|
func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
|
|
if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var err error
|
|
switch c.Version {
|
|
case 1:
|
|
err = binary.Read(rs, c.ByteOrder, &c.V1)
|
|
case 2:
|
|
err = binary.Read(rs, c.ByteOrder, &c.V2)
|
|
default:
|
|
err = binary.Read(rs, c.ByteOrder, &c.V3)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
model := newGGUF(c)
|
|
if err := model.Decode(rs); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return model, nil
|
|
}
|
|
|
|
// Type tags identifying the encoding of a gguf key-value entry or array
// element. The numeric values are what is read from and written to the file.
const (
	ggufTypeUint8   uint32 = 0
	ggufTypeInt8    uint32 = 1
	ggufTypeUint16  uint32 = 2
	ggufTypeInt16   uint32 = 3
	ggufTypeUint32  uint32 = 4
	ggufTypeInt32   uint32 = 5
	ggufTypeFloat32 uint32 = 6
	ggufTypeBool    uint32 = 7
	ggufTypeString  uint32 = 8
	ggufTypeArray   uint32 = 9
	ggufTypeUint64  uint32 = 10
	ggufTypeInt64   uint32 = 11
	ggufTypeFloat64 uint32 = 12
)
|
|
|
|
type gguf struct {
|
|
*containerGGUF
|
|
|
|
kv KV
|
|
tensors []*Tensor
|
|
|
|
parameters uint64
|
|
tensorOffset uint64
|
|
|
|
scratch [16 << 10]byte
|
|
}
|
|
|
|
func newGGUF(container *containerGGUF) *gguf {
|
|
return &gguf{
|
|
containerGGUF: container,
|
|
kv: make(KV),
|
|
}
|
|
}
|
|
|
|
func (llm *gguf) KV() KV {
|
|
return llm.kv
|
|
}
|
|
|
|
func (llm *gguf) Tensors() Tensors {
|
|
return Tensors{
|
|
Items: llm.tensors,
|
|
Offset: llm.tensorOffset,
|
|
}
|
|
}
|
|
|
|
func (llm *gguf) numTensor() uint64 {
|
|
switch llm.Version {
|
|
case 1:
|
|
return uint64(llm.V1.NumTensor)
|
|
case 2:
|
|
return llm.V2.NumTensor
|
|
default:
|
|
return llm.V3.NumTensor
|
|
}
|
|
}
|
|
|
|
func (llm *gguf) numKV() uint64 {
|
|
switch llm.Version {
|
|
case 1:
|
|
return uint64(llm.V1.NumKV)
|
|
case 2:
|
|
return llm.V2.NumKV
|
|
default:
|
|
return llm.V3.NumKV
|
|
}
|
|
}
|
|
|
|
func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
|
// decode key-values
|
|
for i := 0; uint64(i) < llm.numKV(); i++ {
|
|
k, err := readGGUFString(llm, rs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
t, err := readGGUF[uint32](llm, rs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var v any
|
|
switch t {
|
|
case ggufTypeUint8:
|
|
v, err = readGGUF[uint8](llm, rs)
|
|
case ggufTypeInt8:
|
|
v, err = readGGUF[int8](llm, rs)
|
|
case ggufTypeUint16:
|
|
v, err = readGGUF[uint16](llm, rs)
|
|
case ggufTypeInt16:
|
|
v, err = readGGUF[int16](llm, rs)
|
|
case ggufTypeUint32:
|
|
v, err = readGGUF[uint32](llm, rs)
|
|
case ggufTypeInt32:
|
|
v, err = readGGUF[int32](llm, rs)
|
|
case ggufTypeUint64:
|
|
v, err = readGGUF[uint64](llm, rs)
|
|
case ggufTypeInt64:
|
|
v, err = readGGUF[int64](llm, rs)
|
|
case ggufTypeFloat32:
|
|
v, err = readGGUF[float32](llm, rs)
|
|
case ggufTypeFloat64:
|
|
v, err = readGGUF[float64](llm, rs)
|
|
case ggufTypeBool:
|
|
v, err = readGGUF[bool](llm, rs)
|
|
case ggufTypeString:
|
|
v, err = readGGUFString(llm, rs)
|
|
case ggufTypeArray:
|
|
v, err = readGGUFArray(llm, rs)
|
|
default:
|
|
return fmt.Errorf("invalid type: %d", t)
|
|
}
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
llm.kv[k] = v
|
|
}
|
|
|
|
// decode tensors
|
|
for range llm.numTensor() {
|
|
name, err := readGGUFString(llm, rs)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read tensor name: %w", err)
|
|
}
|
|
|
|
// dims is the number of dimensions in the tensor
|
|
dims, err := readGGUF[uint32](llm, rs)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read tensor dimensions: %w", err)
|
|
}
|
|
|
|
shape := make([]uint64, dims)
|
|
for i := 0; uint32(i) < dims; i++ {
|
|
shape[i], err = readGGUF[uint64](llm, rs)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read tensor shape: %w", err)
|
|
}
|
|
}
|
|
|
|
kind, err := readGGUF[uint32](llm, rs)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read tensor kind: %w", err)
|
|
}
|
|
|
|
offset, err := readGGUF[uint64](llm, rs)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read tensor offset: %w", err)
|
|
}
|
|
|
|
tensor := Tensor{
|
|
Name: name,
|
|
Kind: kind,
|
|
Offset: offset,
|
|
Shape: shape[:],
|
|
}
|
|
|
|
llm.tensors = append(llm.tensors, &tensor)
|
|
llm.parameters += tensor.parameters()
|
|
}
|
|
|
|
// patch KV with parameter count
|
|
llm.kv["general.parameter_count"] = llm.parameters
|
|
|
|
alignment, ok := llm.kv["general.alignment"].(uint32)
|
|
if !ok {
|
|
alignment = 32
|
|
}
|
|
|
|
offset, err := rs.Seek(0, io.SeekCurrent)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
padding := ggufPadding(offset, int64(alignment))
|
|
llm.tensorOffset = uint64(offset + padding)
|
|
|
|
for _, tensor := range llm.tensors {
|
|
offset, err := rs.Seek(0, io.SeekCurrent)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get current offset: %w", err)
|
|
}
|
|
|
|
padding := ggufPadding(offset, int64(alignment))
|
|
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
|
return fmt.Errorf("failed to seek to init padding: %w", err)
|
|
}
|
|
|
|
if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
|
|
return fmt.Errorf("failed to seek to tensor: %w", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
|
|
var t T
|
|
err := binary.Read(r, llm.ByteOrder, &t)
|
|
return t, err
|
|
}
|
|
|
|
// writeGGUF writes the gguf type tag t followed by the value v to w, both in
// little-endian byte order. The value is not written if writing the tag fails.
func writeGGUF[V any](w io.Writer, t uint32, v V) error {
	err := binary.Write(w, binary.LittleEndian, t)
	if err == nil {
		err = binary.Write(w, binary.LittleEndian, v)
	}
	return err
}
|
|
|
|
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
|
|
var length uint64
|
|
if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
var b bytes.Buffer
|
|
if _, err := io.CopyN(&b, r, int64(length)); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// gguf v1 strings are null-terminated
|
|
b.Truncate(b.Len() - 1)
|
|
|
|
return b.String(), nil
|
|
}
|
|
|
|
func discardGGUFString(llm *gguf, r io.Reader) error {
|
|
buf := llm.scratch[:8]
|
|
_, err := io.ReadFull(r, buf)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
size := int(llm.ByteOrder.Uint64(buf))
|
|
for size > 0 {
|
|
n, err := r.Read(llm.scratch[:min(size, cap(llm.scratch))])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
size -= n
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func readGGUFString(llm *gguf, r io.Reader) (string, error) {
|
|
if llm.Version == 1 {
|
|
return readGGUFV1String(llm, r)
|
|
}
|
|
|
|
buf := llm.scratch[:8]
|
|
_, err := io.ReadFull(r, buf)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
length := int(llm.ByteOrder.Uint64(buf))
|
|
if length > len(llm.scratch) {
|
|
buf = make([]byte, length)
|
|
} else {
|
|
buf = llm.scratch[:length]
|
|
}
|
|
clear(buf)
|
|
|
|
_, err = io.ReadFull(r, buf)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return string(buf), nil
|
|
}
|
|
|
|
func writeGGUFString(w io.Writer, s string) error {
|
|
if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
|
return err
|
|
}
|
|
|
|
_, err := io.Copy(w, strings.NewReader(s))
|
|
return err
|
|
}
|
|
|
|
// array is a decoded gguf array value.
type array struct {
	// size is the element count declared in the file.
	size int
	// values holds the decoded elements. The array readers leave it nil when
	// the array is too large to collect (see canCollectArray).
	values []any
}
|
|
|
|
func (a *array) MarshalJSON() ([]byte, error) {
|
|
return json.Marshal(a.values)
|
|
}
|
|
|
|
func readGGUFV1Array(llm *gguf, r io.Reader) (*array, error) {
|
|
t, err := readGGUF[uint32](llm, r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
n, err := readGGUF[uint32](llm, r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
a := &array{size: int(n)}
|
|
if llm.canCollectArray(int(n)) {
|
|
a.values = make([]any, 0, int(n))
|
|
}
|
|
|
|
for i := range n {
|
|
var e any
|
|
switch t {
|
|
case ggufTypeUint8:
|
|
e, err = readGGUF[uint8](llm, r)
|
|
case ggufTypeInt8:
|
|
e, err = readGGUF[int8](llm, r)
|
|
case ggufTypeUint16:
|
|
e, err = readGGUF[uint16](llm, r)
|
|
case ggufTypeInt16:
|
|
e, err = readGGUF[int16](llm, r)
|
|
case ggufTypeUint32:
|
|
e, err = readGGUF[uint32](llm, r)
|
|
case ggufTypeInt32:
|
|
e, err = readGGUF[int32](llm, r)
|
|
case ggufTypeUint64:
|
|
e, err = readGGUF[uint64](llm, r)
|
|
case ggufTypeInt64:
|
|
e, err = readGGUF[int64](llm, r)
|
|
case ggufTypeFloat32:
|
|
e, err = readGGUF[float32](llm, r)
|
|
case ggufTypeFloat64:
|
|
e, err = readGGUF[float64](llm, r)
|
|
case ggufTypeBool:
|
|
e, err = readGGUF[bool](llm, r)
|
|
case ggufTypeString:
|
|
e, err = readGGUFV1String(llm, r)
|
|
default:
|
|
return nil, fmt.Errorf("invalid array type: %d", t)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if a.values != nil {
|
|
a.values[i] = e
|
|
}
|
|
}
|
|
|
|
return a, nil
|
|
}
|
|
|
|
func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
|
if llm.Version == 1 {
|
|
return readGGUFV1Array(llm, r)
|
|
}
|
|
|
|
t, err := readGGUF[uint32](llm, r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
n, err := readGGUF[uint64](llm, r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
a := &array{size: int(n)}
|
|
if llm.canCollectArray(int(n)) {
|
|
a.values = make([]any, int(n))
|
|
}
|
|
|
|
for i := range n {
|
|
var e any
|
|
switch t {
|
|
case ggufTypeUint8:
|
|
e, err = readGGUF[uint8](llm, r)
|
|
case ggufTypeInt8:
|
|
e, err = readGGUF[int8](llm, r)
|
|
case ggufTypeUint16:
|
|
e, err = readGGUF[uint16](llm, r)
|
|
case ggufTypeInt16:
|
|
e, err = readGGUF[int16](llm, r)
|
|
case ggufTypeUint32:
|
|
e, err = readGGUF[uint32](llm, r)
|
|
case ggufTypeInt32:
|
|
e, err = readGGUF[int32](llm, r)
|
|
case ggufTypeUint64:
|
|
e, err = readGGUF[uint64](llm, r)
|
|
case ggufTypeInt64:
|
|
e, err = readGGUF[int64](llm, r)
|
|
case ggufTypeFloat32:
|
|
e, err = readGGUF[float32](llm, r)
|
|
case ggufTypeFloat64:
|
|
e, err = readGGUF[float64](llm, r)
|
|
case ggufTypeBool:
|
|
e, err = readGGUF[bool](llm, r)
|
|
case ggufTypeString:
|
|
if a.values != nil {
|
|
e, err = readGGUFString(llm, r)
|
|
} else {
|
|
err = discardGGUFString(llm, r)
|
|
}
|
|
default:
|
|
return nil, fmt.Errorf("invalid array type: %d", t)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if a.values != nil {
|
|
a.values[i] = e
|
|
}
|
|
}
|
|
|
|
return a, nil
|
|
}
|
|
|
|
// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
|
|
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
|
if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
|
|
return err
|
|
}
|
|
|
|
return binary.Write(w, binary.LittleEndian, s)
|
|
}
|
|
|
|
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
|
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
|
|
return err
|
|
}
|
|
|
|
keys := maps.Keys(kv)
|
|
slices.Sort(keys)
|
|
|
|
for _, key := range keys {
|
|
if err := ggufWriteKV(ws, key, kv[key]); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
slices.SortStableFunc(ts, func(a, b Tensor) int {
|
|
if i, j := a.block(), b.block(); i < 0 && j > 0 {
|
|
return 1
|
|
} else if i > 0 && j < 0 {
|
|
return -1
|
|
} else {
|
|
return cmp.Compare(i, j)
|
|
}
|
|
})
|
|
|
|
var s uint64
|
|
for _, t := range ts {
|
|
t.Offset = s
|
|
if err := ggufWriteTensorInfo(ws, t); err != nil {
|
|
return err
|
|
}
|
|
s += t.Size()
|
|
}
|
|
|
|
var alignment int64 = 32
|
|
for _, t := range ts {
|
|
if err := ggufWriteTensor(ws, t, alignment); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
|
|
slog.Debug(k, "type", fmt.Sprintf("%T", v))
|
|
if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
|
|
return err
|
|
}
|
|
|
|
var err error
|
|
switch v := v.(type) {
|
|
case uint32:
|
|
err = writeGGUF(ws, ggufTypeUint32, v)
|
|
case float32:
|
|
err = writeGGUF(ws, ggufTypeFloat32, v)
|
|
case bool:
|
|
err = writeGGUF(ws, ggufTypeBool, v)
|
|
case string:
|
|
err = writeGGUFString(ws, v)
|
|
case []int32:
|
|
err = writeGGUFArray(ws, ggufTypeInt32, v)
|
|
case []uint32:
|
|
err = writeGGUFArray(ws, ggufTypeUint32, v)
|
|
case []float32:
|
|
err = writeGGUFArray(ws, ggufTypeFloat32, v)
|
|
case []string:
|
|
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, e := range v {
|
|
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
default:
|
|
return fmt.Errorf("improper type for '%s'", k)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
|
|
slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
|
|
if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
|
|
return err
|
|
}
|
|
|
|
for i := range len(t.Shape) {
|
|
if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
|
|
return err
|
|
}
|
|
|
|
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
|
}
|
|
|
|
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
|
|
offset, err := ws.Seek(0, io.SeekCurrent)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
|
|
return err
|
|
}
|
|
|
|
_, err = t.WriteTo(ws)
|
|
return err
|
|
}
|
|
|
|
// ggufPadding returns the number of bytes to add to offset to reach the next
// multiple of align, or 0 when offset is already a multiple. align must be
// non-zero; a zero align causes a divide-by-zero panic.
func ggufPadding(offset, align int64) int64 {
	rem := offset % align
	return (align - rem) % align
}
|