ollama/llm/ggla.go

package llm

import (
	"encoding/binary"
	"errors"
	"io"
	"slices"
)
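
// containerGGLA handles the "ggla" container format used for LoRA adapters
// (see the convert-lora-to-ggml.py reference below).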
type containerGGLA struct {
	version uint32
}

func (c *containerGGLA) Name() string {
	return "ggla"
}
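
// Decode reads the container version from rs and, if it is a supported
// version, decodes the model that follows.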
func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
	if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
		return nil, err
	}

	switch c.version {
	case 1:
	default:
		return nil, errors.New("invalid version")
	}

	model := newGGLA(c)
	err := model.decode(rs)
	return model, err
}
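
// ggla holds a decoded adapter: its key/value metadata ("r" and "alpha") and
// the descriptors of the tensors that follow the header.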
type ggla struct {
	*containerGGLA

	kv           KV
	tensors      []*Tensor
	tensorOffset uint64
}
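
// newGGLA wraps container in a ggla with an initialized key/value map.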
func newGGLA(container *containerGGLA) *ggla {
	return &ggla{
		containerGGLA: container,
		kv:            make(KV),
	}
}

func (llm *ggla) KV() KV {
	return llm.kv
}

func (llm *ggla) Tensors() *Tensors {
	return &Tensors{
		Items:  llm.tensors,
		Offset: llm.tensorOffset,
	}
}
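
// decode reads the payload that follows the version: the adapter's "r" and
// "alpha" values, then a sequence of tensor headers that continues until EOF.
// Tensor data is not read here; each tensor's kind, shape, name and data
// offset are recorded and the data itself is skipped.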
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
	var r uint32
	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
		return err
	}
	llm.kv["r"] = r

	var alpha uint32
	if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
		return err
	}
	llm.kv["alpha"] = alpha
	offset, err := rs.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	llm.tensorOffset = uint64(offset)

	for {
		var dims uint32
		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
			if errors.Is(err, io.EOF) {
				return nil
			}

			return err
		}
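
		// Once part of a tensor header has been read, EOF means the file is
		// truncated rather than cleanly finished.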
		defer func() {
			if errors.Is(retErr, io.EOF) {
				retErr = io.ErrUnexpectedEOF
			}
		}()

		var namesize uint32
		if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
			return err
		}

		var t Tensor
		if err := binary.Read(rs, binary.LittleEndian, &t.Kind); err != nil {
			return err
		}

		t.Shape = make([]uint64, dims)
		for i := 0; uint32(i) < dims; i++ {
			var shape32 uint32
			if err := binary.Read(rs, binary.LittleEndian, &shape32); err != nil {
				return err
			}

			t.Shape[i] = uint64(shape32)
		}

		// ggla tensor shape is reversed
		// ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
		slices.Reverse(t.Shape)

		name := make([]byte, namesize)
		if err := binary.Read(rs, binary.LittleEndian, &name); err != nil {
			return err
		}

		t.Name = string(name)
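
		// The name is followed by padding that aligns the tensor data to the
		// next 32-byte boundary; that aligned position is the tensor's offset.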
		offset, err := rs.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}

		if _, err := rs.Seek((offset+31)&-32-offset, io.SeekCurrent); err != nil {
			return err
		}

		offset, err = rs.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}

		t.Offset = uint64(offset)
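
		// Skip over the tensor data; only its offset is recorded.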
		if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
			return err
		}

		llm.tensors = append(llm.tensors, &t)
	}
}