diff --git a/convert/convert.go b/convert/convert.go
index 6607c833..fc1d2f7a 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -112,7 +112,7 @@ func ReadSafeTensors(fn string, offset uint64) ([]llm.Tensor, uint64, error) {
 			Name:          ggufName,
 			Kind:          kind,
 			Offset:        offset,
-			Shape:         shape,
+			Shape:         shape[:],
 			FileName:      fn,
 			OffsetPadding: 8 + jsonSize,
 			FileOffsets:   []uint64{uint64(data.Offsets[0]), uint64(data.Offsets[1])},
diff --git a/llm/ggla.go b/llm/ggla.go
new file mode 100644
index 00000000..e22dd59f
--- /dev/null
+++ b/llm/ggla.go
@@ -0,0 +1,209 @@
+package llm
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+	"slices"
+)
+
+// maxDimsGGLA bounds the tensor rank read from a ggla file. It matches the
+// four dimensions Tensor.Shape held as a fixed-size array and keeps a corrupt
+// header from requesting an enormous allocation.
+const maxDimsGGLA = 4
+
+// ContainerGGLA is the container for ggla (ggml LoRA adapter) files.
+type ContainerGGLA struct {
+	version uint32
+}
+
+// Name returns the container name, "ggla".
+func (c *ContainerGGLA) Name() string {
+	return "ggla"
+}
+
+// Decode reads the container version from rso and, when it is supported,
+// decodes the rest of the file into a ModelGGLA.
+//
+// The model is returned together with the error from decoding its contents;
+// after a complete read that error is io.EOF, which ends the tensor list.
+func (c *ContainerGGLA) Decode(rso *readSeekOffset) (model, error) {
+	if err := binary.Read(rso, binary.LittleEndian, &c.version); err != nil {
+		if errors.Is(err, io.EOF) {
+			// the version is mandatory, so a clean EOF here is a truncated
+			// file and must not look like the end-of-tensors io.EOF
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+
+	switch c.version {
+	case 1:
+	default:
+		return nil, errors.New("invalid version")
+	}
+
+	m := newModelGGLA(c)
+	err := m.decode(rso)
+	return m, err
+}
+
+// ModelGGLA holds the key-values and tensor metadata decoded from a ggla file.
+type ModelGGLA struct {
+	*ContainerGGLA
+
+	kv      KV
+	tensors []Tensor
+}
+
+// newModelGGLA returns an empty ModelGGLA bound to container.
+func newModelGGLA(container *ContainerGGLA) *ModelGGLA {
+	return &ModelGGLA{
+		ContainerGGLA: container,
+		kv:            make(KV),
+	}
+}
+
+// decode reads the "r" and "alpha" header values followed by tensor records
+// until the input is exhausted. Tensor data is skipped, only its offset is
+// recorded.
+//
+// decode never returns nil: a complete read ends with io.EOF at a record
+// boundary, and running out of input anywhere else yields
+// io.ErrUnexpectedEOF.
+func (m *ModelGGLA) decode(rso *readSeekOffset) error {
+	// read decodes one little-endian value that has to be present, so io.EOF
+	// is reported as io.ErrUnexpectedEOF
+	read := func(data any) error {
+		err := binary.Read(rso, binary.LittleEndian, data)
+		if errors.Is(err, io.EOF) {
+			return io.ErrUnexpectedEOF
+		}
+		return err
+	}
+
+	var r uint32
+	if err := read(&r); err != nil {
+		return err
+	}
+	m.kv["r"] = r
+
+	var alpha uint32
+	if err := read(&alpha); err != nil {
+		return err
+	}
+	m.kv["alpha"] = alpha
+
+	for {
+		// a clean io.EOF before a new record is the normal end of the file
+		var dims uint32
+		if err := binary.Read(rso, binary.LittleEndian, &dims); err != nil {
+			return err
+		}
+
+		// dims sizes the allocation below, so bound it before trusting it
+		if dims > maxDimsGGLA {
+			return errors.New("invalid tensor dimensions")
+		}
+
+		var namesize uint32
+		if err := read(&namesize); err != nil {
+			return err
+		}
+
+		var t Tensor
+		if err := read(&t.Kind); err != nil {
+			return err
+		}
+
+		t.Shape = make([]uint64, dims)
+		for i := range t.Shape {
+			var shape32 uint32
+			if err := read(&shape32); err != nil {
+				return err
+			}
+
+			t.Shape[i] = uint64(shape32)
+		}
+
+		// ggla tensor shape is reversed
+		// ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
+		slices.Reverse(t.Shape)
+
+		name := make([]byte, namesize)
+		if err := read(&name); err != nil {
+			return err
+		}
+
+		t.Name = string(name)
+
+		// round the offset up to the next multiple of 32 before the data
+		if _, err := rso.Seek((rso.offset+31)&-32, io.SeekStart); err != nil {
+			return err
+		}
+
+		t.Offset = uint64(rso.offset)
+
+		// step over t.Size() bytes to reach the next record
+		if _, err := rso.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
+			return err
+		}
+
+		m.tensors = append(m.tensors, t)
+	}
+}
+
+// KV returns the key-values read from the file header.
+func (m *ModelGGLA) KV() KV {
+	return m.kv
+}
+
+// Tensor returns the tensors found in the file.
+func (m *ModelGGLA) Tensor() []Tensor {
+	return m.tensors
+}
+
+// ModelFamily returns "ggla".
+func (*ModelGGLA) ModelFamily() string {
+	return "ggla"
+}
+
+// ModelType is not implemented for ggla files and panics.
+func (*ModelGGLA) ModelType() string {
+	panic("not implemented")
+}
+
+// FileType is not implemented for ggla files and panics.
+func (*ModelGGLA) FileType() string {
+	panic("not implemented")
+}
+
+// NumLayers is not implemented for ggla files and panics.
+func (*ModelGGLA) NumLayers() uint32 {
+	panic("not implemented")
+}
+
+// NumGQA is not implemented for ggla files and panics.
+func (*ModelGGLA) NumGQA() uint32 {
+	panic("not implemented")
+}
+
+// NumEmbed is not implemented for ggla files and panics.
+func (*ModelGGLA) NumEmbed() uint32 {
+	panic("not implemented")
+}
+
+// NumHead is not implemented for ggla files and panics.
+func (*ModelGGLA) NumHead() uint32 {
+	panic("not implemented")
+}
+
+// NumHeadKv is not implemented for ggla files and panics.
+func (*ModelGGLA) NumHeadKv() uint32 {
+	panic("not implemented")
+}
+
+// NumCtx is not implemented for ggla files and panics.
+func (*ModelGGLA) NumCtx() uint32 {
+	panic("not implemented")
+}
diff --git a/llm/ggml.go b/llm/ggml.go
index ddcf6ed7..88cd9e13 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -106,32 +106,6 @@ type container interface {
 	Decode(*readSeekOffset) (model, error)
 }
 
-type containerLORA struct {
-	version uint32
-}
-
-func (c *containerLORA) Name() string {
-	return "ggla"
-}
-
-func (c *containerLORA) Decode(rso *readSeekOffset) (model, error) {
-	var version uint32
-	binary.Read(rso, binary.LittleEndian, &version)
-
-	switch version {
-	case 1:
-	default:
-		return nil, errors.New("invalid version")
-	}
-
-	c.version = version
-
-	// remaining file contents aren't decoded
-	rso.Seek(0, io.SeekEnd)
-
-	return nil, nil
-}
-
 const (
 	// Magic constant for `ggml` files (unversioned).
 	FILE_MAGIC_GGML = 0x67676d6c
@@ -161,7 +135,7 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 	case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
 		return nil, ErrUnsupportedFormat
 	case FILE_MAGIC_GGLA:
-		c = &containerLORA{}
+		c = &ContainerGGLA{}
 	case FILE_MAGIC_GGUF_LE:
 		c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
 	case FILE_MAGIC_GGUF_BE:
@@ -171,7 +145,8 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 	}
 
 	model, err := c.Decode(&ro)
-	if err != nil {
+	// io.EOF is how the ggla decoder reports the end of its tensor list
+	if err != nil && !errors.Is(err, io.EOF) {
 		return nil, err
 	}
 
diff --git a/llm/gguf.go b/llm/gguf.go
index b01cd5d2..61c55148 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -94,7 +94,7 @@ type Tensor struct {
 	Offset uint64
 
 	// shape is the number of elements in each dimension
-	Shape [4]uint64
+	Shape []uint64
 
 	FileName      string
 	OffsetPadding uint64
@@ -156,7 +156,11 @@ func (t Tensor) TypeSize() uint64 {
 }
 
 func (t Tensor) Parameters() uint64 {
-	return t.Shape[0] * t.Shape[1] * t.Shape[2] * t.Shape[3]
+	var count uint64 = 1
+	for _, n := range t.Shape {
+		count *= n
+	}
+	return count
 }
 
 func (t Tensor) Size() uint64 {
@@ -703,7 +707,7 @@ func (llm *GGUFModel) Decode(rso *readSeekOffset) error {
 			Name:   name,
 			Kind:   llm.readU32(rso),
 			Offset: llm.readU64(rso),
-			Shape:  shape,
+			Shape:  shape[:],
 		}
 
 		llm.Tensors = append(llm.Tensors, tensor)
diff --git a/server/images.go b/server/images.go
index 2ae2fadc..06f8ffd9 100644
--- a/server/images.go
+++ b/server/images.go
@@ -473,7 +473,13 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 			}
 			defer bin.Close()
 
-			layer, err := NewLayer(bin, mediatype)
+			ggml, err := llm.DecodeGGML(bin)
+			if err != nil {
+				return err
+			}
+
+			sr := io.NewSectionReader(bin, 0, ggml.Size)
+			layer, err := NewLayer(sr, mediatype)
 			if err != nil {
 				return err
 			}