2023-07-21 13:33:56 -07:00
|
|
|
package llm
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/binary"
|
|
|
|
"errors"
|
|
|
|
"io"
|
|
|
|
)
|
|
|
|
|
2023-09-12 10:01:20 -07:00
|
|
|
type GGML struct {
|
|
|
|
container
|
|
|
|
model
|
2023-11-24 11:57:20 -08:00
|
|
|
|
|
|
|
Size int64
|
2023-09-12 10:01:20 -07:00
|
|
|
}
|
2023-07-21 13:33:56 -07:00
|
|
|
|
|
|
|
// Numeric file type identifiers understood by fileType. The values are
// consecutive starting at zero, except that 5 and 6 are not assigned to any
// constant; the blank identifiers below keep iota in step across that gap.
const (
	fileTypeF32 uint32 = iota
	fileTypeF16
	fileTypeQ4_0
	fileTypeQ4_1
	fileTypeQ4_1_F16
	_ // 5 is unassigned
	_ // 6 is unassigned
	fileTypeQ8_0
	fileTypeQ5_0
	fileTypeQ5_1
	fileTypeQ2_K
	fileTypeQ3_K_S
	fileTypeQ3_K_M
	fileTypeQ3_K_L
	fileTypeQ4_K_S
	fileTypeQ4_K_M
	fileTypeQ5_K_S
	fileTypeQ5_K_M
	fileTypeQ6_K
)
|
|
|
|
|
2023-09-12 10:01:20 -07:00
|
|
|
func fileType(fileType uint32) string {
|
|
|
|
switch fileType {
|
|
|
|
case fileTypeF32:
|
|
|
|
return "F32"
|
|
|
|
case fileTypeF16:
|
|
|
|
return "F16"
|
|
|
|
case fileTypeQ4_0:
|
|
|
|
return "Q4_0"
|
|
|
|
case fileTypeQ4_1:
|
|
|
|
return "Q4_1"
|
|
|
|
case fileTypeQ4_1_F16:
|
|
|
|
return "Q4_1_F16"
|
|
|
|
case fileTypeQ8_0:
|
|
|
|
return "Q8_0"
|
|
|
|
case fileTypeQ5_0:
|
|
|
|
return "Q5_0"
|
|
|
|
case fileTypeQ5_1:
|
|
|
|
return "Q5_1"
|
|
|
|
case fileTypeQ2_K:
|
|
|
|
return "Q2_K"
|
|
|
|
case fileTypeQ3_K_S:
|
|
|
|
return "Q3_K_S"
|
|
|
|
case fileTypeQ3_K_M:
|
|
|
|
return "Q3_K_M"
|
|
|
|
case fileTypeQ3_K_L:
|
|
|
|
return "Q3_K_L"
|
|
|
|
case fileTypeQ4_K_S:
|
|
|
|
return "Q4_K_S"
|
|
|
|
case fileTypeQ4_K_M:
|
|
|
|
return "Q4_K_M"
|
|
|
|
case fileTypeQ5_K_S:
|
|
|
|
return "Q5_K_S"
|
|
|
|
case fileTypeQ5_K_M:
|
|
|
|
return "Q5_K_M"
|
|
|
|
case fileTypeQ6_K:
|
|
|
|
return "Q6_K"
|
2023-08-17 11:37:27 -07:00
|
|
|
default:
|
2023-10-02 19:52:25 -07:00
|
|
|
return "unknown"
|
2023-08-17 11:37:27 -07:00
|
|
|
}
|
|
|
|
}
|
2023-07-21 13:33:56 -07:00
|
|
|
|
2023-08-17 11:37:27 -07:00
|
|
|
// model is the metadata view of a decoded model file. A container's Decode
// returns a model, and GGML embeds one.
type model interface {
	// Descriptive strings for the model.
	// NOTE(review): the concrete values come from implementations that are
	// not part of this section — confirm there.
	ModelFamily() string
	ModelType() string
	FileType() string

	// NumLayers reports a layer count (presumably the number of layers in
	// the model — confirm against the implementations).
	NumLayers() int64
}
|
|
|
|
|
|
|
|
type container interface {
|
|
|
|
Name() string
|
2023-11-29 10:31:58 -08:00
|
|
|
Decode(*readSeekOffset) (model, error)
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
2023-09-07 13:55:37 -04:00
|
|
|
// containerGGML is the handler DecodeGGML selects for files whose magic is
// FILE_MAGIC_GGML. It carries no state.
type containerGGML struct{}

// Name returns the container identifier "ggml".
func (*containerGGML) Name() string {
	return "ggml"
}
|
|
|
|
|
2023-11-29 10:31:58 -08:00
|
|
|
func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) {
|
2023-12-09 21:14:35 -05:00
|
|
|
// file contents aren't decoded
|
|
|
|
ro.Seek(0, io.SeekEnd)
|
2023-09-07 13:55:37 -04:00
|
|
|
return nil, nil
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// containerGGMF is the handler DecodeGGML selects for files whose magic is
// FILE_MAGIC_GGMF.
type containerGGMF struct {
	// version is the format version that Decode read from the input.
	version uint32
}

// Name returns the container identifier "ggmf".
func (*containerGGMF) Name() string {
	return "ggmf"
}
|
|
|
|
|
2023-11-29 10:31:58 -08:00
|
|
|
func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) {
|
2023-07-21 13:33:56 -07:00
|
|
|
var version uint32
|
2023-11-24 11:57:20 -08:00
|
|
|
binary.Read(ro, binary.LittleEndian, &version)
|
2023-07-21 13:33:56 -07:00
|
|
|
|
|
|
|
switch version {
|
|
|
|
case 1:
|
|
|
|
default:
|
2023-09-07 13:55:37 -04:00
|
|
|
return nil, errors.New("invalid version")
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
c.version = version
|
2023-12-09 21:14:35 -05:00
|
|
|
|
|
|
|
// remaining file contents aren't decoded
|
|
|
|
ro.Seek(0, io.SeekEnd)
|
|
|
|
|
2023-09-07 13:55:37 -04:00
|
|
|
return nil, nil
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// containerGGJT is the handler DecodeGGML selects for files whose magic is
// FILE_MAGIC_GGJT.
type containerGGJT struct {
	// version is the format version that Decode read from the input.
	version uint32
}

// Name returns the container identifier "ggjt".
func (*containerGGJT) Name() string {
	return "ggjt"
}
|
|
|
|
|
2023-11-29 10:31:58 -08:00
|
|
|
func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) {
|
2023-07-21 13:33:56 -07:00
|
|
|
var version uint32
|
2023-11-24 11:57:20 -08:00
|
|
|
binary.Read(ro, binary.LittleEndian, &version)
|
2023-07-21 13:33:56 -07:00
|
|
|
|
|
|
|
switch version {
|
|
|
|
case 1, 2, 3:
|
|
|
|
default:
|
2023-09-07 13:55:37 -04:00
|
|
|
return nil, errors.New("invalid version")
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
c.version = version
|
2023-09-07 13:55:37 -04:00
|
|
|
|
|
|
|
// different model types may have different layouts for hyperparameters
|
|
|
|
var llama llamaModel
|
2023-11-24 11:57:20 -08:00
|
|
|
binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
|
2023-12-09 20:48:57 -05:00
|
|
|
|
|
|
|
// remaining file contents aren't decoded
|
|
|
|
ro.Seek(0, io.SeekEnd)
|
|
|
|
|
2023-09-07 13:55:37 -04:00
|
|
|
return &llama, nil
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// containerLORA is the handler DecodeGGML selects for files whose magic is
// FILE_MAGIC_GGLA.
type containerLORA struct {
	// version is the format version that Decode read from the input.
	version uint32
}

// Name returns the container identifier "ggla".
func (*containerLORA) Name() string {
	return "ggla"
}
|
|
|
|
|
2023-11-29 10:31:58 -08:00
|
|
|
func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
|
2023-07-21 13:33:56 -07:00
|
|
|
var version uint32
|
2023-11-24 11:57:20 -08:00
|
|
|
binary.Read(ro, binary.LittleEndian, &version)
|
2023-07-21 13:33:56 -07:00
|
|
|
|
|
|
|
switch version {
|
|
|
|
case 1:
|
|
|
|
default:
|
2023-09-07 13:55:37 -04:00
|
|
|
return nil, errors.New("invalid version")
|
2023-07-21 13:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
c.version = version
|
2023-12-09 21:14:35 -05:00
|
|
|
|
|
|
|
// remaining file contents aren't decoded
|
|
|
|
ro.Seek(0, io.SeekEnd)
|
|
|
|
|
2023-09-07 13:55:37 -04:00
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2023-07-21 13:33:56 -07:00
|
|
|
// File magic numbers, compared against the first four bytes of a file after
// DecodeGGML reads them as a little-endian uint32. Each value spells a
// four-character ASCII tag when written out as hex bytes.
const (
	// FILE_MAGIC_GGML identifies unversioned `ggml` files ("ggml").
	FILE_MAGIC_GGML = 0x67676d6c
	// FILE_MAGIC_GGMF identifies versioned `ggml` files in the ggmf
	// container ("ggmf").
	FILE_MAGIC_GGMF = 0x67676d66
	// FILE_MAGIC_GGJT identifies versioned `ggml` files in the ggjt
	// container ("ggjt").
	FILE_MAGIC_GGJT = 0x67676a74
	// FILE_MAGIC_GGLA identifies `ggla` files, i.e. LoRA adapters ("ggla").
	FILE_MAGIC_GGLA = 0x67676c61
	// FILE_MAGIC_GGUF_LE and FILE_MAGIC_GGUF_BE identify versioned `gguf`
	// files. The two values are the same four bytes in opposite order;
	// DecodeGGML maps them to little-endian and big-endian decoding
	// respectively.
	FILE_MAGIC_GGUF_LE = 0x46554747
	FILE_MAGIC_GGUF_BE = 0x47475546
)
|
|
|
|
|
2023-11-29 10:31:58 -08:00
|
|
|
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
|
|
|
|
ro := readSeekOffset{ReadSeeker: r}
|
2023-07-21 13:33:56 -07:00
|
|
|
|
2023-11-24 11:57:20 -08:00
|
|
|
var magic uint32
|
|
|
|
if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var c container
|
|
|
|
switch magic {
|
2023-07-21 13:33:56 -07:00
|
|
|
case FILE_MAGIC_GGML:
|
2023-11-24 11:57:20 -08:00
|
|
|
c = &containerGGML{}
|
2023-07-21 13:33:56 -07:00
|
|
|
case FILE_MAGIC_GGMF:
|
2023-11-24 11:57:20 -08:00
|
|
|
c = &containerGGMF{}
|
2023-07-21 13:33:56 -07:00
|
|
|
case FILE_MAGIC_GGJT:
|
2023-11-24 11:57:20 -08:00
|
|
|
c = &containerGGJT{}
|
2023-07-21 13:33:56 -07:00
|
|
|
case FILE_MAGIC_GGLA:
|
2023-11-24 11:57:20 -08:00
|
|
|
c = &containerLORA{}
|
2023-10-23 09:33:13 -07:00
|
|
|
case FILE_MAGIC_GGUF_LE:
|
2023-11-24 11:57:20 -08:00
|
|
|
c = &containerGGUF{bo: binary.LittleEndian}
|
2023-10-23 09:33:13 -07:00
|
|
|
case FILE_MAGIC_GGUF_BE:
|
2023-11-24 11:57:20 -08:00
|
|
|
c = &containerGGUF{bo: binary.BigEndian}
|
2023-07-21 13:33:56 -07:00
|
|
|
default:
|
|
|
|
return nil, errors.New("invalid file magic")
|
|
|
|
}
|
|
|
|
|
2023-11-24 11:57:20 -08:00
|
|
|
model, err := c.Decode(&ro)
|
2023-09-07 13:55:37 -04:00
|
|
|
if err != nil {
|
2023-07-21 13:33:56 -07:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// final model type
|
2023-11-24 11:57:20 -08:00
|
|
|
return &GGML{
|
|
|
|
container: c,
|
|
|
|
model: model,
|
|
|
|
Size: ro.offset,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2023-11-29 10:31:58 -08:00
|
|
|
// readSeekOffset wraps an io.ReadSeeker and mirrors its current position in
// offset: reads advance it by the number of bytes returned and successful
// seeks overwrite it with the new position.
type readSeekOffset struct {
	io.ReadSeeker
	offset int64
}

// Seek forwards to the wrapped ReadSeeker. On success it records and returns
// the new position; on failure it returns 0 and the error, leaving the
// tracked offset unchanged.
func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) {
	pos, err := rso.ReadSeeker.Seek(offset, whence)
	if err != nil {
		return 0, err
	}

	rso.offset = pos
	return pos, nil
}

// Read forwards to the wrapped ReadSeeker and advances the tracked offset by
// the number of bytes read, even when the read also reports an error.
func (rso *readSeekOffset) Read(p []byte) (int, error) {
	count, err := rso.ReadSeeker.Read(p)
	rso.offset += int64(count)
	return count, err
}
|