Merge pull request #881 from jmorganca/mxyng/ggufv3

ggufv3
This commit is contained in:
Michael Yang 2023-10-23 10:50:45 -07:00 committed by GitHub
commit 36c160f1c3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 34 deletions

View file

@@ -175,7 +175,8 @@ const (
// Magic constant for `ggla` files (LoRA adapter). // Magic constant for `ggla` files (LoRA adapter).
FILE_MAGIC_GGLA = 0x67676C61 FILE_MAGIC_GGLA = 0x67676C61
// Magic constant for `gguf` files (versioned, gguf) // Magic constant for `gguf` files (versioned, gguf)
FILE_MAGIC_GGUF = 0x46554747 FILE_MAGIC_GGUF_LE = 0x46554747
FILE_MAGIC_GGUF_BE = 0x47475546
) )
func DecodeGGML(r io.ReadSeeker) (*GGML, error) { func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
@@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
ggml.container = &containerGGJT{} ggml.container = &containerGGJT{}
case FILE_MAGIC_GGLA: case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{} ggml.container = &containerLORA{}
case FILE_MAGIC_GGUF: case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{} ggml.container = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian}
default: default:
return nil, errors.New("invalid file magic") return nil, errors.New("invalid file magic")
} }

View file

@@ -3,12 +3,13 @@ package llm
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"errors"
"fmt" "fmt"
"io" "io"
) )
type containerGGUF struct { type containerGGUF struct {
bo binary.ByteOrder
Version uint32 Version uint32
V1 struct { V1 struct {
@@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string {
} }
func (c *containerGGUF) Decode(r io.Reader) (model, error) { func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, binary.LittleEndian, &c.Version) binary.Read(r, c.bo, &c.Version)
switch c.Version { switch c.Version {
case 1: case 1:
binary.Read(r, binary.LittleEndian, &c.V1) binary.Read(r, c.bo, &c.V1)
case 2:
binary.Read(r, binary.LittleEndian, &c.V2)
default: default:
return nil, errors.New("invalid version") binary.Read(r, c.bo, &c.V2)
} }
model := newGGUFModel(c) model := newGGUFModel(c)
@@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 {
return int64(v) return int64(v)
} }
func (ggufModel) readU8(r io.Reader) uint8 { func (llm ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8 var u8 uint8
binary.Read(r, binary.LittleEndian, &u8) binary.Read(r, llm.bo, &u8)
return u8 return u8
} }
func (ggufModel) readI8(r io.Reader) int8 { func (llm ggufModel) readI8(r io.Reader) int8 {
var i8 int8 var i8 int8
binary.Read(r, binary.LittleEndian, &i8) binary.Read(r, llm.bo, &i8)
return i8 return i8
} }
func (ggufModel) readU16(r io.Reader) uint16 { func (llm ggufModel) readU16(r io.Reader) uint16 {
var u16 uint16 var u16 uint16
binary.Read(r, binary.LittleEndian, &u16) binary.Read(r, llm.bo, &u16)
return u16 return u16
} }
func (ggufModel) readI16(r io.Reader) int16 { func (llm ggufModel) readI16(r io.Reader) int16 {
var i16 int16 var i16 int16
binary.Read(r, binary.LittleEndian, &i16) binary.Read(r, llm.bo, &i16)
return i16 return i16
} }
func (ggufModel) readU32(r io.Reader) uint32 { func (llm ggufModel) readU32(r io.Reader) uint32 {
var u32 uint32 var u32 uint32
binary.Read(r, binary.LittleEndian, &u32) binary.Read(r, llm.bo, &u32)
return u32 return u32
} }
func (ggufModel) readI32(r io.Reader) int32 { func (llm ggufModel) readI32(r io.Reader) int32 {
var i32 int32 var i32 int32
binary.Read(r, binary.LittleEndian, &i32) binary.Read(r, llm.bo, &i32)
return i32 return i32
} }
func (ggufModel) readU64(r io.Reader) uint64 { func (llm ggufModel) readU64(r io.Reader) uint64 {
var u64 uint64 var u64 uint64
binary.Read(r, binary.LittleEndian, &u64) binary.Read(r, llm.bo, &u64)
return u64 return u64
} }
func (ggufModel) readI64(r io.Reader) int64 { func (llm ggufModel) readI64(r io.Reader) int64 {
var i64 int64 var i64 int64
binary.Read(r, binary.LittleEndian, &i64) binary.Read(r, llm.bo, &i64)
return i64 return i64
} }
func (ggufModel) readF32(r io.Reader) float32 { func (llm ggufModel) readF32(r io.Reader) float32 {
var f32 float32 var f32 float32
binary.Read(r, binary.LittleEndian, &f32) binary.Read(r, llm.bo, &f32)
return f32 return f32
} }
func (ggufModel) readF64(r io.Reader) float64 { func (llm ggufModel) readF64(r io.Reader) float64 {
var f64 float64 var f64 float64
binary.Read(r, binary.LittleEndian, &f64) binary.Read(r, llm.bo, &f64)
return f64 return f64
} }
func (ggufModel) readBool(r io.Reader) bool { func (llm ggufModel) readBool(r io.Reader) bool {
var b bool var b bool
binary.Read(r, binary.LittleEndian, &b) binary.Read(r, llm.bo, &b)
return b return b
} }
func (ggufModel) readStringV1(r io.Reader) (string, error) { func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
var nameLength uint32 var nameLength uint32
binary.Read(r, binary.LittleEndian, &nameLength) binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil { if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
func (llm ggufModel) readString(r io.Reader) (string, error) { func (llm ggufModel) readString(r io.Reader) (string, error) {
var nameLength uint64 var nameLength uint64
binary.Read(r, binary.LittleEndian, &nameLength) binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil { if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {