ggufv3
ggufv3 adds support for big-endian byte order, mainly for the s390x architecture. While that architecture is not currently supported by ollama, the change is simple. This commit also loosens the version check to be more forward compatible: unless otherwise specified, gguf versions other than v1 will be decoded as v2.
This commit is contained in:
parent
ccff9ca09c
commit
125d0a013a
2 changed files with 36 additions and 34 deletions
|
@ -175,7 +175,8 @@ const (
|
||||||
// Magic constant for `ggla` files (LoRA adapter).
|
// Magic constant for `ggla` files (LoRA adapter).
|
||||||
FILE_MAGIC_GGLA = 0x67676C61
|
FILE_MAGIC_GGLA = 0x67676C61
|
||||||
// Magic constant for `gguf` files (versioned, gguf)
|
// Magic constant for `gguf` files (versioned, gguf)
|
||||||
FILE_MAGIC_GGUF = 0x46554747
|
FILE_MAGIC_GGUF_LE = 0x46554747
|
||||||
|
FILE_MAGIC_GGUF_BE = 0x47475546
|
||||||
)
|
)
|
||||||
|
|
||||||
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
|
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
|
||||||
|
@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
|
||||||
ggml.container = &containerGGJT{}
|
ggml.container = &containerGGJT{}
|
||||||
case FILE_MAGIC_GGLA:
|
case FILE_MAGIC_GGLA:
|
||||||
ggml.container = &containerLORA{}
|
ggml.container = &containerLORA{}
|
||||||
case FILE_MAGIC_GGUF:
|
case FILE_MAGIC_GGUF_LE:
|
||||||
ggml.container = &containerGGUF{}
|
ggml.container = &containerGGUF{bo: binary.LittleEndian}
|
||||||
|
case FILE_MAGIC_GGUF_BE:
|
||||||
|
ggml.container = &containerGGUF{bo: binary.BigEndian}
|
||||||
default:
|
default:
|
||||||
return nil, errors.New("invalid file magic")
|
return nil, errors.New("invalid file magic")
|
||||||
}
|
}
|
||||||
|
|
61
llm/gguf.go
61
llm/gguf.go
|
@ -3,12 +3,13 @@ package llm
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
type containerGGUF struct {
|
type containerGGUF struct {
|
||||||
|
bo binary.ByteOrder
|
||||||
|
|
||||||
Version uint32
|
Version uint32
|
||||||
|
|
||||||
V1 struct {
|
V1 struct {
|
||||||
|
@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
|
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
|
||||||
binary.Read(r, binary.LittleEndian, &c.Version)
|
binary.Read(r, c.bo, &c.Version)
|
||||||
|
|
||||||
switch c.Version {
|
switch c.Version {
|
||||||
case 1:
|
case 1:
|
||||||
binary.Read(r, binary.LittleEndian, &c.V1)
|
binary.Read(r, c.bo, &c.V1)
|
||||||
case 2:
|
|
||||||
binary.Read(r, binary.LittleEndian, &c.V2)
|
|
||||||
default:
|
default:
|
||||||
return nil, errors.New("invalid version")
|
binary.Read(r, c.bo, &c.V2)
|
||||||
}
|
}
|
||||||
|
|
||||||
model := newGGUFModel(c)
|
model := newGGUFModel(c)
|
||||||
|
@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 {
|
||||||
return int64(v)
|
return int64(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readU8(r io.Reader) uint8 {
|
func (llm ggufModel) readU8(r io.Reader) uint8 {
|
||||||
var u8 uint8
|
var u8 uint8
|
||||||
binary.Read(r, binary.LittleEndian, &u8)
|
binary.Read(r, llm.bo, &u8)
|
||||||
return u8
|
return u8
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readI8(r io.Reader) int8 {
|
func (llm ggufModel) readI8(r io.Reader) int8 {
|
||||||
var i8 int8
|
var i8 int8
|
||||||
binary.Read(r, binary.LittleEndian, &i8)
|
binary.Read(r, llm.bo, &i8)
|
||||||
return i8
|
return i8
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readU16(r io.Reader) uint16 {
|
func (llm ggufModel) readU16(r io.Reader) uint16 {
|
||||||
var u16 uint16
|
var u16 uint16
|
||||||
binary.Read(r, binary.LittleEndian, &u16)
|
binary.Read(r, llm.bo, &u16)
|
||||||
return u16
|
return u16
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readI16(r io.Reader) int16 {
|
func (llm ggufModel) readI16(r io.Reader) int16 {
|
||||||
var i16 int16
|
var i16 int16
|
||||||
binary.Read(r, binary.LittleEndian, &i16)
|
binary.Read(r, llm.bo, &i16)
|
||||||
return i16
|
return i16
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readU32(r io.Reader) uint32 {
|
func (llm ggufModel) readU32(r io.Reader) uint32 {
|
||||||
var u32 uint32
|
var u32 uint32
|
||||||
binary.Read(r, binary.LittleEndian, &u32)
|
binary.Read(r, llm.bo, &u32)
|
||||||
return u32
|
return u32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readI32(r io.Reader) int32 {
|
func (llm ggufModel) readI32(r io.Reader) int32 {
|
||||||
var i32 int32
|
var i32 int32
|
||||||
binary.Read(r, binary.LittleEndian, &i32)
|
binary.Read(r, llm.bo, &i32)
|
||||||
return i32
|
return i32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readU64(r io.Reader) uint64 {
|
func (llm ggufModel) readU64(r io.Reader) uint64 {
|
||||||
var u64 uint64
|
var u64 uint64
|
||||||
binary.Read(r, binary.LittleEndian, &u64)
|
binary.Read(r, llm.bo, &u64)
|
||||||
return u64
|
return u64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readI64(r io.Reader) int64 {
|
func (llm ggufModel) readI64(r io.Reader) int64 {
|
||||||
var i64 int64
|
var i64 int64
|
||||||
binary.Read(r, binary.LittleEndian, &i64)
|
binary.Read(r, llm.bo, &i64)
|
||||||
return i64
|
return i64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readF32(r io.Reader) float32 {
|
func (llm ggufModel) readF32(r io.Reader) float32 {
|
||||||
var f32 float32
|
var f32 float32
|
||||||
binary.Read(r, binary.LittleEndian, &f32)
|
binary.Read(r, llm.bo, &f32)
|
||||||
return f32
|
return f32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readF64(r io.Reader) float64 {
|
func (llm ggufModel) readF64(r io.Reader) float64 {
|
||||||
var f64 float64
|
var f64 float64
|
||||||
binary.Read(r, binary.LittleEndian, &f64)
|
binary.Read(r, llm.bo, &f64)
|
||||||
return f64
|
return f64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readBool(r io.Reader) bool {
|
func (llm ggufModel) readBool(r io.Reader) bool {
|
||||||
var b bool
|
var b bool
|
||||||
binary.Read(r, binary.LittleEndian, &b)
|
binary.Read(r, llm.bo, &b)
|
||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ggufModel) readStringV1(r io.Reader) (string, error) {
|
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
|
||||||
var nameLength uint32
|
var nameLength uint32
|
||||||
binary.Read(r, binary.LittleEndian, &nameLength)
|
binary.Read(r, llm.bo, &nameLength)
|
||||||
|
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
||||||
|
@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
|
||||||
|
|
||||||
func (llm ggufModel) readString(r io.Reader) (string, error) {
|
func (llm ggufModel) readString(r io.Reader) (string, error) {
|
||||||
var nameLength uint64
|
var nameLength uint64
|
||||||
binary.Read(r, binary.LittleEndian, &nameLength)
|
binary.Read(r, llm.bo, &nameLength)
|
||||||
|
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
||||||
|
|
Loading…
Add table
Reference in a new issue