GGUF v3 adds support for big-endian byte order, mainly for the s390x architecture.
While that is not currently supported by ollama, the change is simple.

Loosen the version check to be more forward compatible: unless specified otherwise,
GGUF versions other than v1 will be decoded using the v2 layout.
This commit is contained in:
Michael Yang 2023-10-23 09:33:13 -07:00
parent ccff9ca09c
commit 125d0a013a
2 changed files with 36 additions and 34 deletions

View file

@ -175,7 +175,8 @@ const (
// Magic constant for `ggla` files (LoRA adapter). // Magic constant for `ggla` files (LoRA adapter).
FILE_MAGIC_GGLA = 0x67676C61 FILE_MAGIC_GGLA = 0x67676C61
// Magic constant for `gguf` files (versioned, gguf) // Magic constant for `gguf` files (versioned, gguf)
FILE_MAGIC_GGUF = 0x46554747 FILE_MAGIC_GGUF_LE = 0x46554747
FILE_MAGIC_GGUF_BE = 0x47475546
) )
func DecodeGGML(r io.ReadSeeker) (*GGML, error) { func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
ggml.container = &containerGGJT{} ggml.container = &containerGGJT{}
case FILE_MAGIC_GGLA: case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{} ggml.container = &containerLORA{}
case FILE_MAGIC_GGUF: case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{} ggml.container = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian}
default: default:
return nil, errors.New("invalid file magic") return nil, errors.New("invalid file magic")
} }

View file

@ -3,12 +3,13 @@ package llm
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"errors"
"fmt" "fmt"
"io" "io"
) )
type containerGGUF struct { type containerGGUF struct {
bo binary.ByteOrder
Version uint32 Version uint32
V1 struct { V1 struct {
@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string {
} }
func (c *containerGGUF) Decode(r io.Reader) (model, error) { func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, binary.LittleEndian, &c.Version) binary.Read(r, c.bo, &c.Version)
switch c.Version { switch c.Version {
case 1: case 1:
binary.Read(r, binary.LittleEndian, &c.V1) binary.Read(r, c.bo, &c.V1)
case 2:
binary.Read(r, binary.LittleEndian, &c.V2)
default: default:
return nil, errors.New("invalid version") binary.Read(r, c.bo, &c.V2)
} }
model := newGGUFModel(c) model := newGGUFModel(c)
@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 {
return int64(v) return int64(v)
} }
func (ggufModel) readU8(r io.Reader) uint8 { func (llm ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8 var u8 uint8
binary.Read(r, binary.LittleEndian, &u8) binary.Read(r, llm.bo, &u8)
return u8 return u8
} }
func (ggufModel) readI8(r io.Reader) int8 { func (llm ggufModel) readI8(r io.Reader) int8 {
var i8 int8 var i8 int8
binary.Read(r, binary.LittleEndian, &i8) binary.Read(r, llm.bo, &i8)
return i8 return i8
} }
func (ggufModel) readU16(r io.Reader) uint16 { func (llm ggufModel) readU16(r io.Reader) uint16 {
var u16 uint16 var u16 uint16
binary.Read(r, binary.LittleEndian, &u16) binary.Read(r, llm.bo, &u16)
return u16 return u16
} }
func (ggufModel) readI16(r io.Reader) int16 { func (llm ggufModel) readI16(r io.Reader) int16 {
var i16 int16 var i16 int16
binary.Read(r, binary.LittleEndian, &i16) binary.Read(r, llm.bo, &i16)
return i16 return i16
} }
func (ggufModel) readU32(r io.Reader) uint32 { func (llm ggufModel) readU32(r io.Reader) uint32 {
var u32 uint32 var u32 uint32
binary.Read(r, binary.LittleEndian, &u32) binary.Read(r, llm.bo, &u32)
return u32 return u32
} }
func (ggufModel) readI32(r io.Reader) int32 { func (llm ggufModel) readI32(r io.Reader) int32 {
var i32 int32 var i32 int32
binary.Read(r, binary.LittleEndian, &i32) binary.Read(r, llm.bo, &i32)
return i32 return i32
} }
func (ggufModel) readU64(r io.Reader) uint64 { func (llm ggufModel) readU64(r io.Reader) uint64 {
var u64 uint64 var u64 uint64
binary.Read(r, binary.LittleEndian, &u64) binary.Read(r, llm.bo, &u64)
return u64 return u64
} }
func (ggufModel) readI64(r io.Reader) int64 { func (llm ggufModel) readI64(r io.Reader) int64 {
var i64 int64 var i64 int64
binary.Read(r, binary.LittleEndian, &i64) binary.Read(r, llm.bo, &i64)
return i64 return i64
} }
func (ggufModel) readF32(r io.Reader) float32 { func (llm ggufModel) readF32(r io.Reader) float32 {
var f32 float32 var f32 float32
binary.Read(r, binary.LittleEndian, &f32) binary.Read(r, llm.bo, &f32)
return f32 return f32
} }
func (ggufModel) readF64(r io.Reader) float64 { func (llm ggufModel) readF64(r io.Reader) float64 {
var f64 float64 var f64 float64
binary.Read(r, binary.LittleEndian, &f64) binary.Read(r, llm.bo, &f64)
return f64 return f64
} }
func (ggufModel) readBool(r io.Reader) bool { func (llm ggufModel) readBool(r io.Reader) bool {
var b bool var b bool
binary.Read(r, binary.LittleEndian, &b) binary.Read(r, llm.bo, &b)
return b return b
} }
func (ggufModel) readStringV1(r io.Reader) (string, error) { func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
var nameLength uint32 var nameLength uint32
binary.Read(r, binary.LittleEndian, &nameLength) binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil { if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
func (llm ggufModel) readString(r io.Reader) (string, error) { func (llm ggufModel) readString(r io.Reader) (string, error) {
var nameLength uint64 var nameLength uint64
binary.Read(r, binary.LittleEndian, &nameLength) binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil { if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {