From 125d0a013a5daada452eba03adafd8134dc6fef0 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 23 Oct 2023 09:33:13 -0700 Subject: [PATCH] ggufv3 ggufv3 adds support for big endianness, mainly for s390x architecture. while that's not currently supported for ollama, the change is simple. loosen version check to be more forward compatible. unless specified, gguf versions other than v1 will be decoded into v2. --- llm/ggml.go | 9 +++++--- llm/gguf.go | 61 ++++++++++++++++++++++++++--------------------------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/llm/ggml.go b/llm/ggml.go index dc0d2cd3..9cb49225 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -175,7 +175,8 @@ const ( // Magic constant for `ggla` files (LoRA adapter). FILE_MAGIC_GGLA = 0x67676C61 // Magic constant for `gguf` files (versioned, gguf) - FILE_MAGIC_GGUF = 0x46554747 + FILE_MAGIC_GGUF_LE = 0x46554747 + FILE_MAGIC_GGUF_BE = 0x47475546 ) func DecodeGGML(r io.ReadSeeker) (*GGML, error) { @@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) { ggml.container = &containerGGJT{} case FILE_MAGIC_GGLA: ggml.container = &containerLORA{} - case FILE_MAGIC_GGUF: - ggml.container = &containerGGUF{} + case FILE_MAGIC_GGUF_LE: + ggml.container = &containerGGUF{bo: binary.LittleEndian} + case FILE_MAGIC_GGUF_BE: + ggml.container = &containerGGUF{bo: binary.BigEndian} default: return nil, errors.New("invalid file magic") } diff --git a/llm/gguf.go b/llm/gguf.go index 1c5e73b3..02e03ef5 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -3,12 +3,13 @@ package llm import ( "bytes" "encoding/binary" - "errors" "fmt" "io" ) type containerGGUF struct { + bo binary.ByteOrder + Version uint32 V1 struct { @@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string { } func (c *containerGGUF) Decode(r io.Reader) (model, error) { - binary.Read(r, binary.LittleEndian, &c.Version) + binary.Read(r, c.bo, &c.Version) switch c.Version { case 1: - binary.Read(r, binary.LittleEndian, &c.V1) - case 2: - 
binary.Read(r, binary.LittleEndian, &c.V2) + binary.Read(r, c.bo, &c.V1) default: - return nil, errors.New("invalid version") + binary.Read(r, c.bo, &c.V2) } model := newGGUFModel(c) @@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 { return int64(v) } -func (ggufModel) readU8(r io.Reader) uint8 { +func (llm ggufModel) readU8(r io.Reader) uint8 { var u8 uint8 - binary.Read(r, binary.LittleEndian, &u8) + binary.Read(r, llm.bo, &u8) return u8 } -func (ggufModel) readI8(r io.Reader) int8 { +func (llm ggufModel) readI8(r io.Reader) int8 { var i8 int8 - binary.Read(r, binary.LittleEndian, &i8) + binary.Read(r, llm.bo, &i8) return i8 } -func (ggufModel) readU16(r io.Reader) uint16 { +func (llm ggufModel) readU16(r io.Reader) uint16 { var u16 uint16 - binary.Read(r, binary.LittleEndian, &u16) + binary.Read(r, llm.bo, &u16) return u16 } -func (ggufModel) readI16(r io.Reader) int16 { +func (llm ggufModel) readI16(r io.Reader) int16 { var i16 int16 - binary.Read(r, binary.LittleEndian, &i16) + binary.Read(r, llm.bo, &i16) return i16 } -func (ggufModel) readU32(r io.Reader) uint32 { +func (llm ggufModel) readU32(r io.Reader) uint32 { var u32 uint32 - binary.Read(r, binary.LittleEndian, &u32) + binary.Read(r, llm.bo, &u32) return u32 } -func (ggufModel) readI32(r io.Reader) int32 { +func (llm ggufModel) readI32(r io.Reader) int32 { var i32 int32 - binary.Read(r, binary.LittleEndian, &i32) + binary.Read(r, llm.bo, &i32) return i32 } -func (ggufModel) readU64(r io.Reader) uint64 { +func (llm ggufModel) readU64(r io.Reader) uint64 { var u64 uint64 - binary.Read(r, binary.LittleEndian, &u64) + binary.Read(r, llm.bo, &u64) return u64 } -func (ggufModel) readI64(r io.Reader) int64 { +func (llm ggufModel) readI64(r io.Reader) int64 { var i64 int64 - binary.Read(r, binary.LittleEndian, &i64) + binary.Read(r, llm.bo, &i64) return i64 } -func (ggufModel) readF32(r io.Reader) float32 { +func (llm ggufModel) readF32(r io.Reader) float32 { var f32 float32 - binary.Read(r, 
binary.LittleEndian, &f32) + binary.Read(r, llm.bo, &f32) return f32 } -func (ggufModel) readF64(r io.Reader) float64 { +func (llm ggufModel) readF64(r io.Reader) float64 { var f64 float64 - binary.Read(r, binary.LittleEndian, &f64) + binary.Read(r, llm.bo, &f64) return f64 } -func (ggufModel) readBool(r io.Reader) bool { +func (llm ggufModel) readBool(r io.Reader) bool { var b bool - binary.Read(r, binary.LittleEndian, &b) + binary.Read(r, llm.bo, &b) return b } -func (ggufModel) readStringV1(r io.Reader) (string, error) { +func (llm ggufModel) readStringV1(r io.Reader) (string, error) { var nameLength uint32 - binary.Read(r, binary.LittleEndian, &nameLength) + binary.Read(r, llm.bo, &nameLength) var b bytes.Buffer if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil { @@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) { func (llm ggufModel) readString(r io.Reader) (string, error) { var nameLength uint64 - binary.Read(r, binary.LittleEndian, &nameLength) + binary.Read(r, llm.bo, &nameLength) var b bytes.Buffer if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {