diff --git a/llm/ggml.go b/llm/ggml.go index dc0d2cd3..9cb49225 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -175,7 +175,8 @@ const ( // Magic constant for `ggla` files (LoRA adapter). FILE_MAGIC_GGLA = 0x67676C61 // Magic constant for `gguf` files (versioned, gguf) - FILE_MAGIC_GGUF = 0x46554747 + FILE_MAGIC_GGUF_LE = 0x46554747 + FILE_MAGIC_GGUF_BE = 0x47475546 ) func DecodeGGML(r io.ReadSeeker) (*GGML, error) { @@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) { ggml.container = &containerGGJT{} case FILE_MAGIC_GGLA: ggml.container = &containerLORA{} - case FILE_MAGIC_GGUF: - ggml.container = &containerGGUF{} + case FILE_MAGIC_GGUF_LE: + ggml.container = &containerGGUF{bo: binary.LittleEndian} + case FILE_MAGIC_GGUF_BE: + ggml.container = &containerGGUF{bo: binary.BigEndian} default: return nil, errors.New("invalid file magic") } diff --git a/llm/gguf.go b/llm/gguf.go index 1c5e73b3..02e03ef5 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -3,12 +3,13 @@ package llm import ( "bytes" "encoding/binary" - "errors" "fmt" "io" ) type containerGGUF struct { + bo binary.ByteOrder + Version uint32 V1 struct { @@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string { } func (c *containerGGUF) Decode(r io.Reader) (model, error) { - binary.Read(r, binary.LittleEndian, &c.Version) + binary.Read(r, c.bo, &c.Version) switch c.Version { case 1: - binary.Read(r, binary.LittleEndian, &c.V1) - case 2: - binary.Read(r, binary.LittleEndian, &c.V2) + binary.Read(r, c.bo, &c.V1) default: - return nil, errors.New("invalid version") + binary.Read(r, c.bo, &c.V2) } model := newGGUFModel(c) @@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 { return int64(v) } -func (ggufModel) readU8(r io.Reader) uint8 { +func (llm ggufModel) readU8(r io.Reader) uint8 { var u8 uint8 - binary.Read(r, binary.LittleEndian, &u8) + binary.Read(r, llm.bo, &u8) return u8 } -func (ggufModel) readI8(r io.Reader) int8 { +func (llm ggufModel) readI8(r io.Reader) int8 { var i8 int8 - binary.Read(r, binary.LittleEndian, &i8) + binary.Read(r, llm.bo, &i8) return i8 } -func (ggufModel) readU16(r io.Reader) uint16 { +func (llm ggufModel) readU16(r io.Reader) uint16 { var u16 uint16 - binary.Read(r, binary.LittleEndian, &u16) + binary.Read(r, llm.bo, &u16) return u16 } -func (ggufModel) readI16(r io.Reader) int16 { +func (llm ggufModel) readI16(r io.Reader) int16 { var i16 int16 - binary.Read(r, binary.LittleEndian, &i16) + binary.Read(r, llm.bo, &i16) return i16 } -func (ggufModel) readU32(r io.Reader) uint32 { +func (llm ggufModel) readU32(r io.Reader) uint32 { var u32 uint32 - binary.Read(r, binary.LittleEndian, &u32) + binary.Read(r, llm.bo, &u32) return u32 } -func (ggufModel) readI32(r io.Reader) int32 { +func (llm ggufModel) readI32(r io.Reader) int32 { var i32 int32 - binary.Read(r, binary.LittleEndian, &i32) + binary.Read(r, llm.bo, &i32) return i32 } -func (ggufModel) readU64(r io.Reader) uint64 { +func (llm ggufModel) readU64(r io.Reader) uint64 { var u64 uint64 - binary.Read(r, binary.LittleEndian, &u64) + binary.Read(r, llm.bo, &u64) return u64 } -func (ggufModel) readI64(r io.Reader) int64 { +func (llm ggufModel) readI64(r io.Reader) int64 { var i64 int64 - binary.Read(r, binary.LittleEndian, &i64) + binary.Read(r, llm.bo, &i64) return i64 } -func (ggufModel) readF32(r io.Reader) float32 { +func (llm ggufModel) readF32(r io.Reader) float32 { var f32 float32 - binary.Read(r, binary.LittleEndian, &f32) + binary.Read(r, llm.bo, &f32) return f32 } -func (ggufModel) readF64(r io.Reader) float64 { +func (llm ggufModel) readF64(r io.Reader) float64 { var f64 float64 - binary.Read(r, binary.LittleEndian, &f64) + binary.Read(r, llm.bo, &f64) return f64 } -func (ggufModel) readBool(r io.Reader) bool { +func (llm ggufModel) readBool(r io.Reader) bool { var b bool - binary.Read(r, binary.LittleEndian, &b) + binary.Read(r, llm.bo, &b) return b } -func (ggufModel) readStringV1(r io.Reader) (string, error) { +func (llm ggufModel) readStringV1(r io.Reader) (string, error) { var nameLength uint32 - binary.Read(r, binary.LittleEndian, &nameLength) + binary.Read(r, llm.bo, &nameLength) var b bytes.Buffer if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil { @@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) { func (llm ggufModel) readString(r io.Reader) (string, error) { var nameLength uint64 - binary.Read(r, binary.LittleEndian, &nameLength) + binary.Read(r, llm.bo, &nameLength) var b bytes.Buffer if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {