split from into one or more models

This commit is contained in:
Michael Yang 2023-11-24 11:57:20 -08:00
parent 7232f1fa41
commit 2cb0fa7d40
3 changed files with 179 additions and 71 deletions

View file

@ -7,9 +7,10 @@ import (
)
// GGML represents one model decoded from a GGML-family file. The embedded
// container (file framing: ggml/ggmf/ggjt/ggla/gguf) and model
// (format-specific hyperparameters) promote their methods onto GGML.
type GGML struct {
	// NOTE(review): magic is no longer populated by DecodeGGML after this
	// change (it decodes into a local) — confirm the field is still needed.
	magic uint32
	container
	model

	// Size is the number of bytes this model occupies in the source reader,
	// so callers can section the file when several models are concatenated.
	Size int64
}
const (
@ -82,7 +83,7 @@ type model interface {
type container interface {
Name() string
Decode(io.Reader) (model, error)
Decode(*readOffset) (model, error)
}
type containerGGML struct{}
@ -91,7 +92,7 @@ func (c *containerGGML) Name() string {
return "ggml"
}
func (c *containerGGML) Decode(r io.Reader) (model, error) {
func (c *containerGGML) Decode(ro *readOffset) (model, error) {
return nil, nil
}
@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string {
return "ggmf"
}
func (c *containerGGMF) Decode(r io.Reader) (model, error) {
func (c *containerGGMF) Decode(ro *readOffset) (model, error) {
var version uint32
binary.Read(r, binary.LittleEndian, &version)
binary.Read(ro, binary.LittleEndian, &version)
switch version {
case 1:
@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string {
return "ggjt"
}
func (c *containerGGJT) Decode(r io.Reader) (model, error) {
func (c *containerGGJT) Decode(ro *readOffset) (model, error) {
var version uint32
binary.Read(r, binary.LittleEndian, &version)
binary.Read(ro, binary.LittleEndian, &version)
switch version {
case 1, 2, 3:
@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) {
// different model types may have different layouts for hyperparameters
var llama llamaModel
binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
return &llama, nil
}
@ -151,9 +152,9 @@ func (c *containerLORA) Name() string {
return "ggla"
}
func (c *containerLORA) Decode(r io.Reader) (model, error) {
func (c *containerLORA) Decode(ro *readOffset) (model, error) {
var version uint32
binary.Read(r, binary.LittleEndian, &version)
binary.Read(ro, binary.LittleEndian, &version)
switch version {
case 1:
@ -180,33 +181,51 @@ const (
)
func DecodeGGML(r io.Reader) (*GGML, error) {
var ggml GGML
binary.Read(r, binary.LittleEndian, &ggml.magic)
ro := readOffset{Reader: r}
switch ggml.magic {
var magic uint32
if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil {
return nil, err
}
var c container
switch magic {
case FILE_MAGIC_GGML:
ggml.container = &containerGGML{}
c = &containerGGML{}
case FILE_MAGIC_GGMF:
ggml.container = &containerGGMF{}
c = &containerGGMF{}
case FILE_MAGIC_GGJT:
ggml.container = &containerGGJT{}
c = &containerGGJT{}
case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{}
c = &containerLORA{}
case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{bo: binary.LittleEndian}
c = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian}
c = &containerGGUF{bo: binary.BigEndian}
default:
return nil, errors.New("invalid file magic")
}
model, err := ggml.Decode(r)
model, err := c.Decode(&ro)
if err != nil {
return nil, err
}
ggml.model = model
// final model type
return &ggml, nil
return &GGML{
container: c,
model: model,
Size: ro.offset,
}, nil
}
// readOffset wraps an io.Reader and keeps a running count of the bytes
// consumed so far, letting callers measure how much of the stream a
// decode pass used.
type readOffset struct {
	io.Reader
	offset int64
}

// Read delegates to the wrapped Reader and advances the byte count by
// however many bytes were actually produced, whether or not an error
// accompanied them.
func (ro *readOffset) Read(p []byte) (n int, err error) {
	n, err = ro.Reader.Read(p)
	ro.offset += int64(n)
	return n, err
}

View file

@ -23,26 +23,24 @@ type containerGGUF struct {
NumTensor uint64
NumKV uint64
}
parameters uint64
}
func (c *containerGGUF) Name() string {
return "gguf"
}
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, c.bo, &c.Version)
func (c *containerGGUF) Decode(ro *readOffset) (model, error) {
binary.Read(ro, c.bo, &c.Version)
switch c.Version {
case 1:
binary.Read(r, c.bo, &c.V1)
binary.Read(ro, c.bo, &c.V1)
default:
binary.Read(r, c.bo, &c.V2)
binary.Read(ro, c.bo, &c.V2)
}
model := newGGUFModel(c)
if err := model.Decode(r); err != nil {
if err := model.Decode(ro); err != nil {
return nil, err
}
@ -67,9 +65,23 @@ const (
type kv map[string]any
// tensor describes a single tensor entry read from a GGUF tensor table.
type tensor struct {
	name   string
	kind   uint32 // GGUF data-type id (0=FP32, 1=FP16, higher values are quantized kinds)
	offset uint64 // byte offset of the tensor data as recorded in the file
	size   uint64 // data size in bytes, computed from shape and the kind's block/type sizes

	// shape is the number of elements in each dimension
	shape [4]uint64
}
// ggufModel is the model implementation for GGUF files: it embeds its
// container, the decoded metadata (kv), the tensor descriptors, and the
// total parameter count accumulated while decoding the tensor table.
type ggufModel struct {
	*containerGGUF

	kv
	tensors []tensor

	parameters uint64
}
func newGGUFModel(container *containerGGUF) *ggufModel {
@ -142,49 +154,49 @@ func (llm *ggufModel) FileType() string {
return "unknown"
}
func (llm *ggufModel) Decode(r io.Reader) error {
func (llm *ggufModel) Decode(ro *readOffset) error {
// decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := llm.readString(r)
k, err := llm.readString(ro)
if err != nil {
return err
}
vtype := llm.readU32(r)
vtype := llm.readU32(ro)
var v any
switch vtype {
case ggufTypeUint8:
v = llm.readU8(r)
v = llm.readU8(ro)
case ggufTypeInt8:
v = llm.readI8(r)
v = llm.readI8(ro)
case ggufTypeUint16:
v = llm.readU16(r)
v = llm.readU16(ro)
case ggufTypeInt16:
v = llm.readI16(r)
v = llm.readI16(ro)
case ggufTypeUint32:
v = llm.readU32(r)
v = llm.readU32(ro)
case ggufTypeInt32:
v = llm.readI32(r)
v = llm.readI32(ro)
case ggufTypeUint64:
v = llm.readU64(r)
v = llm.readU64(ro)
case ggufTypeInt64:
v = llm.readI64(r)
v = llm.readI64(ro)
case ggufTypeFloat32:
v = llm.readF32(r)
v = llm.readF32(ro)
case ggufTypeFloat64:
v = llm.readF64(r)
v = llm.readF64(ro)
case ggufTypeBool:
v = llm.readBool(r)
v = llm.readBool(ro)
case ggufTypeString:
s, err := llm.readString(r)
s, err := llm.readString(ro)
if err != nil {
return err
}
v = s
case ggufTypeArray:
a, err := llm.readArray(r)
a, err := llm.readArray(ro)
if err != nil {
return err
}
@ -199,21 +211,84 @@ func (llm *ggufModel) Decode(r io.Reader) error {
// decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ {
if _, err := llm.readString(r); err != nil {
name, err := llm.readString(ro)
if err != nil {
return err
}
dimensions := llm.readU32(r)
dims := llm.readU32(ro)
var elements uint64 = 1
for i := 0; uint32(i) < dimensions; i++ {
elements *= llm.readU64(r)
shape := [4]uint64{1, 1, 1, 1}
for i := 0; uint32(i) < dims; i++ {
shape[i] = llm.readU64(ro)
}
llm.readU32(r) // type
llm.readU64(r) // offset
kind := llm.readU32(ro)
offset := llm.readU64(ro)
llm.parameters += elements
var blockSize uint64
switch {
case kind < 2:
blockSize = 1
case kind < 10:
blockSize = 32
default:
blockSize = 256
}
var typeSize uint64
switch kind {
case 0: // FP32
typeSize = 4
case 1: // FP16
typeSize = 2
case 2: // Q4_0
typeSize = 2 + blockSize/2
case 3: // Q4_1
typeSize = 2 + 2 + blockSize/2
case 6: // Q5_0
typeSize = 2 + 4 + blockSize/2
case 7: // Q5_1
typeSize = 2 + 2 + 4 + blockSize/2
case 8: // Q8_0
typeSize = 2 + blockSize
case 9: // Q8_1
typeSize = 4 + 4 + blockSize
case 10: // Q2_K
typeSize = blockSize/16 + blockSize/4 + 2 + 2
case 11: // Q3_K
typeSize = blockSize/8 + blockSize/4 + 12 + 2
case 12: // Q4_K
typeSize = 2 + 2 + 12 + blockSize/2
case 13: // Q5_K
typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
case 14: // Q6_K
typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
}
parameters := shape[0] * shape[1] * shape[2] * shape[3]
size := parameters * typeSize / blockSize
llm.tensors = append(llm.tensors, tensor{
name: name,
kind: kind,
offset: offset,
size: size,
shape: shape,
})
llm.parameters += parameters
}
alignment, ok := llm.kv["general.alignment"].(uint32)
if !ok {
alignment = 32
}
io.CopyN(io.Discard, ro, int64(alignment)-ro.offset%int64(alignment))
for _, tensor := range llm.tensors {
padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
io.CopyN(io.Discard, ro, padded)
}
return nil

View file

@ -388,24 +388,38 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
}
defer bin.Close()
fn(api.ProgressResponse{Status: "creating model layer"})
ggml, err := llm.DecodeGGML(bin)
if err != nil {
return err
var offset int64
for {
fn(api.ProgressResponse{Status: "creating model layer"})
bin.Seek(offset, io.SeekStart)
ggml, err := llm.DecodeGGML(bin)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err
}
config.ModelFormat = ggml.Name()
config.ModelFamily = ggml.ModelFamily()
config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType()
mediatype := mediatype
if ggml.ModelFamily() == "clip" {
mediatype = "application/vnd.ollama.image.projector"
}
sr := io.NewSectionReader(bin, offset, ggml.Size)
layer, err := NewLayer(sr, mediatype)
if err != nil {
return err
}
layers.Add(layer)
offset += ggml.Size
}
config.ModelFormat = ggml.Name()
config.ModelFamily = ggml.ModelFamily()
config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType()
bin.Seek(0, io.SeekStart)
layer, err := NewLayer(bin, mediatype)
if err != nil {
return err
}
layers.Add(layer)
case "adapter":
if strings.HasPrefix(c.Args, "@") {
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))