split FROM into one or more models

This commit is contained in:
Michael Yang 2023-11-24 11:57:20 -08:00
parent 7232f1fa41
commit 2cb0fa7d40
3 changed files with 179 additions and 71 deletions

View file

@ -7,9 +7,10 @@ import (
) )
type GGML struct { type GGML struct {
magic uint32
container container
model model
Size int64
} }
const ( const (
@ -82,7 +83,7 @@ type model interface {
type container interface { type container interface {
Name() string Name() string
Decode(io.Reader) (model, error) Decode(*readOffset) (model, error)
} }
type containerGGML struct{} type containerGGML struct{}
@ -91,7 +92,7 @@ func (c *containerGGML) Name() string {
return "ggml" return "ggml"
} }
func (c *containerGGML) Decode(r io.Reader) (model, error) { func (c *containerGGML) Decode(ro *readOffset) (model, error) {
return nil, nil return nil, nil
} }
@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string {
return "ggmf" return "ggmf"
} }
func (c *containerGGMF) Decode(r io.Reader) (model, error) { func (c *containerGGMF) Decode(ro *readOffset) (model, error) {
var version uint32 var version uint32
binary.Read(r, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
switch version { switch version {
case 1: case 1:
@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string {
return "ggjt" return "ggjt"
} }
func (c *containerGGJT) Decode(r io.Reader) (model, error) { func (c *containerGGJT) Decode(ro *readOffset) (model, error) {
var version uint32 var version uint32
binary.Read(r, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
switch version { switch version {
case 1, 2, 3: case 1, 2, 3:
@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) {
// different model types may have different layouts for hyperparameters // different model types may have different layouts for hyperparameters
var llama llamaModel var llama llamaModel
binary.Read(r, binary.LittleEndian, &llama.hyperparameters) binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
return &llama, nil return &llama, nil
} }
@ -151,9 +152,9 @@ func (c *containerLORA) Name() string {
return "ggla" return "ggla"
} }
func (c *containerLORA) Decode(r io.Reader) (model, error) { func (c *containerLORA) Decode(ro *readOffset) (model, error) {
var version uint32 var version uint32
binary.Read(r, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
switch version { switch version {
case 1: case 1:
@ -180,33 +181,51 @@ const (
) )
func DecodeGGML(r io.Reader) (*GGML, error) { func DecodeGGML(r io.Reader) (*GGML, error) {
var ggml GGML ro := readOffset{Reader: r}
binary.Read(r, binary.LittleEndian, &ggml.magic)
switch ggml.magic { var magic uint32
if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil {
return nil, err
}
var c container
switch magic {
case FILE_MAGIC_GGML: case FILE_MAGIC_GGML:
ggml.container = &containerGGML{} c = &containerGGML{}
case FILE_MAGIC_GGMF: case FILE_MAGIC_GGMF:
ggml.container = &containerGGMF{} c = &containerGGMF{}
case FILE_MAGIC_GGJT: case FILE_MAGIC_GGJT:
ggml.container = &containerGGJT{} c = &containerGGJT{}
case FILE_MAGIC_GGLA: case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{} c = &containerLORA{}
case FILE_MAGIC_GGUF_LE: case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{bo: binary.LittleEndian} c = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE: case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian} c = &containerGGUF{bo: binary.BigEndian}
default: default:
return nil, errors.New("invalid file magic") return nil, errors.New("invalid file magic")
} }
model, err := ggml.Decode(r) model, err := c.Decode(&ro)
if err != nil { if err != nil {
return nil, err return nil, err
} }
ggml.model = model
// final model type // final model type
return &ggml, nil return &GGML{
container: c,
model: model,
Size: ro.offset,
}, nil
}
// readOffset wraps an io.Reader and keeps a running count of the bytes that
// have been read through it. DecodeGGML uses that count as the decoded
// model's Size.
type readOffset struct {
// Reader is the underlying source; embedding it promotes its methods.
io.Reader
// offset is the total number of bytes returned by Read so far.
offset int64
}
// Read reads from the embedded Reader into p, adds the number of bytes
// actually read to r.offset, and returns the underlying result unchanged.
// The count is added even when the underlying Read also reports an error,
// so offset always reflects the bytes consumed.
func (r *readOffset) Read(p []byte) (int, error) {
n, err := r.Reader.Read(p)
r.offset += int64(n)
return n, err
} }

View file

@ -23,26 +23,24 @@ type containerGGUF struct {
NumTensor uint64 NumTensor uint64
NumKV uint64 NumKV uint64
} }
parameters uint64
} }
func (c *containerGGUF) Name() string { func (c *containerGGUF) Name() string {
return "gguf" return "gguf"
} }
func (c *containerGGUF) Decode(r io.Reader) (model, error) { func (c *containerGGUF) Decode(ro *readOffset) (model, error) {
binary.Read(r, c.bo, &c.Version) binary.Read(ro, c.bo, &c.Version)
switch c.Version { switch c.Version {
case 1: case 1:
binary.Read(r, c.bo, &c.V1) binary.Read(ro, c.bo, &c.V1)
default: default:
binary.Read(r, c.bo, &c.V2) binary.Read(ro, c.bo, &c.V2)
} }
model := newGGUFModel(c) model := newGGUFModel(c)
if err := model.Decode(r); err != nil { if err := model.Decode(ro); err != nil {
return nil, err return nil, err
} }
@ -67,9 +65,23 @@ const (
type kv map[string]any type kv map[string]any
// tensor records the metadata decoded for a single entry of a GGUF file's
// tensor table.
type tensor struct {
// name is the tensor's name string as read from the file
name string
// kind is the numeric type code read from the file; the decoder uses it to
// choose the block size and per-block byte size (e.g. 0 is FP32, 1 is FP16)
kind uint32
// offset is the 64-bit offset value read from the tensor entry
// NOTE(review): presumably relative to the start of the aligned tensor data — confirm
offset uint64
// size is the computed size of the tensor data in bytes
// (element count * type size / block size)
size uint64
// shape is the number of elements in each dimension; dimensions not
// present in the file are left at 1
shape [4]uint64
}
type ggufModel struct { type ggufModel struct {
*containerGGUF *containerGGUF
kv kv
tensors []tensor
parameters uint64
} }
func newGGUFModel(container *containerGGUF) *ggufModel { func newGGUFModel(container *containerGGUF) *ggufModel {
@ -142,49 +154,49 @@ func (llm *ggufModel) FileType() string {
return "unknown" return "unknown"
} }
func (llm *ggufModel) Decode(r io.Reader) error { func (llm *ggufModel) Decode(ro *readOffset) error {
// decode key-values // decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ { for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := llm.readString(r) k, err := llm.readString(ro)
if err != nil { if err != nil {
return err return err
} }
vtype := llm.readU32(r) vtype := llm.readU32(ro)
var v any var v any
switch vtype { switch vtype {
case ggufTypeUint8: case ggufTypeUint8:
v = llm.readU8(r) v = llm.readU8(ro)
case ggufTypeInt8: case ggufTypeInt8:
v = llm.readI8(r) v = llm.readI8(ro)
case ggufTypeUint16: case ggufTypeUint16:
v = llm.readU16(r) v = llm.readU16(ro)
case ggufTypeInt16: case ggufTypeInt16:
v = llm.readI16(r) v = llm.readI16(ro)
case ggufTypeUint32: case ggufTypeUint32:
v = llm.readU32(r) v = llm.readU32(ro)
case ggufTypeInt32: case ggufTypeInt32:
v = llm.readI32(r) v = llm.readI32(ro)
case ggufTypeUint64: case ggufTypeUint64:
v = llm.readU64(r) v = llm.readU64(ro)
case ggufTypeInt64: case ggufTypeInt64:
v = llm.readI64(r) v = llm.readI64(ro)
case ggufTypeFloat32: case ggufTypeFloat32:
v = llm.readF32(r) v = llm.readF32(ro)
case ggufTypeFloat64: case ggufTypeFloat64:
v = llm.readF64(r) v = llm.readF64(ro)
case ggufTypeBool: case ggufTypeBool:
v = llm.readBool(r) v = llm.readBool(ro)
case ggufTypeString: case ggufTypeString:
s, err := llm.readString(r) s, err := llm.readString(ro)
if err != nil { if err != nil {
return err return err
} }
v = s v = s
case ggufTypeArray: case ggufTypeArray:
a, err := llm.readArray(r) a, err := llm.readArray(ro)
if err != nil { if err != nil {
return err return err
} }
@ -199,21 +211,84 @@ func (llm *ggufModel) Decode(r io.Reader) error {
// decode tensors // decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ { for i := 0; uint64(i) < llm.NumTensor(); i++ {
if _, err := llm.readString(r); err != nil { name, err := llm.readString(ro)
if err != nil {
return err return err
} }
dimensions := llm.readU32(r) dims := llm.readU32(ro)
var elements uint64 = 1 shape := [4]uint64{1, 1, 1, 1}
for i := 0; uint32(i) < dimensions; i++ { for i := 0; uint32(i) < dims; i++ {
elements *= llm.readU64(r) shape[i] = llm.readU64(ro)
} }
llm.readU32(r) // type kind := llm.readU32(ro)
llm.readU64(r) // offset offset := llm.readU64(ro)
llm.parameters += elements var blockSize uint64
switch {
case kind < 2:
blockSize = 1
case kind < 10:
blockSize = 32
default:
blockSize = 256
}
var typeSize uint64
switch kind {
case 0: // FP32
typeSize = 4
case 1: // FP16
typeSize = 2
case 2: // Q4_0
typeSize = 2 + blockSize/2
case 3: // Q4_1
typeSize = 2 + 2 + blockSize/2
case 6: // Q5_0
typeSize = 2 + 4 + blockSize/2
case 7: // Q5_1
typeSize = 2 + 2 + 4 + blockSize/2
case 8: // Q8_0
typeSize = 2 + blockSize
case 9: // Q8_1
typeSize = 4 + 4 + blockSize
case 10: // Q2_K
typeSize = blockSize/16 + blockSize/4 + 2 + 2
case 11: // Q3_K
typeSize = blockSize/8 + blockSize/4 + 12 + 2
case 12: // Q4_K
typeSize = 2 + 2 + 12 + blockSize/2
case 13: // Q5_K
typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
case 14: // Q6_K
typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
}
parameters := shape[0] * shape[1] * shape[2] * shape[3]
size := parameters * typeSize / blockSize
llm.tensors = append(llm.tensors, tensor{
name: name,
kind: kind,
offset: offset,
size: size,
shape: shape,
})
llm.parameters += parameters
}
alignment, ok := llm.kv["general.alignment"].(uint32)
if !ok {
alignment = 32
}
io.CopyN(io.Discard, ro, int64(alignment)-ro.offset%int64(alignment))
for _, tensor := range llm.tensors {
padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
io.CopyN(io.Discard, ro, padded)
} }
return nil return nil

View file

@ -388,9 +388,15 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
} }
defer bin.Close() defer bin.Close()
var offset int64
for {
fn(api.ProgressResponse{Status: "creating model layer"}) fn(api.ProgressResponse{Status: "creating model layer"})
bin.Seek(offset, io.SeekStart)
ggml, err := llm.DecodeGGML(bin) ggml, err := llm.DecodeGGML(bin)
if err != nil { if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err return err
} }
@ -399,13 +405,21 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
config.ModelType = ggml.ModelType() config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType() config.FileType = ggml.FileType()
bin.Seek(0, io.SeekStart) mediatype := mediatype
layer, err := NewLayer(bin, mediatype) if ggml.ModelFamily() == "clip" {
mediatype = "application/vnd.ollama.image.projector"
}
sr := io.NewSectionReader(bin, offset, ggml.Size)
layer, err := NewLayer(sr, mediatype)
if err != nil { if err != nil {
return err return err
} }
layers.Add(layer) layers.Add(layer)
offset += ggml.Size
}
case "adapter": case "adapter":
if strings.HasPrefix(c.Args, "@") { if strings.HasPrefix(c.Args, "@") {
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))