Merge pull request #1308 from jmorganca/mxyng/split-from

split from into one or more models
This commit is contained in:
Michael Yang 2023-12-05 14:33:03 -08:00 committed by GitHub
commit 409bb9674e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 235 additions and 85 deletions

View file

@ -7,9 +7,10 @@ import (
) )
type GGML struct { type GGML struct {
magic uint32
container container
model model
Size int64
} }
const ( const (
@ -82,7 +83,7 @@ type model interface {
type container interface { type container interface {
Name() string Name() string
Decode(io.Reader) (model, error) Decode(*readSeekOffset) (model, error)
} }
type containerGGML struct{} type containerGGML struct{}
@ -91,7 +92,7 @@ func (c *containerGGML) Name() string {
return "ggml" return "ggml"
} }
func (c *containerGGML) Decode(r io.Reader) (model, error) { func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) {
return nil, nil return nil, nil
} }
@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string {
return "ggmf" return "ggmf"
} }
func (c *containerGGMF) Decode(r io.Reader) (model, error) { func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) {
var version uint32 var version uint32
binary.Read(r, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
switch version { switch version {
case 1: case 1:
@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string {
return "ggjt" return "ggjt"
} }
func (c *containerGGJT) Decode(r io.Reader) (model, error) { func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) {
var version uint32 var version uint32
binary.Read(r, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
switch version { switch version {
case 1, 2, 3: case 1, 2, 3:
@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) {
// different model types may have different layouts for hyperparameters // different model types may have different layouts for hyperparameters
var llama llamaModel var llama llamaModel
binary.Read(r, binary.LittleEndian, &llama.hyperparameters) binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
return &llama, nil return &llama, nil
} }
@ -151,9 +152,9 @@ func (c *containerLORA) Name() string {
return "ggla" return "ggla"
} }
func (c *containerLORA) Decode(r io.Reader) (model, error) { func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
var version uint32 var version uint32
binary.Read(r, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
switch version { switch version {
case 1: case 1:
@ -179,34 +180,62 @@ const (
FILE_MAGIC_GGUF_BE = 0x47475546 FILE_MAGIC_GGUF_BE = 0x47475546
) )
func DecodeGGML(r io.Reader) (*GGML, error) { func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
var ggml GGML ro := readSeekOffset{ReadSeeker: r}
binary.Read(r, binary.LittleEndian, &ggml.magic)
switch ggml.magic { var magic uint32
if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil {
return nil, err
}
var c container
switch magic {
case FILE_MAGIC_GGML: case FILE_MAGIC_GGML:
ggml.container = &containerGGML{} c = &containerGGML{}
case FILE_MAGIC_GGMF: case FILE_MAGIC_GGMF:
ggml.container = &containerGGMF{} c = &containerGGMF{}
case FILE_MAGIC_GGJT: case FILE_MAGIC_GGJT:
ggml.container = &containerGGJT{} c = &containerGGJT{}
case FILE_MAGIC_GGLA: case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{} c = &containerLORA{}
case FILE_MAGIC_GGUF_LE: case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{bo: binary.LittleEndian} c = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE: case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian} c = &containerGGUF{bo: binary.BigEndian}
default: default:
return nil, errors.New("invalid file magic") return nil, errors.New("invalid file magic")
} }
model, err := ggml.Decode(r) model, err := c.Decode(&ro)
if err != nil { if err != nil {
return nil, err return nil, err
} }
ggml.model = model
// final model type // final model type
return &ggml, nil return &GGML{
container: c,
model: model,
Size: ro.offset,
}, nil
}
// readSeekOffset wraps an io.ReadSeeker and keeps a running record of the
// stream position in offset.
//
// offset starts at zero, grows by the number of bytes returned from each Read,
// and is overwritten with the position reported by the wrapped Seek whenever a
// Seek succeeds.
type readSeekOffset struct {
	io.ReadSeeker

	// offset is the tracked position; see the type comment for how it is updated.
	offset int64
}

// Seek forwards to the wrapped ReadSeeker. On success the tracked offset is
// replaced by the position the wrapped Seek reports and that position is
// returned. On failure the tracked offset is left untouched and the returned
// position is 0.
func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) {
	pos, err := rso.ReadSeeker.Seek(offset, whence)
	if err == nil {
		rso.offset = pos
		return pos, nil
	}

	return 0, err
}

// Read forwards to the wrapped ReadSeeker and advances the tracked offset by
// the number of bytes actually read, regardless of whether an error was also
// returned.
func (rso *readSeekOffset) Read(p []byte) (n int, err error) {
	n, err = rso.ReadSeeker.Read(p)
	rso.offset += int64(n)
	return n, err
}

View file

@ -23,26 +23,24 @@ type containerGGUF struct {
NumTensor uint64 NumTensor uint64
NumKV uint64 NumKV uint64
} }
parameters uint64
} }
func (c *containerGGUF) Name() string { func (c *containerGGUF) Name() string {
return "gguf" return "gguf"
} }
func (c *containerGGUF) Decode(r io.Reader) (model, error) { func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
binary.Read(r, c.bo, &c.Version) binary.Read(rso, c.bo, &c.Version)
switch c.Version { switch c.Version {
case 1: case 1:
binary.Read(r, c.bo, &c.V1) binary.Read(rso, c.bo, &c.V1)
default: default:
binary.Read(r, c.bo, &c.V2) binary.Read(rso, c.bo, &c.V2)
} }
model := newGGUFModel(c) model := newGGUFModel(c)
if err := model.Decode(r); err != nil { if err := model.Decode(rso); err != nil {
return nil, err return nil, err
} }
@ -67,9 +65,23 @@ const (
type kv map[string]any type kv map[string]any
// tensor holds the metadata recorded for a single tensor while decoding a
// GGUF file.
type tensor struct {
	// name is the tensor's name string as read from the file.
	name string

	// kind is the numeric tensor type identifier as read from the file.
	kind uint32

	// offset is the tensor offset value as read from the file; size is the
	// tensor's size derived by the decoder from its shape and kind.
	offset, size uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}
type ggufModel struct { type ggufModel struct {
*containerGGUF *containerGGUF
kv kv
tensors []tensor
parameters uint64
} }
func newGGUFModel(container *containerGGUF) *ggufModel { func newGGUFModel(container *containerGGUF) *ggufModel {
@ -96,8 +108,7 @@ func (llm *ggufModel) NumKV() uint64 {
} }
func (llm *ggufModel) ModelFamily() string { func (llm *ggufModel) ModelFamily() string {
t, ok := llm.kv["general.architecture"].(string) if t, ok := llm.kv["general.architecture"].(string); ok {
if ok {
return t return t
} }
@ -134,57 +145,56 @@ func (llm *ggufModel) ModelType() string {
} }
func (llm *ggufModel) FileType() string { func (llm *ggufModel) FileType() string {
t, ok := llm.kv["general.file_type"].(uint32) if t, ok := llm.kv["general.file_type"].(uint32); ok {
if ok {
return fileType(t) return fileType(t)
} }
return "unknown" return "unknown"
} }
func (llm *ggufModel) Decode(r io.Reader) error { func (llm *ggufModel) Decode(rso *readSeekOffset) error {
// decode key-values // decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ { for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := llm.readString(r) k, err := llm.readString(rso)
if err != nil { if err != nil {
return err return err
} }
vtype := llm.readU32(r) vtype := llm.readU32(rso)
var v any var v any
switch vtype { switch vtype {
case ggufTypeUint8: case ggufTypeUint8:
v = llm.readU8(r) v = llm.readU8(rso)
case ggufTypeInt8: case ggufTypeInt8:
v = llm.readI8(r) v = llm.readI8(rso)
case ggufTypeUint16: case ggufTypeUint16:
v = llm.readU16(r) v = llm.readU16(rso)
case ggufTypeInt16: case ggufTypeInt16:
v = llm.readI16(r) v = llm.readI16(rso)
case ggufTypeUint32: case ggufTypeUint32:
v = llm.readU32(r) v = llm.readU32(rso)
case ggufTypeInt32: case ggufTypeInt32:
v = llm.readI32(r) v = llm.readI32(rso)
case ggufTypeUint64: case ggufTypeUint64:
v = llm.readU64(r) v = llm.readU64(rso)
case ggufTypeInt64: case ggufTypeInt64:
v = llm.readI64(r) v = llm.readI64(rso)
case ggufTypeFloat32: case ggufTypeFloat32:
v = llm.readF32(r) v = llm.readF32(rso)
case ggufTypeFloat64: case ggufTypeFloat64:
v = llm.readF64(r) v = llm.readF64(rso)
case ggufTypeBool: case ggufTypeBool:
v = llm.readBool(r) v = llm.readBool(rso)
case ggufTypeString: case ggufTypeString:
s, err := llm.readString(r) s, err := llm.readString(rso)
if err != nil { if err != nil {
return err return err
} }
v = s v = s
case ggufTypeArray: case ggufTypeArray:
a, err := llm.readArray(r) a, err := llm.readArray(rso)
if err != nil { if err != nil {
return err return err
} }
@ -199,21 +209,85 @@ func (llm *ggufModel) Decode(r io.Reader) error {
// decode tensors // decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ { for i := 0; uint64(i) < llm.NumTensor(); i++ {
if _, err := llm.readString(r); err != nil { name, err := llm.readString(rso)
if err != nil {
return err return err
} }
dimensions := llm.readU32(r) // dims is the number of dimensions in the tensor
dims := llm.readU32(rso)
var elements uint64 = 1 shape := [4]uint64{1, 1, 1, 1}
for i := 0; uint32(i) < dimensions; i++ { for i := 0; uint32(i) < dims; i++ {
elements *= llm.readU64(r) shape[i] = llm.readU64(rso)
} }
llm.readU32(r) // type kind := llm.readU32(rso)
llm.readU64(r) // offset offset := llm.readU64(rso)
llm.parameters += elements var blockSize uint64
switch {
case kind < 2:
blockSize = 1
case kind < 10:
blockSize = 32
default:
blockSize = 256
}
var typeSize uint64
switch kind {
case 0: // FP32
typeSize = 4
case 1: // FP16
typeSize = 2
case 2: // Q4_0
typeSize = 2 + blockSize/2
case 3: // Q4_1
typeSize = 2 + 2 + blockSize/2
case 6: // Q5_0
typeSize = 2 + 4 + blockSize/2
case 7: // Q5_1
typeSize = 2 + 2 + 4 + blockSize/2
case 8: // Q8_0
typeSize = 2 + blockSize
case 9: // Q8_1
typeSize = 4 + 4 + blockSize
case 10: // Q2_K
typeSize = blockSize/16 + blockSize/4 + 2 + 2
case 11: // Q3_K
typeSize = blockSize/8 + blockSize/4 + 12 + 2
case 12: // Q4_K
typeSize = 2 + 2 + 12 + blockSize/2
case 13: // Q5_K
typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
case 14: // Q6_K
typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
}
parameters := shape[0] * shape[1] * shape[2] * shape[3]
size := parameters * typeSize / blockSize
llm.tensors = append(llm.tensors, tensor{
name: name,
kind: kind,
offset: offset,
size: size,
shape: shape,
})
llm.parameters += parameters
}
alignment, ok := llm.kv["general.alignment"].(uint32)
if !ok {
alignment = 32
}
rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
for _, tensor := range llm.tensors {
padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
rso.Seek(padded, io.SeekCurrent)
} }
return nil return nil

View file

@ -19,6 +19,8 @@ import (
"strings" "strings"
"text/template" "text/template"
"golang.org/x/exp/slices"
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser" "github.com/jmorganca/ollama/parser"
@ -134,17 +136,48 @@ type ManifestV2 struct {
} }
type ConfigV2 struct { type ConfigV2 struct {
ModelFormat string `json:"model_format"` ModelFormat string `json:"model_format"`
ModelFamily string `json:"model_family"` ModelFamily string `json:"model_family"`
ModelType string `json:"model_type"` ModelFamilies []string `json:"model_families"`
FileType string `json:"file_type"` ModelType string `json:"model_type"`
RootFS RootFS `json:"rootfs"` FileType string `json:"file_type"`
RootFS RootFS `json:"rootfs"`
// required by spec // required by spec
Architecture string `json:"architecture"` Architecture string `json:"architecture"`
OS string `json:"os"` OS string `json:"os"`
} }
// SetModelFormat stores format as the config's ModelFormat, but only when no
// format has been recorded yet; an existing non-empty value is never replaced.
func (c *ConfigV2) SetModelFormat(format string) {
	if c.ModelFormat != "" {
		return
	}

	c.ModelFormat = format
}
// SetModelFamily records one or more model family names on the config.
//
// The first non-empty name seen becomes ModelFamily when that field is still
// unset. Every non-empty name is also added to ModelFamilies unless it is
// already present, so repeated calls do not create duplicates.
//
// Empty names are skipped: callers may pass through a family field that was
// never populated, and a blank entry in ModelFamilies carries no information.
func (c *ConfigV2) SetModelFamily(families ...string) {
	for _, family := range families {
		if family == "" {
			continue
		}

		if c.ModelFamily == "" {
			c.ModelFamily = family
		}

		if !slices.Contains(c.ModelFamilies, family) {
			c.ModelFamilies = append(c.ModelFamilies, family)
		}
	}
}
// SetModelType stores modelType as the config's ModelType, but only when no
// type has been recorded yet; an existing non-empty value is never replaced.
func (c *ConfigV2) SetModelType(modelType string) {
	if c.ModelType != "" {
		return
	}

	c.ModelType = modelType
}
// SetFileType stores fileType as the config's FileType, but only when no file
// type has been recorded yet; an existing non-empty value is never replaced.
func (c *ConfigV2) SetFileType(fileType string) {
	if c.FileType != "" {
		return
	}

	c.FileType = fileType
}
type RootFS struct { type RootFS struct {
Type string `json:"type"` Type string `json:"type"`
DiffIDs []string `json:"diff_ids"` DiffIDs []string `json:"diff_ids"`
@ -354,10 +387,10 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
return err return err
} }
config.ModelFormat = fromConfig.ModelFormat config.SetModelFormat(fromConfig.ModelFormat)
config.ModelFamily = fromConfig.ModelFamily config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...)
config.ModelType = fromConfig.ModelType config.SetModelType(fromConfig.ModelType)
config.FileType = fromConfig.FileType config.SetFileType(fromConfig.FileType)
for _, layer := range manifest.Layers { for _, layer := range manifest.Layers {
deleteMap[layer.Digest] = struct{}{} deleteMap[layer.Digest] = struct{}{}
@ -391,24 +424,38 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
} }
defer bin.Close() defer bin.Close()
fn(api.ProgressResponse{Status: "creating model layer"}) var offset int64
ggml, err := llm.DecodeGGML(bin) for {
if err != nil { fn(api.ProgressResponse{Status: "creating model layer"})
return err
bin.Seek(offset, io.SeekStart)
ggml, err := llm.DecodeGGML(bin)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err
}
config.SetModelFormat(ggml.Name())
config.SetModelFamily(ggml.ModelFamily())
config.SetModelType(ggml.ModelType())
config.SetFileType(ggml.FileType())
mediatype := mediatype
if ggml.ModelFamily() == "clip" {
mediatype = "application/vnd.ollama.image.projector"
}
sr := io.NewSectionReader(bin, offset, ggml.Size)
layer, err := NewLayer(sr, mediatype)
if err != nil {
return err
}
layers.Add(layer)
offset += ggml.Size
} }
config.ModelFormat = ggml.Name()
config.ModelFamily = ggml.ModelFamily()
config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType()
bin.Seek(0, io.SeekStart)
layer, err := NewLayer(bin, mediatype)
if err != nil {
return err
}
layers.Add(layer)
case "adapter": case "adapter":
if strings.HasPrefix(c.Args, "@") { if strings.HasPrefix(c.Args, "@") {
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))