ollama/llm/ggml.go

228 lines
4.1 KiB
Go
Raw Normal View History

2023-07-21 13:33:56 -07:00
package llm
import (
"encoding/binary"
"errors"
"io"
2023-09-07 13:55:37 -04:00
"path"
"sync"
2023-07-21 13:33:56 -07:00
)
// GGML is the result of decoding a model file: the magic number read from
// the start of the file, the container format selected by that magic, and
// the model returned by the container's Decode.
//
// Both container and model are embedded, so their methods are promoted onto
// GGML.
type GGML struct {
	// magic is the 32-bit identifier read from the start of the file.
	magic uint32

	container
	model
}
2023-07-21 13:33:56 -07:00
// Numeric file type identifiers; fileType maps each one to its display name.
//
// NOTE(review): the names look like tensor quantization formats and the
// numbering appears to mirror llama.cpp's llama_ftype enum — confirm before
// adding or renumbering entries.
const (
	// Values 0 through 4, assigned consecutively by iota.
	fileTypeF32 uint32 = iota
	fileTypeF16
	fileTypeQ4_0
	fileTypeQ4_1
	fileTypeQ4_1_F16

	// Values 7 through 18. This spec sits at iota == 5, so the +2 offset
	// makes the sequence resume at 7; the values 5 and 6 deliberately have
	// no name in this block.
	fileTypeQ8_0 uint32 = iota + 2
	fileTypeQ5_0
	fileTypeQ5_1
	fileTypeQ2_K
	fileTypeQ3_K_S
	fileTypeQ3_K_M
	fileTypeQ3_K_L
	fileTypeQ4_K_S
	fileTypeQ4_K_M
	fileTypeQ5_K_S
	fileTypeQ5_K_M
	fileTypeQ6_K
)
func fileType(fileType uint32) string {
switch fileType {
case fileTypeF32:
return "F32"
case fileTypeF16:
return "F16"
case fileTypeQ4_0:
return "Q4_0"
case fileTypeQ4_1:
return "Q4_1"
case fileTypeQ4_1_F16:
return "Q4_1_F16"
case fileTypeQ8_0:
return "Q8_0"
case fileTypeQ5_0:
return "Q5_0"
case fileTypeQ5_1:
return "Q5_1"
case fileTypeQ2_K:
return "Q2_K"
case fileTypeQ3_K_S:
return "Q3_K_S"
case fileTypeQ3_K_M:
return "Q3_K_M"
case fileTypeQ3_K_L:
return "Q3_K_L"
case fileTypeQ4_K_S:
return "Q4_K_S"
case fileTypeQ4_K_M:
return "Q4_K_M"
case fileTypeQ5_K_S:
return "Q5_K_S"
case fileTypeQ5_K_M:
return "Q5_K_M"
case fileTypeQ6_K:
return "Q6_K"
2023-08-17 11:37:27 -07:00
default:
return "Unknown"
}
}
2023-07-21 13:33:56 -07:00
2023-08-17 11:37:27 -07:00
// model is the set of string accessors a decoded model exposes. It is what
// a container's Decode returns and what GGML embeds.
type model interface {
	// ModelFamily returns the model's family name.
	// NOTE(review): presumably the architecture (e.g. "llama") — confirm
	// against the implementations.
	ModelFamily() string

	// ModelType returns the model's type.
	// NOTE(review): presumably a size class — confirm against the
	// implementations.
	ModelType() string

	// FileType returns the model's file type as a string.
	FileType() string
}
type container interface {
Name() string
2023-09-07 13:55:37 -04:00
Decode(io.Reader) (model, error)
2023-07-21 13:33:56 -07:00
}
2023-09-07 13:55:37 -04:00
// containerGGML is the container without a version field; DecodeGGML selects
// it for FILE_MAGIC_GGML.
type containerGGML struct{}

// Name returns the container's identifier, "ggml".
func (c *containerGGML) Name() string {
	return "ggml"
}
2023-09-07 13:55:37 -04:00
func (c *containerGGML) Decode(r io.Reader) (model, error) {
return nil, nil
2023-07-21 13:33:56 -07:00
}
// containerGGMF is the versioned container that DecodeGGML selects for
// FILE_MAGIC_GGMF.
type containerGGMF struct {
	// version is the container version recorded by Decode; it stays zero
	// until a decode succeeds.
	version uint32
}

// Name returns the container's identifier, "ggmf".
func (c *containerGGMF) Name() string {
	return "ggmf"
}
2023-09-07 13:55:37 -04:00
func (c *containerGGMF) Decode(r io.Reader) (model, error) {
2023-07-21 13:33:56 -07:00
var version uint32
binary.Read(r, binary.LittleEndian, &version)
switch version {
case 1:
default:
2023-09-07 13:55:37 -04:00
return nil, errors.New("invalid version")
2023-07-21 13:33:56 -07:00
}
c.version = version
2023-09-07 13:55:37 -04:00
return nil, nil
2023-07-21 13:33:56 -07:00
}
// containerGGJT is the versioned container that DecodeGGML selects for
// FILE_MAGIC_GGJT.
type containerGGJT struct {
	// version is the container version recorded by Decode; it stays zero
	// until Decode has accepted a version.
	version uint32
}

// Name returns the container's identifier, "ggjt".
func (c *containerGGJT) Name() string {
	return "ggjt"
}
2023-09-07 13:55:37 -04:00
func (c *containerGGJT) Decode(r io.Reader) (model, error) {
2023-07-21 13:33:56 -07:00
var version uint32
binary.Read(r, binary.LittleEndian, &version)
switch version {
case 1, 2, 3:
default:
2023-09-07 13:55:37 -04:00
return nil, errors.New("invalid version")
2023-07-21 13:33:56 -07:00
}
c.version = version
2023-09-07 13:55:37 -04:00
// different model types may have different layouts for hyperparameters
var llama llamaModel
binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
return &llama, nil
2023-07-21 13:33:56 -07:00
}
// containerLORA is the versioned container that DecodeGGML selects for
// FILE_MAGIC_GGLA.
type containerLORA struct {
	// version is the container version recorded by Decode; it stays zero
	// until a decode succeeds.
	version uint32
}

// Name returns the container's identifier, "ggla".
func (c *containerLORA) Name() string {
	return "ggla"
}
2023-09-07 13:55:37 -04:00
func (c *containerLORA) Decode(r io.Reader) (model, error) {
2023-07-21 13:33:56 -07:00
var version uint32
binary.Read(r, binary.LittleEndian, &version)
switch version {
case 1:
default:
2023-09-07 13:55:37 -04:00
return nil, errors.New("invalid version")
2023-07-21 13:33:56 -07:00
}
c.version = version
2023-09-07 13:55:37 -04:00
return nil, nil
}
// Slash-separated directories handed to chooseRunner by ggmlRunner, GPU
// build first and CPU build second.
var (
	// ggmlGPU is "llama.cpp/ggml/build/gpu/bin".
	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")

	// ggmlCPU is "llama.cpp/ggml/build/cpu/bin".
	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
)
// State behind ggmlRunner's one-time runner selection.
var (
	// ggmlInit ensures the runner path is chosen at most once.
	ggmlInit sync.Once

	// ggmlRunnerPath holds the chosen path; it is empty until ggmlInit has
	// run.
	ggmlRunnerPath string
)
func ggmlRunner() ModelRunner {
ggmlInit.Do(func() {
ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU)
})
return ModelRunner{Path: ggmlRunnerPath}
2023-07-21 13:33:56 -07:00
}
// Magic numbers identifying the supported container formats. DecodeGGML
// reads the first four bytes of a file as a little-endian uint32 and
// compares the result against these values.
const (
	// Magic constant for `ggml` files (unversioned).
	FILE_MAGIC_GGML = 0x67676d6c

	// Magic constant for `ggml` files (versioned, ggmf).
	FILE_MAGIC_GGMF = 0x67676d66

	// Magic constant for `ggml` files (versioned, ggjt).
	FILE_MAGIC_GGJT = 0x67676a74

	// Magic constant for `ggla` files (LoRA adapter).
	FILE_MAGIC_GGLA = 0x67676C61

	// Magic constant for `gguf` files (versioned, gguf).
	//
	// The hex digits of the four values above spell their tag in reading
	// order. This one spells "GGUF" only in little-endian byte order
	// (47 47 55 46).
	FILE_MAGIC_GGUF = 0x46554747
)
2023-09-07 13:55:37 -04:00
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
2023-07-21 13:33:56 -07:00
var ggml GGML
binary.Read(r, binary.LittleEndian, &ggml.magic)
switch ggml.magic {
case FILE_MAGIC_GGML:
ggml.container = &containerGGML{}
case FILE_MAGIC_GGMF:
ggml.container = &containerGGMF{}
case FILE_MAGIC_GGJT:
ggml.container = &containerGGJT{}
case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{}
2023-09-07 13:55:37 -04:00
case FILE_MAGIC_GGUF:
ggml.container = &containerGGUF{}
2023-07-21 13:33:56 -07:00
default:
return nil, errors.New("invalid file magic")
}
2023-09-07 13:55:37 -04:00
model, err := ggml.Decode(r)
if err != nil {
2023-07-21 13:33:56 -07:00
return nil, err
}
2023-09-07 13:55:37 -04:00
ggml.model = model
2023-07-21 13:33:56 -07:00
// final model type
return &ggml, nil
}