2024-04-12 20:55:12 +00:00
|
|
|
package server
|
|
|
|
|
|
|
|
import (
|
|
|
|
"archive/zip"
|
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
2024-06-12 20:30:08 +00:00
|
|
|
"log/slog"
|
2024-04-12 20:55:12 +00:00
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
|
|
|
|
"github.com/ollama/ollama/api"
|
|
|
|
"github.com/ollama/ollama/convert"
|
|
|
|
"github.com/ollama/ollama/llm"
|
2024-06-12 20:30:08 +00:00
|
|
|
"github.com/ollama/ollama/templates"
|
2024-04-12 20:55:12 +00:00
|
|
|
"github.com/ollama/ollama/types/model"
|
|
|
|
)
|
|
|
|
|
2024-05-20 21:58:27 +00:00
|
|
|
// intermediateBlobs maps the digest of an uploaded source blob (e.g. a
// zipped checkpoint) to the digest of the converted model layer derived
// from it, so repeated creates can reuse the conversion instead of
// redoing it.
// NOTE(review): accessed without synchronization — confirm all readers
// and writers run on a single goroutine, or guard with a mutex.
var intermediateBlobs = make(map[string]string)
|
2024-05-10 22:48:41 +00:00
|
|
|
|
2024-06-12 20:30:08 +00:00
|
|
|
// layerGGML pairs a manifest layer with its decoded GGML metadata.
// The embedded *llm.GGML is nil for layers that carry no GGML payload
// (e.g. templates, parameters, licenses) — callers must check before
// dereferencing it.
type layerGGML struct {
	*Layer
	*llm.GGML
}
|
|
|
|
|
2024-06-12 20:30:08 +00:00
|
|
|
// parseFromModel resolves name to a local manifest — pulling the model
// from the registry if it is not present on disk — and returns its
// layers, decoding GGML metadata for model, projector, and adapter
// layers. Pull progress is reported through fn.
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
	m, err := ParseNamedManifest(name)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// Manifest missing locally: pull from the registry, then re-read.
		if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
			return nil, err
		}

		m, err = ParseNamedManifest(name)
		if err != nil {
			return nil, err
		}
	case err != nil:
		return nil, err
	}

	for _, layer := range m.Layers {
		// Deliberately shadows the loop variable with the fully
		// materialized layer built from the manifest entry.
		layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
		if err != nil {
			return nil, err
		}

		switch layer.MediaType {
		case "application/vnd.ollama.image.model",
			"application/vnd.ollama.image.projector",
			"application/vnd.ollama.image.adapter":
			blobpath, err := GetBlobsPath(layer.Digest)
			if err != nil {
				return nil, err
			}

			blob, err := os.Open(blobpath)
			if err != nil {
				return nil, err
			}
			// NOTE(review): defer inside the loop keeps every blob open
			// until the function returns. Presumably acceptable for the
			// small layer counts involved; before closing earlier,
			// confirm DecodeGGML does not retain the reader.
			defer blob.Close()

			ggml, _, err := llm.DecodeGGML(blob)
			if err != nil {
				return nil, err
			}

			layers = append(layers, &layerGGML{layer, ggml})
		default:
			// Non-GGML layers (templates, params, licenses, ...) carry
			// a nil GGML.
			layers = append(layers, &layerGGML{layer, nil})
		}
	}

	return layers, nil
}
|
|
|
|
|
2024-06-12 20:30:08 +00:00
|
|
|
func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
2024-04-12 20:55:12 +00:00
|
|
|
stat, err := file.Stat()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
r, err := zip.NewReader(file, stat.Size())
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
tempdir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer os.RemoveAll(tempdir)
|
|
|
|
|
|
|
|
fn(api.ProgressResponse{Status: "unpacking model metadata"})
|
|
|
|
for _, f := range r.File {
|
|
|
|
// TODO(mxyng): this should not write out all files to disk
|
|
|
|
outfile, err := os.Create(filepath.Join(tempdir, f.Name))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-05-06 22:27:19 +00:00
|
|
|
defer outfile.Close()
|
2024-04-12 20:55:12 +00:00
|
|
|
|
|
|
|
infile, err := f.Open()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-05-06 22:27:19 +00:00
|
|
|
defer infile.Close()
|
2024-04-12 20:55:12 +00:00
|
|
|
|
|
|
|
if _, err = io.Copy(outfile, infile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := outfile.Close(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := infile.Close(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mf, err := convert.GetModelFormat(tempdir)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
params, err := mf.GetParams(tempdir)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
mArch, err := mf.GetModelArch("", tempdir, params)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
fn(api.ProgressResponse{Status: "processing tensors"})
|
|
|
|
if err := mArch.GetTensors(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := mArch.LoadVocab(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
fn(api.ProgressResponse{Status: "converting model"})
|
|
|
|
|
|
|
|
// TODO(mxyng): this should write directly into a layer
|
|
|
|
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
|
|
|
|
temp, err := os.CreateTemp(tempdir, "fp16")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer temp.Close()
|
|
|
|
defer os.Remove(temp.Name())
|
|
|
|
|
|
|
|
if err = mArch.WriteGGUF(temp); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := temp.Seek(0, io.SeekStart); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
|
|
|
|
if err != nil {
|
2024-05-20 21:58:27 +00:00
|
|
|
return nil, err
|
2024-04-12 20:55:12 +00:00
|
|
|
}
|
|
|
|
|
2024-05-10 22:48:41 +00:00
|
|
|
bin, err := layer.Open()
|
2024-04-12 20:55:12 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer bin.Close()
|
|
|
|
|
|
|
|
ggml, _, err := llm.DecodeGGML(bin)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-06-12 20:30:08 +00:00
|
|
|
layers = append(layers, &layerGGML{layer, ggml})
|
2024-05-10 22:48:41 +00:00
|
|
|
|
2024-05-20 21:58:27 +00:00
|
|
|
intermediateBlobs[digest] = layer.Digest
|
2024-06-12 20:30:08 +00:00
|
|
|
return detectChatTemplate(layers)
|
2024-04-12 20:55:12 +00:00
|
|
|
}
|
|
|
|
|
2024-06-12 20:30:08 +00:00
|
|
|
// parseFromFile decodes one or more GGML/GGUF payloads from file into
// model layers. digest identifies the uploaded blob; zip archives are
// delegated to parseFromZipFile for conversion, and progress is reported
// through fn.
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
	// Sniff only the first 512 bytes — enough for both GGML magic and
	// http.DetectContentType.
	sr := io.NewSectionReader(file, 0, 512)
	contentType, err := detectContentType(sr)
	if err != nil {
		return nil, err
	}

	switch contentType {
	case "gguf", "ggla":
		// noop
	case "application/zip":
		return parseFromZipFile(ctx, file, digest, fn)
	default:
		return nil, fmt.Errorf("unsupported content type: %s", contentType)
	}

	stat, err := file.Stat()
	if err != nil {
		return nil, err
	}

	// A single file may contain several payloads (e.g. model followed by
	// a projector); keep decoding until the end of the file.
	var offset int64
	for offset < stat.Size() {
		ggml, n, err := llm.DecodeGGML(file)
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}

		// Media type is derived from the decoded metadata: ggla files
		// are LoRA adapters, clip-architecture payloads are projectors.
		mediatype := "application/vnd.ollama.image.model"
		if ggml.Name() == "ggla" {
			mediatype = "application/vnd.ollama.image.adapter"
		} else if ggml.KV().Architecture() == "clip" {
			mediatype = "application/vnd.ollama.image.projector"
		}

		// NOTE(review): n is used both as the section length here and as
		// the next absolute offset below; these only agree on the first
		// iteration (offset == 0). Confirm DecodeGGML returns the
		// absolute end-of-payload offset if multi-payload files must
		// round-trip correctly.
		layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
		if err != nil {
			return nil, err
		}

		layers = append(layers, &layerGGML{layer, ggml})
		offset = n
	}

	return detectChatTemplate(layers)
}
|
|
|
|
|
|
|
|
func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
|
|
|
|
for _, layer := range layers {
|
|
|
|
if s := layer.GGML.KV().ChatTemplate(); s != "" {
|
|
|
|
if t, err := templates.NamedTemplate(s); err != nil {
|
|
|
|
slog.Debug("template detection", "error", err)
|
|
|
|
} else {
|
|
|
|
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
|
|
|
layers = append(layers, &layerGGML{tmpl, nil})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-12 20:55:12 +00:00
|
|
|
return layers, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func detectContentType(r io.Reader) (string, error) {
|
|
|
|
var b bytes.Buffer
|
|
|
|
if _, err := io.Copy(&b, r); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
|
|
|
|
return contentType, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
|
|
|
|
return contentType, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return "unknown", nil
|
|
|
|
}
|