ollama/server/model.go

package server

import (
	"archive/zip"
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"os"
	"path/filepath"
	"slices"
	"strings"
	"text/template/parse"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/convert"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/template"
	"github.com/ollama/ollama/types/model"
)
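
// intermediateBlobs maps the digest of an uploaded blob (e.g. a zipped
// safetensors/pytorch model) to the digest of the GGUF layer produced by
// converting it, presumably so later requests can reuse the conversion
// instead of repeating it.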
var intermediateBlobs map[string]string = make(map[string]string)
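
// layerGGML pairs a manifest Layer with its decoded GGML metadata; the GGML
// field is nil for layers that are not GGML-encoded (templates, params, etc.).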
type layerGGML struct {
	*Layer
	*llm.GGML
}
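
// parseFromModel resolves an existing model by name, pulling it from the
// registry if it is not present locally, and returns its layers with GGML
// metadata decoded for model, projector, and adapter blobs.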
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
	m, err := ParseNamedManifest(name)
	switch {
	case errors.Is(err, os.ErrNotExist):
		if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
			return nil, err
		}

		m, err = ParseNamedManifest(name)
		if err != nil {
			return nil, err
		}
	case err != nil:
		return nil, err
	}

	for _, layer := range m.Layers {
		layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
		if err != nil {
			return nil, err
		}

		switch layer.MediaType {
		case "application/vnd.ollama.image.model",
			"application/vnd.ollama.image.projector",
			"application/vnd.ollama.image.adapter":
			blobpath, err := GetBlobsPath(layer.Digest)
			if err != nil {
				return nil, err
			}

			blob, err := os.Open(blobpath)
			if err != nil {
				return nil, err
			}
			defer blob.Close()

			ggml, _, err := llm.DecodeGGML(blob, 0)
			if err != nil {
				return nil, err
			}

			layers = append(layers, &layerGGML{layer, ggml})
		default:
			layers = append(layers, &layerGGML{layer, nil})
		}
	}

	return layers, nil
}
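
// extractFromZipFile unpacks the zip archive in file into the directory p,
// rejecting entries whose paths would escape the target directory.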
func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error {
	stat, err := file.Stat()
	if err != nil {
		return err
	}

	r, err := zip.NewReader(file, stat.Size())
	if err != nil {
		return err
	}

	fn(api.ProgressResponse{Status: "unpacking model metadata"})
	for _, f := range r.File {
		if !filepath.IsLocal(f.Name) {
			return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name)
		}

		n := filepath.Join(p, f.Name)
		if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil {
			return err
		}

		// TODO(mxyng): this should not write out all files to disk
		outfile, err := os.Create(n)
		if err != nil {
			return err
		}
		defer outfile.Close()

		infile, err := f.Open()
		if err != nil {
			return err
		}
		defer infile.Close()

		if _, err = io.Copy(outfile, infile); err != nil {
			return err
		}

		if err := outfile.Close(); err != nil {
			return err
		}

		if err := infile.Close(); err != nil {
			return err
		}
	}

	return nil
}
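
// parseFromZipFile extracts a zipped model into a temporary directory,
// converts it to a GGUF blob, wraps the result in a model layer, and records
// the conversion in intermediateBlobs keyed by the original digest.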
func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
	tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
	if err != nil {
		return nil, err
	}
	defer os.RemoveAll(tempDir)

	if err := extractFromZipFile(tempDir, file, fn); err != nil {
		return nil, err
	}

	mf, err := convert.GetModelFormat(tempDir)
	if err != nil {
		return nil, err
	}

	params, err := mf.GetParams(tempDir)
	if err != nil {
		return nil, err
	}

	mArch, err := mf.GetModelArch("", tempDir, params)
	if err != nil {
		return nil, err
	}

	fn(api.ProgressResponse{Status: "processing tensors"})
	if err := mArch.GetTensors(); err != nil {
		return nil, err
	}

	if err := mArch.LoadVocab(); err != nil {
		return nil, err
	}

	fn(api.ProgressResponse{Status: "converting model"})

	// TODO(mxyng): this should write directly into a layer
	// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
	temp, err := os.CreateTemp(tempDir, "fp16")
	if err != nil {
		return nil, err
	}
	defer temp.Close()
	defer os.Remove(temp.Name())

	if err = mArch.WriteGGUF(temp); err != nil {
		return nil, err
	}

	if _, err := temp.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
	if err != nil {
		return nil, err
	}

	bin, err := layer.Open()
	if err != nil {
		return nil, err
	}
	defer bin.Close()

	ggml, _, err := llm.DecodeGGML(bin, 0)
	if err != nil {
		return nil, err
	}

	layers = append(layers, &layerGGML{layer, ggml})

	intermediateBlobs[digest] = layer.Digest
	return detectChatTemplate(layers)
}
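
// parseFromFile sniffs the content type of file and dispatches accordingly:
// zip archives are converted via parseFromZipFile, while GGUF/GGLA blobs are
// decoded in place, producing one layer per embedded model, adapter, or
// projector.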
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
	sr := io.NewSectionReader(file, 0, 512)
	contentType, err := detectContentType(sr)
	if err != nil {
		return nil, err
	}

	switch contentType {
	case "gguf", "ggla":
		// noop
	case "application/zip":
		return parseFromZipFile(ctx, file, digest, fn)
	default:
		return nil, fmt.Errorf("unsupported content type: %s", contentType)
	}

	stat, err := file.Stat()
	if err != nil {
		return nil, err
	}

	var offset int64
	for offset < stat.Size() {
		ggml, n, err := llm.DecodeGGML(file, 0)
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}

		mediatype := "application/vnd.ollama.image.model"
		if ggml.Name() == "ggla" {
			mediatype = "application/vnd.ollama.image.adapter"
		} else if ggml.KV().Architecture() == "clip" {
			mediatype = "application/vnd.ollama.image.projector"
		}

		layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
		if err != nil {
			return nil, err
		}

		layers = append(layers, &layerGGML{layer, ggml})
		offset = n
	}

	return detectChatTemplate(layers)
}
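
// detectChatTemplate checks each GGML layer for an embedded chat template
// and, when the template matches a known named template, appends an
// autodetected template layer (and, if available, a params layer) to the
// result.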
func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
	for _, layer := range layers {
		if s := layer.GGML.KV().ChatTemplate(); s != "" {
			if t, err := template.Named(s); err != nil {
				slog.Debug("template detection", "error", err)
			} else {
				layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
				if err != nil {
					return nil, err
				}

				layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
				layers = append(layers, &layerGGML{layer, nil})

				if t.Parameters != nil {
					var b bytes.Buffer
					if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
						return nil, err
					}

					layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
					if err != nil {
						return nil, err
					}

					layers = append(layers, &layerGGML{layer, nil})
				}
			}
		}
	}

	return layers, nil
}
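
// detectContentType reports the GGML container type (e.g. "gguf", "ggla")
// when r holds a GGML-family blob, falls back to http.DetectContentType for
// everything else, and returns "unknown" when neither yields a useful answer.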
func detectContentType(r io.Reader) (string, error) {
	var b bytes.Buffer
	if _, err := io.Copy(&b, r); err != nil {
		return "", err
	}

	if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
		return contentType, nil
	}

	if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
		return contentType, nil
	}

	return "unknown", nil
}

// parseToolCalls attempts to parse a JSON string into a slice of ToolCalls.
// mxyng: this only really works if the input contains tool calls in some JSON format
func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
	// create a subtree from the node that ranges over .ToolCalls
	tmpl := m.Template.Subtree(func(n parse.Node) bool {
		if t, ok := n.(*parse.RangeNode); ok {
			return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
		}

		return false
	})
	if tmpl == nil {
		return nil, false
	}

	var b bytes.Buffer
	if err := tmpl.Execute(&b, map[string][]api.ToolCall{
		"ToolCalls": {
			{
				Function: api.ToolCallFunction{
					Name: "@@name@@",
					Arguments: api.ToolCallFunctionArguments{
						"@@argument@@": 1,
					},
				},
			},
		},
	}); err != nil {
		return nil, false
	}
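
	// e.g. with a JSON-style template, the placeholder execution above yields
	// something like: {"name": "@@name@@", "arguments": {"@@argument@@": 1}}
	// (illustrative only; the exact shape depends on the model's template)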
	var kv map[string]any
	// execute the subtree with placeholders to identify the keys
	// trim any commas that might exist in the template
	if err := json.Unmarshal(bytes.TrimSuffix(b.Bytes(), []byte(",")), &kv); err != nil {
		return nil, false
	}

	// find the keys that correspond to the name and arguments fields
	var name, arguments string
	for k, v := range kv {
		switch v.(type) {
		case string:
			name = k
		case map[string]any:
			arguments = k
		}
	}

	if name == "" || arguments == "" {
		return nil, false
	}
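
	// scan the raw response for JSON values, skipping any non-JSON text
	// between them, and flatten nested objects so tool calls wrapped in
	// arrays or envelope objects are still collected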
	var objs []map[string]any
	for offset := 0; offset < len(s); {
		var obj map[string]any
		decoder := json.NewDecoder(strings.NewReader(s[offset:]))
		if err := decoder.Decode(&obj); errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
			break
		} else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
			// skip over any syntax errors
			offset += int(syntax.Offset)
		} else if unmarshalType := &(json.UnmarshalTypeError{}); errors.As(err, &unmarshalType) {
			// skip over any unmarshalable types
			offset += int(unmarshalType.Offset)
		} else if err != nil {
			slog.Error("parseToolCalls", "error", err)
			return nil, false
		} else {
			offset += int(decoder.InputOffset())

			// collect all nested objects
			var collect func(any) []map[string]any
			collect = func(obj any) (all []map[string]any) {
				switch o := obj.(type) {
				case map[string]any:
					all = append(all, o)
					for _, v := range o {
						all = append(all, collect(v)...)
					}
				case []any:
					for _, v := range o {
						all = append(all, collect(v)...)
					}
				}

				return all
			}
			objs = append(objs, collect(obj)...)
		}
	}

	var toolCalls []api.ToolCall
	for _, kv := range objs {
		n, nok := kv[name].(string)
		a, aok := kv[arguments].(map[string]any)
		if nok && aok {
			toolCalls = append(toolCalls, api.ToolCall{
				Function: api.ToolCallFunction{
					Name:      n,
					Arguments: a,
				},
			})
		}
	}

	return toolCalls, len(toolCalls) > 0
}