package server

import (
	"bytes"
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/server/imageproc"
	"github.com/ollama/ollama/template"
)

type tokenizeFunc func(context.Context, string) ([]int, error)

var errTooManyImages = errors.New("vision model only supports a single image per message")

// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn.
// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the
// latest message and 2) system messages.
func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.Options, msgs []api.Message, tools []api.Tool) (prompt string, images []llm.ImageData, _ error) {
	var system []api.Message
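
	// mllama models accept at most one image per message, so detect that
	// family up front; the reverse scan below rejects any message carrying
	// more than one image.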
	isMllama := checkMllamaModelFamily(m)

	n := len(msgs) - 1
	// in reverse, find all messages that fit into context window
	for i := n; i >= 0; i-- {
		if isMllama && len(msgs[i].Images) > 1 {
			return "", nil, errTooManyImages
		}

		// always include the last message
		if i == n {
			continue
		}
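
		// collect any system messages that precede the current cut point so
		// they are always included in the rendered prompt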
		system = make([]api.Message, 0)
		for j := range i {
			if msgs[j].Role == "system" {
				system = append(system, msgs[j])
			}
		}

		var b bytes.Buffer
		if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...), Tools: tools}); err != nil {
			return "", nil, err
		}

		s, err := tokenize(ctx, b.String())
		if err != nil {
			return "", nil, err
		}
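
		// estimate the context cost of this candidate window: its token
		// count plus a fixed per-image embedding length when the model has a
		// projector attached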
		ctxLen := len(s)
		if m.ProjectorPaths != nil {
			for _, m := range msgs[i:] {
				// images are represented as 768 sized embeddings
				// TODO: get embedding length from projector metadata
				ctxLen += 768 * len(m.Images)
			}
		}

		if ctxLen > opts.NumCtx {
			slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))
			break
		} else {
			n = i
		}
	}
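
	// n now marks the oldest message that fit during the scan above; older
	// messages are dropped from the rendered prompt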
	currMsgIdx := n

	if isMllama {
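		// mllama: preprocess only the most recent image in the retained
		// window, attach its aspect ratio ID, and prepend an <|image|> marker
		// to that message's content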
		lastMsgIdx := len(msgs) - 1
		for i := lastMsgIdx; i >= currMsgIdx; i-- {
			if len(msgs[i].Images) > 0 {
				data, aspectRatioID, err := imageproc.Preprocess(msgs[i].Images[0])
				if err != nil {
					return "", nil, err
				}

				buf := new(bytes.Buffer)
				err = binary.Write(buf, binary.LittleEndian, data)
				if err != nil {
					return "", nil, err
				}

				imgData := llm.ImageData{
					Data:          buf.Bytes(),
					AspectRatioID: aspectRatioID,
				}

				msgs[i].Content = strings.TrimSpace("<|image|>" + msgs[i].Content)
				images = append(images, imgData)
				break
			}
		}
	} else {
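		// other multimodal models: give each image a sequential ID and splice
		// an [img-N] tag into the message text, replacing an explicit [img]
		// placeholder when present or prefixing the message otherwise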
		for cnt, msg := range msgs[currMsgIdx:] {
			prefix := ""
			prompt := msg.Content
			for _, i := range msg.Images {
				imgData := llm.ImageData{
					ID:   len(images),
					Data: i,
				}

				imgTag := fmt.Sprintf("[img-%d]", imgData.ID)
				if !strings.Contains(prompt, "[img]") {
					prefix += imgTag
				} else {
					prompt = strings.Replace(prompt, "[img]", imgTag, 1)
				}

				images = append(images, imgData)
			}
			msgs[currMsgIdx+cnt].Content = strings.TrimSpace(prefix + " " + prompt)
		}
	}

	// render only the retained window; any messages that did not fit into
	// the context window were truncated above
	var b bytes.Buffer
	if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools}); err != nil {
		return "", nil, err
	}

	return b.String(), images, nil
}
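
// checkMllamaModelFamily reports whether the model declares "mllama" among
// its model families.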
func checkMllamaModelFamily(m *Model) bool {
	for _, arch := range m.Config.ModelFamilies {
		if arch == "mllama" {
			return true
		}
	}
	return false
}
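
// Illustrative usage (a sketch, not part of this file): a caller such as the
// chat handler is assumed to pass the loaded model, the active runner's
// tokenizer, and the request's options and messages, along the lines of:
//
//	prompt, images, err := chatPrompt(ctx, m, runner.Tokenize, opts, msgs, tools)
//	if err != nil {
//		return err
//	}
//	// prompt is the rendered template text; images holds the image payloads
//	// referenced by [img-N] or <|image|> markers in the prompt.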