ollama/server/prompt.go

149 lines
3.6 KiB
Go
Raw Normal View History

package server
import (
2024-06-17 10:38:55 -07:00
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"log/slog"
"strings"
"github.com/ollama/ollama/api"
2024-06-17 10:38:55 -07:00
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/server/imageproc"
2024-06-10 14:54:42 -07:00
"github.com/ollama/ollama/template"
)
2024-06-20 11:00:08 -07:00
// tokenizeFunc turns a string into a slice of ints (one per token) or reports an error.
// chatPrompt only uses the length of the returned slice, as the token count of a rendered prompt.
type tokenizeFunc func(context.Context, string) ([]int, error)
// errTooManyImages is returned by chatPrompt when a model in the "mllama" family is given a
// message that carries more than one image.
var errTooManyImages = errors.New("vision model only supports a single image per message")
2024-06-20 11:00:08 -07:00
// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn.
// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the
// latest message and 2) system messages
2024-06-20 13:45:47 -07:00
func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.Options, msgs []api.Message, tools []api.Tool) (prompt string, images []llm.ImageData, _ error) {
2024-06-17 10:38:55 -07:00
var system []api.Message
isMllama := checkMllamaModelFamily(m)
var imageNumTokens int
// TODO: Ideally we would compute this from the projector metadata but some pieces are implementation dependent
if isMllama {
// Our mllama implementation packs all of the embeddings into a single token
imageNumTokens = 1
} else {
// Clip images are represented as 768 tokens, each an embedding
imageNumTokens = 768
}
2024-06-17 10:38:55 -07:00
n := len(msgs) - 1
2024-06-20 11:00:08 -07:00
// in reverse, find all messages that fit into context window
for i := n; i >= 0; i-- {
if isMllama && len(msgs[i].Images) > 1 {
return "", nil, errTooManyImages
}
// always include the last message
if i == n {
continue
}
system = make([]api.Message, 0)
for j := range i {
if msgs[j].Role == "system" {
system = append(system, msgs[j])
}
}
2024-06-17 10:38:55 -07:00
var b bytes.Buffer
2024-06-20 13:45:47 -07:00
if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...), Tools: tools}); err != nil {
2024-06-17 10:38:55 -07:00
return "", nil, err
}
2024-06-20 11:00:08 -07:00
s, err := tokenize(ctx, b.String())
if err != nil {
2024-06-17 10:38:55 -07:00
return "", nil, err
}
ctxLen := len(s)
2024-06-20 11:00:08 -07:00
if m.ProjectorPaths != nil {
2024-06-17 10:38:55 -07:00
for _, m := range msgs[i:] {
ctxLen += imageNumTokens * len(m.Images)
2024-06-17 10:38:55 -07:00
}
}
if ctxLen > opts.NumCtx {
2024-06-17 10:38:55 -07:00
slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))
break
2024-06-17 10:38:55 -07:00
} else {
n = i
}
2024-06-17 10:38:55 -07:00
}
currMsgIdx := n
for cnt, msg := range msgs[currMsgIdx:] {
prefix := ""
imgPrompt := ""
prompt := msg.Content
for _, i := range msg.Images {
var imgData llm.ImageData
if isMllama {
data, aspectRatioID, err := imageproc.Preprocess(i)
if err != nil {
return "", nil, err
}
buf := new(bytes.Buffer)
err = binary.Write(buf, binary.LittleEndian, data)
if err != nil {
return "", nil, err
}
imgData = llm.ImageData{
ID: len(images),
Data: buf.Bytes(),
AspectRatioID: aspectRatioID,
}
imgPrompt = "<|image|>"
} else {
imgData = llm.ImageData{
ID: len(images),
Data: i,
}
imgPrompt = " "
}
imgTag := fmt.Sprintf("[img-%d]", imgData.ID)
if !strings.Contains(prompt, "[img]") {
prefix += imgTag
} else {
prompt = strings.Replace(prompt, "[img]", imgTag, 1)
}
images = append(images, imgData)
}
msgs[currMsgIdx+cnt].Content = strings.TrimSpace(prefix + imgPrompt + prompt)
}
2024-06-20 11:00:08 -07:00
// truncate any messages that do not fit into the context window
2024-06-17 10:38:55 -07:00
var b bytes.Buffer
if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools}); err != nil {
2024-06-17 10:38:55 -07:00
return "", nil, err
}
return b.String(), images, nil
}
// checkMllamaModelFamily reports whether "mllama" appears among the model families
// listed in the model's config.
func checkMllamaModelFamily(m *Model) bool {
	found := false
	for _, family := range m.Config.ModelFamilies {
		if family != "mllama" {
			continue
		}
		found = true
		break
	}
	return found
}