ollama/server/prompt.go

package server

import (
	"bytes"
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/server/imageproc"
	"github.com/ollama/ollama/template"
)

// tokenizeFunc turns a string into a slice of ints, or reports an error.
// chatPrompt calls it on the rendered prompt and uses the length of the
// result as the prompt's size when checking it against the context window.
type tokenizeFunc func(context.Context, string) ([]int, error)

// errTooManyImages is returned by chatPrompt when the model belongs to the
// "mllama" family and a message carries more than one image.
var errTooManyImages = errors.New("vision model only supports a single image per message")

// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn.
// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the
// latest message and 2) system messages.
//
// Walking backwards from the newest message, it renders each candidate window through m.Template, tokenizes the result
// with tokenize, and stops at the first window whose token count (plus 768 per image when m.ProjectorPaths is set)
// exceeds opts.NumCtx. System messages that fall before the kept window are prepended to it.
//
// For models in the "mllama" family, a message carrying more than one image yields errTooManyImages, and only the most
// recent image inside the kept window is preprocessed and returned. For all other models every image in the kept
// window is returned and referenced from the message text with an "[img-N]" tag.
//
// chatPrompt rewrites the Content of the kept entries of msgs in place when inserting image tags. An empty msgs
// renders the template with no messages.
func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.Options, msgs []api.Message, tools []api.Tool) (prompt string, images []llm.ImageData, _ error) {
	isMllama := checkMllamaModelFamily(m)

	// systemBefore collects, in order, the system messages found in msgs[:end].
	// It always returns a fresh slice so callers may append to it freely.
	systemBefore := func(end int) []api.Message {
		system := make([]api.Message, 0)
		for _, msg := range msgs[:end] {
			if msg.Role == "system" {
				system = append(system, msg)
			}
		}
		return system
	}

	n := len(msgs) - 1
	// in reverse, find all messages that fit into context window
	for i := n; i >= 0; i-- {
		if isMllama && len(msgs[i].Images) > 1 {
			return "", nil, errTooManyImages
		}

		// always include the last message
		if i == n {
			continue
		}

		var b bytes.Buffer
		if err := m.Template.Execute(&b, template.Values{Messages: append(systemBefore(i), msgs[i:]...), Tools: tools}); err != nil {
			return "", nil, err
		}

		tokens, err := tokenize(ctx, b.String())
		if err != nil {
			return "", nil, err
		}

		ctxLen := len(tokens)
		if m.ProjectorPaths != nil {
			for _, msg := range msgs[i:] {
				// images are represented as 768 sized embeddings
				// TODO: get embedding length from project metadata
				ctxLen += 768 * len(msg.Images)
			}
		}

		if ctxLen > opts.NumCtx {
			slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))
			break
		}

		n = i
	}

	// n is -1 when msgs is empty; clamp it so the slice expressions below stay in range.
	currMsgIdx := max(n, 0)

	// Gather the retained system messages relative to the final window start rather than
	// the last index the loop examined: when the scan stops on a system message, that
	// message lies just before the window and must still be carried into the prompt.
	system := systemBefore(currMsgIdx)

	if isMllama {
		// only the most recent image in the kept window is used
		for i := len(msgs) - 1; i >= currMsgIdx; i-- {
			if len(msgs[i].Images) == 0 {
				continue
			}

			data, aspectRatioID, err := imageproc.Preprocess(msgs[i].Images[0])
			if err != nil {
				return "", nil, err
			}

			buf := new(bytes.Buffer)
			if err := binary.Write(buf, binary.LittleEndian, data); err != nil {
				return "", nil, err
			}

			msgs[i].Content = strings.TrimSpace("<|image|>" + msgs[i].Content)
			images = append(images, llm.ImageData{
				Data:          buf.Bytes(),
				AspectRatioID: aspectRatioID,
			})
			break
		}
	} else {
		for cnt, msg := range msgs[currMsgIdx:] {
			prefix := ""
			content := msg.Content
			for _, img := range msg.Images {
				imgData := llm.ImageData{
					ID:   len(images),
					Data: img,
				}

				// each image takes over the next "[img]" placeholder in the text;
				// once none remain, its tag is placed in front of the message
				imgTag := fmt.Sprintf("[img-%d]", imgData.ID)
				if strings.Contains(content, "[img]") {
					content = strings.Replace(content, "[img]", imgTag, 1)
				} else {
					prefix += imgTag
				}

				images = append(images, imgData)
			}
			msgs[currMsgIdx+cnt].Content = strings.TrimSpace(prefix + " " + content)
		}
	}

	// render the final prompt from the retained system messages followed by the kept window
	var b bytes.Buffer
	if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools}); err != nil {
		return "", nil, err
	}

	return b.String(), images, nil
}

// checkMllamaModelFamily reports whether any entry of m.Config.ModelFamilies
// is exactly "mllama".
func checkMllamaModelFamily(m *Model) bool {
	found := false
	for _, family := range m.Config.ModelFamilies {
		if family == "mllama" {
			found = true
			break
		}
	}
	return found
}
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`package server`

			`import (`
update message processing 2024-06-17 17:38:55 +00:00			`"bytes"`
			`"context"`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`"encoding/binary"`
			`"errors"`
			`"fmt"`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`"log/slog"`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`"strings"`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00
change `github.com/jmorganca/ollama` to `github.com/ollama/ollama` (#3347) 2024-03-26 20:04:17 +00:00			`"github.com/ollama/ollama/api"`
update message processing 2024-06-17 17:38:55 +00:00			`"github.com/ollama/ollama/llm"`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`"github.com/ollama/ollama/server/imageproc"`
rename templates to template 2024-06-10 21:54:42 +00:00			`"github.com/ollama/ollama/template"`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`)`

comments 2024-06-20 18:00:08 +00:00			`type tokenizeFunc func(context.Context, string) ([]int, error)`

image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`var errTooManyImages = errors.New("vision model only supports a single image per message")`

comments 2024-06-20 18:00:08 +00:00			`// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn.`
			`// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the`
			`// latest message and 2) system messages`
tools 2024-06-20 20:45:47 +00:00			`func chatPrompt(ctx context.Context, m Model, tokenize tokenizeFunc, opts api.Options, msgs []api.Message, tools []api.Tool) (prompt string, images []llm.ImageData, _ error) {`
update message processing 2024-06-17 17:38:55 +00:00			`var system []api.Message`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00
			`isMllama := checkMllamaModelFamily(m)`

update message processing 2024-06-17 17:38:55 +00:00			`n := len(msgs) - 1`
comments 2024-06-20 18:00:08 +00:00			`// in reverse, find all messages that fit into context window`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`for i := n; i >= 0; i-- {`
			`if isMllama && len(msgs[i].Images) > 1 {`
			`return "", nil, errTooManyImages`
			`}`

			`// always include the last message`
			`if i == n {`
			`continue`
			`}`

fix system prompt (#5662) * fix system prompt * execute template when hitting previous roles * fix tests --------- Co-authored-by: jmorganca <jmorganca@gmail.com> 2024-07-13 04:04:44 +00:00			`system = make([]api.Message, 0)`
			`for j := range i {`
			`if msgs[j].Role == "system" {`
			`system = append(system, msgs[j])`
			`}`
			`}`

update message processing 2024-06-17 17:38:55 +00:00			`var b bytes.Buffer`
tools 2024-06-20 20:45:47 +00:00			`if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...), Tools: tools}); err != nil {`
update message processing 2024-06-17 17:38:55 +00:00			`return "", nil, err`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`

comments 2024-06-20 18:00:08 +00:00			`s, err := tokenize(ctx, b.String())`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`if err != nil {`
update message processing 2024-06-17 17:38:55 +00:00			`return "", nil, err`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`

image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`ctxLen := len(s)`
comments 2024-06-20 18:00:08 +00:00			`if m.ProjectorPaths != nil {`
update message processing 2024-06-17 17:38:55 +00:00			`for _, m := range msgs[i:] {`
comments 2024-06-20 18:00:08 +00:00			`// images are represented as 768 sized embeddings`
			`// TODO: get embedding length from project metadata`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`ctxLen += 768 * len(m.Images)`
update message processing 2024-06-17 17:38:55 +00:00			`}`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`

image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`if ctxLen > opts.NumCtx {`
update message processing 2024-06-17 17:38:55 +00:00			`slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`break`
update message processing 2024-06-17 17:38:55 +00:00			`} else {`
			`n = i`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`
update message processing 2024-06-17 17:38:55 +00:00			`}`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`currMsgIdx := n`

			`if isMllama {`
			`lastMsgIdx := len(msgs) - 1`
			`for i := lastMsgIdx; i >= currMsgIdx; i-- {`
			`if len(msgs[i].Images) > 0 {`
			`data, aspectRatioID, err := imageproc.Preprocess(msgs[i].Images[0])`
			`if err != nil {`
			`return "", nil, err`
			`}`

			`buf := new(bytes.Buffer)`
			`err = binary.Write(buf, binary.LittleEndian, data)`
			`if err != nil {`
			`return "", nil, err`
			`}`

			`imgData := llm.ImageData{`
			`Data: buf.Bytes(),`
			`AspectRatioID: aspectRatioID,`
			`}`

			`msgs[i].Content = strings.TrimSpace("<\|image\|>" + msgs[i].Content)`
			`images = append(images, imgData)`
			`break`
			`}`
			`}`
			`} else {`
			`for cnt, msg := range msgs[currMsgIdx:] {`
			`prefix := ""`
			`prompt := msg.Content`
			`for _, i := range msg.Images {`
			`imgData := llm.ImageData{`
			`ID: len(images),`
			`Data: i,`
			`}`

			`imgTag := fmt.Sprintf("[img-%d]", imgData.ID)`
			`if !strings.Contains(prompt, "[img]") {`
			`prefix += imgTag`
			`} else {`
			`prompt = strings.Replace(prompt, "[img]", imgTag, 1)`
			`}`

			`images = append(images, imgData)`
			`}`
			`msgs[currMsgIdx+cnt].Content = strings.TrimSpace(prefix + " " + prompt)`
			`}`
			`}`

comments 2024-06-20 18:00:08 +00:00			`// truncate any messages that do not fit into the context window`
update message processing 2024-06-17 17:38:55 +00:00			`var b bytes.Buffer`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools}); err != nil {`
update message processing 2024-06-17 17:38:55 +00:00			`return "", nil, err`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`

image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`return b.String(), images, nil`
			`}`

			`func checkMllamaModelFamily(m *Model) bool {`
			`for _, arch := range m.Config.ModelFamilies {`
			`if arch == "mllama" {`
			`return true`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`
			`}`
image processing for llama3.2 (#6963) Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com> 2024-10-18 23:12:35 +00:00			`return false`
Fix issues with templating prompt in chat mode (#2460) 2024-02-12 23:06:57 +00:00			`}`