// Source: ollama/parser/parser_test.go (Go, 579 lines, 12 KiB as published).

package parser
2024-01-05 14:04:31 -08:00
import (
2024-04-22 15:37:14 -07:00
"bytes"
"encoding/binary"
2024-04-22 15:37:14 -07:00
"fmt"
"io"
2024-01-05 14:04:31 -08:00
"strings"
"testing"
"unicode/utf16"
2024-01-05 14:04:31 -08:00
"github.com/stretchr/testify/assert"
2024-05-21 22:21:04 -07:00
"github.com/stretchr/testify/require"
2024-06-13 11:09:22 -07:00
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"
2024-01-05 14:04:31 -08:00
)
2024-04-30 10:55:19 -07:00
func TestParseFileFile(t *testing.T) {
2024-01-05 14:04:31 -08:00
input := `
FROM model1
ADAPTER adapter1
LICENSE MIT
PARAMETER param1 value1
PARAMETER param2 value2
TEMPLATE template1
`
reader := strings.NewReader(input)
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(reader)
2024-05-21 22:21:04 -07:00
require.NoError(t, err)
2024-01-05 14:04:31 -08:00
expectedCommands := []Command{
{Name: "model", Args: "model1"},
{Name: "adapter", Args: "adapter1"},
{Name: "license", Args: "MIT"},
{Name: "param1", Args: "value1"},
{Name: "param2", Args: "value2"},
{Name: "template", Args: "template1"},
}
2024-04-30 10:55:19 -07:00
assert.Equal(t, expectedCommands, modelfile.Commands)
2024-01-05 14:04:31 -08:00
}
2024-04-30 10:55:19 -07:00
func TestParseFileFrom(t *testing.T) {
2024-04-24 16:12:56 -07:00
var cases = []struct {
input string
expected []Command
err error
}{
{
"FROM foo",
[]Command{{Name: "model", Args: "foo"}},
nil,
},
{
"FROM /path/to/model",
[]Command{{Name: "model", Args: "/path/to/model"}},
nil,
},
{
"FROM /path/to/model/fp16.bin",
[]Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
nil,
},
{
"FROM llama3:latest",
[]Command{{Name: "model", Args: "llama3:latest"}},
nil,
},
{
"FROM llama3:7b-instruct-q4_K_M",
[]Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
nil,
},
{
"", nil, errMissingFrom,
},
{
"PARAMETER param1 value1",
nil,
errMissingFrom,
},
{
"PARAMETER param1 value1\nFROM foo",
[]Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
nil,
},
}
2024-01-05 14:04:31 -08:00
2024-04-24 16:12:56 -07:00
for _, c := range cases {
t.Run("", func(t *testing.T) {
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(strings.NewReader(c.input))
2024-05-21 22:21:04 -07:00
require.ErrorIs(t, err, c.err)
2024-04-30 10:55:19 -07:00
if modelfile != nil {
assert.Equal(t, c.expected, modelfile.Commands)
}
2024-04-24 16:12:56 -07:00
})
}
2024-01-05 14:04:31 -08:00
}
2024-04-30 10:55:19 -07:00
func TestParseFileParametersMissingValue(t *testing.T) {
2024-01-05 14:04:31 -08:00
input := `
FROM foo
PARAMETER param1
`
reader := strings.NewReader(input)
2024-04-30 10:55:19 -07:00
_, err := ParseFile(reader)
2024-05-21 22:21:04 -07:00
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
2024-01-05 14:04:31 -08:00
}
2024-01-25 12:12:36 -08:00
2024-04-30 10:55:19 -07:00
func TestParseFileBadCommand(t *testing.T) {
2024-04-26 17:11:47 -07:00
input := `
FROM foo
BADCOMMAND param1 value1
`
2024-04-30 10:55:19 -07:00
_, err := ParseFile(strings.NewReader(input))
2024-05-21 22:21:04 -07:00
require.ErrorIs(t, err, errInvalidCommand)
2024-04-26 17:11:47 -07:00
}
2024-04-30 10:55:19 -07:00
func TestParseFileMessages(t *testing.T) {
2024-04-22 15:37:14 -07:00
var cases = []struct {
input string
expected []Command
err error
}{
{
`
FROM foo
2024-04-30 10:55:19 -07:00
MESSAGE system You are a file parser. Always parse things.
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-30 10:55:19 -07:00
{Name: "message", Args: "system: You are a file parser. Always parse things."},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
2024-01-25 12:12:36 -08:00
FROM foo
2024-04-30 10:55:19 -07:00
MESSAGE system You are a file parser. Always parse things.`,
2024-04-24 19:17:26 -07:00
[]Command{
{Name: "model", Args: "foo"},
2024-04-30 10:55:19 -07:00
{Name: "message", Args: "system: You are a file parser. Always parse things."},
2024-04-24 19:17:26 -07:00
},
nil,
},
{
`
FROM foo
2024-04-30 10:55:19 -07:00
MESSAGE system You are a file parser. Always parse things.
2024-01-25 12:12:36 -08:00
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-30 10:55:19 -07:00
{Name: "message", Args: "system: You are a file parser. Always parse things."},
2024-04-22 15:37:14 -07:00
{Name: "message", Args: "user: Hey there!"},
{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
},
nil,
},
{
`
FROM foo
MESSAGE system """
2024-04-30 10:55:19 -07:00
You are a multiline file parser. Always parse things.
2024-04-22 15:37:14 -07:00
"""
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-30 10:55:19 -07:00
{Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
MESSAGE badguy I'm a bad guy!
`,
nil,
2024-04-26 17:11:47 -07:00
errInvalidMessageRole,
2024-04-22 15:37:14 -07:00
},
{
`
FROM foo
MESSAGE system
`,
nil,
io.ErrUnexpectedEOF,
},
{
`
FROM foo
MESSAGE system`,
nil,
io.ErrUnexpectedEOF,
},
}
2024-01-25 12:12:36 -08:00
2024-04-22 15:37:14 -07:00
for _, c := range cases {
t.Run("", func(t *testing.T) {
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(strings.NewReader(c.input))
2024-05-21 22:21:04 -07:00
require.ErrorIs(t, err, c.err)
2024-04-30 10:55:19 -07:00
if modelfile != nil {
assert.Equal(t, c.expected, modelfile.Commands)
}
2024-04-22 15:37:14 -07:00
})
}
}
2024-01-25 12:12:36 -08:00
2024-04-30 10:55:19 -07:00
func TestParseFileQuoted(t *testing.T) {
2024-04-22 15:37:14 -07:00
var cases = []struct {
multiline string
expected []Command
err error
}{
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """
2024-04-22 15:37:14 -07:00
This is a
2024-04-24 19:17:26 -07:00
multiline system.
2024-04-22 15:37:14 -07:00
"""
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: "\nThis is a\nmultiline system.\n"},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """
2024-04-22 15:37:14 -07:00
This is a
2024-04-24 19:17:26 -07:00
multiline system."""
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: "\nThis is a\nmultiline system."},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """This is a
multiline system."""
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: "This is a\nmultiline system."},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """This is a multiline system."""
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: "This is a multiline system."},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """This is a multiline system.""
2024-04-22 15:37:14 -07:00
`,
nil,
io.ErrUnexpectedEOF,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM "
2024-04-22 15:37:14 -07:00
`,
nil,
io.ErrUnexpectedEOF,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """
This is a multiline system with "quotes".
2024-04-22 15:37:14 -07:00
"""
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """"""
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: ""},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM ""
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: ""},
2024-04-22 15:37:14 -07:00
},
nil,
},
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM "'"
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: "'"},
2024-04-22 15:37:14 -07:00
},
nil,
},
2024-04-24 16:12:56 -07:00
{
`
FROM foo
2024-04-24 19:17:26 -07:00
SYSTEM """''"'""'""'"'''''""'""'"""
2024-04-24 16:12:56 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
2024-04-24 19:17:26 -07:00
{Name: "system", Args: `''"'""'""'"'''''""'""'`},
},
nil,
},
{
`
FROM foo
TEMPLATE """
{{ .Prompt }}
"""`,
[]Command{
{Name: "model", Args: "foo"},
{Name: "template", Args: "\n{{ .Prompt }}\n"},
2024-04-24 16:12:56 -07:00
},
nil,
},
2024-01-25 12:12:36 -08:00
}
2024-04-22 15:37:14 -07:00
for _, c := range cases {
t.Run("", func(t *testing.T) {
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(strings.NewReader(c.multiline))
2024-05-21 22:21:04 -07:00
require.ErrorIs(t, err, c.err)
2024-04-30 10:55:19 -07:00
if modelfile != nil {
assert.Equal(t, c.expected, modelfile.Commands)
}
2024-04-22 15:37:14 -07:00
})
}
2024-01-25 12:12:36 -08:00
}
2024-04-30 10:55:19 -07:00
func TestParseFileParameters(t *testing.T) {
2024-04-24 16:12:56 -07:00
var cases = map[string]struct {
name, value string
}{
"numa true": {"numa", "true"},
"num_ctx 1": {"num_ctx", "1"},
"num_batch 1": {"num_batch", "1"},
"num_gqa 1": {"num_gqa", "1"},
"num_gpu 1": {"num_gpu", "1"},
"main_gpu 1": {"main_gpu", "1"},
"low_vram true": {"low_vram", "true"},
"f16_kv true": {"f16_kv", "true"},
"logits_all true": {"logits_all", "true"},
"vocab_only true": {"vocab_only", "true"},
"use_mmap true": {"use_mmap", "true"},
"use_mlock true": {"use_mlock", "true"},
"num_thread 1": {"num_thread", "1"},
"num_keep 1": {"num_keep", "1"},
"seed 1": {"seed", "1"},
"num_predict 1": {"num_predict", "1"},
"top_k 1": {"top_k", "1"},
"top_p 1.0": {"top_p", "1.0"},
"tfs_z 1.0": {"tfs_z", "1.0"},
"typical_p 1.0": {"typical_p", "1.0"},
"repeat_last_n 1": {"repeat_last_n", "1"},
"temperature 1.0": {"temperature", "1.0"},
"repeat_penalty 1.0": {"repeat_penalty", "1.0"},
"presence_penalty 1.0": {"presence_penalty", "1.0"},
"frequency_penalty 1.0": {"frequency_penalty", "1.0"},
"mirostat 1": {"mirostat", "1"},
"mirostat_tau 1.0": {"mirostat_tau", "1.0"},
"mirostat_eta 1.0": {"mirostat_eta", "1.0"},
"penalize_newline true": {"penalize_newline", "true"},
"stop ### User:": {"stop", "### User:"},
"stop ### User: ": {"stop", "### User: "},
"stop \"### User:\"": {"stop", "### User:"},
"stop \"### User: \"": {"stop", "### User: "},
"stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
"stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
"stop <|endoftext|>": {"stop", "<|endoftext|>"},
"stop <|eot_id|>": {"stop", "<|eot_id|>"},
"stop </s>": {"stop", "</s>"},
2024-04-22 15:37:14 -07:00
}
2024-01-25 12:12:36 -08:00
2024-04-24 16:12:56 -07:00
for k, v := range cases {
t.Run(k, func(t *testing.T) {
2024-04-22 15:37:14 -07:00
var b bytes.Buffer
fmt.Fprintln(&b, "FROM foo")
2024-04-24 16:12:56 -07:00
fmt.Fprintln(&b, "PARAMETER", k)
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(&b)
2024-05-21 22:21:04 -07:00
require.NoError(t, err)
2024-04-24 16:12:56 -07:00
assert.Equal(t, []Command{
{Name: "model", Args: "foo"},
{Name: v.name, Args: v.value},
2024-04-30 10:55:19 -07:00
}, modelfile.Commands)
2024-04-22 15:37:14 -07:00
})
}
}
2024-04-30 10:55:19 -07:00
func TestParseFileComments(t *testing.T) {
2024-04-22 15:37:14 -07:00
var cases = []struct {
input string
expected []Command
}{
{
`
# comment
2024-01-25 12:12:36 -08:00
FROM foo
2024-04-22 15:37:14 -07:00
`,
[]Command{
{Name: "model", Args: "foo"},
},
},
}
2024-01-25 12:12:36 -08:00
2024-04-22 15:37:14 -07:00
for _, c := range cases {
t.Run("", func(t *testing.T) {
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(strings.NewReader(c.input))
2024-05-21 22:21:04 -07:00
require.NoError(t, err)
2024-04-30 10:55:19 -07:00
assert.Equal(t, c.expected, modelfile.Commands)
2024-04-22 15:37:14 -07:00
})
}
2024-01-25 12:12:36 -08:00
}
2024-04-24 18:49:14 -07:00
2024-04-30 10:55:19 -07:00
func TestParseFileFormatParseFile(t *testing.T) {
2024-04-24 18:49:14 -07:00
var cases = []string{
`
FROM foo
ADAPTER adapter1
LICENSE MIT
PARAMETER param1 value1
PARAMETER param2 value2
TEMPLATE template1
2024-04-30 10:55:19 -07:00
MESSAGE system You are a file parser. Always parse things.
2024-04-24 18:49:14 -07:00
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
`
FROM foo
ADAPTER adapter1
LICENSE MIT
PARAMETER param1 value1
PARAMETER param2 value2
TEMPLATE template1
MESSAGE system """
You are a store greeter. Always responsed with "Hello!".
"""
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
`
FROM foo
ADAPTER adapter1
LICENSE """
Very long and boring legal text.
Blah blah blah.
"Oh look, a quote!"
"""
PARAMETER param1 value1
PARAMETER param2 value2
TEMPLATE template1
MESSAGE system """
You are a store greeter. Always responsed with "Hello!".
"""
MESSAGE user Hey there!
MESSAGE assistant Hello, I want to parse all the things!
`,
`
FROM foo
SYSTEM ""
2024-04-24 18:49:14 -07:00
`,
}
for _, c := range cases {
t.Run("", func(t *testing.T) {
2024-04-30 10:55:19 -07:00
modelfile, err := ParseFile(strings.NewReader(c))
2024-05-21 22:21:04 -07:00
require.NoError(t, err)
2024-04-24 18:49:14 -07:00
2024-04-30 10:55:19 -07:00
modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
2024-05-21 22:21:04 -07:00
require.NoError(t, err)
2024-04-24 18:49:14 -07:00
2024-04-30 10:55:19 -07:00
assert.Equal(t, modelfile, modelfile2)
2024-04-24 18:49:14 -07:00
})
}
}
func TestParseFileUTF16ParseFile(t *testing.T) {
data := `FROM bob
PARAMETER param1 1
PARAMETER param2 4096
SYSTEM You are a utf16 file.
`
expected := []Command{
{Name: "model", Args: "bob"},
{Name: "param1", Args: "1"},
{Name: "param2", Args: "4096"},
{Name: "system", Args: "You are a utf16 file."},
}
2024-06-13 11:09:22 -07:00
t.Run("le", func(t *testing.T) {
var b bytes.Buffer
require.NoError(t, binary.Write(&b, binary.LittleEndian, []byte{0xff, 0xfe}))
require.NoError(t, binary.Write(&b, binary.LittleEndian, utf16.Encode([]rune(data))))
2024-06-13 11:09:22 -07:00
actual, err := ParseFile(&b)
require.NoError(t, err)
2024-06-13 11:09:22 -07:00
assert.Equal(t, expected, actual.Commands)
})
t.Run("be", func(t *testing.T) {
var b bytes.Buffer
require.NoError(t, binary.Write(&b, binary.BigEndian, []byte{0xfe, 0xff}))
require.NoError(t, binary.Write(&b, binary.BigEndian, utf16.Encode([]rune(data))))
actual, err := ParseFile(&b)
require.NoError(t, err)
assert.Equal(t, expected, actual.Commands)
})
}
// TestParseMultiByte feeds the parser a Modelfile whose SYSTEM value contains
// multi-byte characters (CJK text and an emoji), encoded as UTF-8 and as
// UTF-16 in both byte orders with a BOM, and expects identical commands for
// every encoding.
func TestParseMultiByte(t *testing.T) {
	const modelfileText = `FROM test
SYSTEM 你好👋`

	want := []Command{
		{Name: "model", Args: "test"},
		{Name: "system", Args: "你好👋"},
	}

	for _, enc := range []encoding.Encoding{
		unicode.UTF8,
		unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
		unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
	} {
		// The subtest is named after the encoding's string form.
		subtestName := fmt.Sprintf("%s", enc)

		t.Run(subtestName, func(t *testing.T) {
			// Transcode the UTF-8 source text into the encoding under test.
			encoded, err := enc.NewEncoder().String(modelfileText)
			require.NoError(t, err)

			got, err := ParseFile(strings.NewReader(encoded))
			require.NoError(t, err)

			assert.Equal(t, want, got.Commands)
		})
	}
}