2023-07-17 00:02:22 +00:00
|
|
|
package parser
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2023-07-25 17:22:23 +00:00
|
|
|
"bytes"
|
2023-07-17 21:21:27 +00:00
|
|
|
"errors"
|
2023-07-27 16:55:48 +00:00
|
|
|
"fmt"
|
2023-07-17 00:02:22 +00:00
|
|
|
"io"
|
2024-04-22 22:37:14 +00:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
2023-07-17 00:02:22 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Command is a single parsed instruction.
type Command struct {
	// Name identifies the instruction. Parse stores "model" for a FROM line,
	// the parameter name for a PARAMETER line, and the lower-cased keyword
	// for every other instruction.
	Name string

	// Args is the instruction's value with surrounding quotes removed. For
	// MESSAGE instructions Parse stores it as "<role>: <content>".
	Args string
}
|
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
// state enumerates the positions the parser can be in while scanning input
// one rune at a time.
type state int

const (
	// stateNil is the zero value: between instructions, nothing buffered.
	stateNil state = iota
	// stateName is active while an instruction keyword is being read.
	stateName
	// stateValue is active while an instruction's value is being read.
	stateValue
	// stateParameter is active while a PARAMETER's name is being read.
	stateParameter
	// stateMessage is active while a MESSAGE's role is being read.
	stateMessage
	// stateComment is active from '#' until the end of the line.
	stateComment
)
|
2023-07-17 00:02:22 +00:00
|
|
|
|
2024-04-24 23:12:56 +00:00
|
|
|
// Sentinel errors reported by the parser.
var (
	// errMissingFrom is returned when the parsed input has no FROM line.
	errMissingFrom = errors.New(`no FROM line`)

	// errInvalidMessageRole is returned when a MESSAGE role is not recognised.
	errInvalidMessageRole = errors.New(`message role must be one of "system", "user", or "assistant"`)

	// errInvalidCommand is returned when an instruction keyword is not recognised.
	errInvalidCommand = errors.New(`command must be one of "from", "license", "template", "system", "adapter", "parameter", or "message"`)
)
|
2024-04-22 22:37:14 +00:00
|
|
|
|
2024-04-25 01:49:14 +00:00
|
|
|
func Format(cmds []Command) string {
|
2024-04-26 23:59:31 +00:00
|
|
|
var sb strings.Builder
|
2024-04-25 01:49:14 +00:00
|
|
|
for _, cmd := range cmds {
|
|
|
|
name := cmd.Name
|
|
|
|
args := cmd.Args
|
|
|
|
|
|
|
|
switch cmd.Name {
|
|
|
|
case "model":
|
|
|
|
name = "from"
|
|
|
|
args = cmd.Args
|
|
|
|
case "license", "template", "system", "adapter":
|
|
|
|
args = quote(args)
|
|
|
|
case "message":
|
|
|
|
role, message, _ := strings.Cut(cmd.Args, ": ")
|
|
|
|
args = role + " " + quote(message)
|
|
|
|
default:
|
|
|
|
name = "parameter"
|
2024-04-26 23:59:31 +00:00
|
|
|
args = cmd.Name + " " + quote(cmd.Args)
|
2024-04-25 01:49:14 +00:00
|
|
|
}
|
|
|
|
|
2024-04-26 23:59:31 +00:00
|
|
|
fmt.Fprintln(&sb, strings.ToUpper(name), args)
|
2024-04-25 01:49:14 +00:00
|
|
|
}
|
|
|
|
|
2024-04-26 23:59:31 +00:00
|
|
|
return sb.String()
|
2024-04-25 01:49:14 +00:00
|
|
|
}
|
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
func Parse(r io.Reader) (cmds []Command, err error) {
|
|
|
|
var cmd Command
|
|
|
|
var curr state
|
|
|
|
var b bytes.Buffer
|
|
|
|
var role string
|
|
|
|
|
|
|
|
br := bufio.NewReader(r)
|
|
|
|
for {
|
|
|
|
r, _, err := br.ReadRune()
|
|
|
|
if errors.Is(err, io.EOF) {
|
|
|
|
break
|
|
|
|
} else if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-07-17 21:21:27 +00:00
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
next, r, err := parseRuneForState(r, curr)
|
|
|
|
if errors.Is(err, io.ErrUnexpectedEOF) {
|
|
|
|
return nil, fmt.Errorf("%w: %s", err, b.String())
|
|
|
|
} else if err != nil {
|
|
|
|
return nil, err
|
2023-07-17 00:02:22 +00:00
|
|
|
}
|
|
|
|
|
2024-04-26 22:13:27 +00:00
|
|
|
// process the state transition, some transitions need to be intercepted and redirected
|
2024-04-22 22:37:14 +00:00
|
|
|
if next != curr {
|
|
|
|
switch curr {
|
2024-04-27 00:11:47 +00:00
|
|
|
case stateName:
|
|
|
|
if !isValidCommand(b.String()) {
|
|
|
|
return nil, errInvalidCommand
|
|
|
|
}
|
|
|
|
|
2024-04-26 22:13:27 +00:00
|
|
|
// next state sometimes depends on the current buffer value
|
2024-04-22 22:37:14 +00:00
|
|
|
switch s := strings.ToLower(b.String()); s {
|
|
|
|
case "from":
|
|
|
|
cmd.Name = "model"
|
|
|
|
case "parameter":
|
2024-04-26 22:13:27 +00:00
|
|
|
// transition to stateParameter which sets command name
|
2024-04-22 22:37:14 +00:00
|
|
|
next = stateParameter
|
|
|
|
case "message":
|
2024-04-26 22:13:27 +00:00
|
|
|
// transition to stateMessage which validates the message role
|
2024-04-22 22:37:14 +00:00
|
|
|
next = stateMessage
|
|
|
|
fallthrough
|
|
|
|
default:
|
|
|
|
cmd.Name = s
|
|
|
|
}
|
2024-04-27 00:11:47 +00:00
|
|
|
case stateParameter:
|
|
|
|
cmd.Name = b.String()
|
2024-04-22 22:37:14 +00:00
|
|
|
case stateMessage:
|
2024-04-26 22:13:27 +00:00
|
|
|
if !isValidMessageRole(b.String()) {
|
2024-04-27 00:11:47 +00:00
|
|
|
return nil, errInvalidMessageRole
|
2024-04-22 22:37:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
role = b.String()
|
|
|
|
case stateComment, stateNil:
|
|
|
|
// pass
|
|
|
|
case stateValue:
|
|
|
|
s, ok := unquote(b.String())
|
|
|
|
if !ok || isSpace(r) {
|
|
|
|
if _, err := b.WriteRune(r); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if role != "" {
|
|
|
|
s = role + ": " + s
|
|
|
|
role = ""
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Args = s
|
|
|
|
cmds = append(cmds, cmd)
|
2023-08-10 23:09:02 +00:00
|
|
|
}
|
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
b.Reset()
|
|
|
|
curr = next
|
|
|
|
}
|
|
|
|
|
|
|
|
if strconv.IsPrint(r) {
|
|
|
|
if _, err := b.WriteRune(r); err != nil {
|
|
|
|
return nil, err
|
2023-08-10 23:22:08 +00:00
|
|
|
}
|
2024-04-22 22:37:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// flush the buffer
|
|
|
|
switch curr {
|
|
|
|
case stateComment, stateNil:
|
|
|
|
// pass; nothing to flush
|
|
|
|
case stateValue:
|
2024-04-25 02:17:26 +00:00
|
|
|
s, ok := unquote(b.String())
|
|
|
|
if !ok {
|
2024-04-22 22:37:14 +00:00
|
|
|
return nil, io.ErrUnexpectedEOF
|
2023-07-17 00:02:22 +00:00
|
|
|
}
|
2023-07-17 21:21:27 +00:00
|
|
|
|
2024-04-25 02:17:26 +00:00
|
|
|
if role != "" {
|
|
|
|
s = role + ": " + s
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Args = s
|
2024-04-22 22:37:14 +00:00
|
|
|
cmds = append(cmds, cmd)
|
|
|
|
default:
|
|
|
|
return nil, io.ErrUnexpectedEOF
|
2023-07-17 00:02:22 +00:00
|
|
|
}
|
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
for _, cmd := range cmds {
|
|
|
|
if cmd.Name == "model" {
|
|
|
|
return cmds, nil
|
|
|
|
}
|
2023-07-17 00:02:22 +00:00
|
|
|
}
|
|
|
|
|
2024-04-24 23:12:56 +00:00
|
|
|
return nil, errMissingFrom
|
2023-07-17 00:02:22 +00:00
|
|
|
}
|
2023-07-17 21:21:27 +00:00
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
func parseRuneForState(r rune, cs state) (state, rune, error) {
|
|
|
|
switch cs {
|
|
|
|
case stateNil:
|
|
|
|
switch {
|
|
|
|
case r == '#':
|
|
|
|
return stateComment, 0, nil
|
|
|
|
case isSpace(r), isNewline(r):
|
|
|
|
return stateNil, 0, nil
|
|
|
|
default:
|
|
|
|
return stateName, r, nil
|
|
|
|
}
|
|
|
|
case stateName:
|
|
|
|
switch {
|
|
|
|
case isAlpha(r):
|
|
|
|
return stateName, r, nil
|
|
|
|
case isSpace(r):
|
|
|
|
return stateValue, 0, nil
|
|
|
|
default:
|
2024-04-27 00:11:47 +00:00
|
|
|
return stateNil, 0, errInvalidCommand
|
2024-04-22 22:37:14 +00:00
|
|
|
}
|
|
|
|
case stateValue:
|
|
|
|
switch {
|
|
|
|
case isNewline(r):
|
|
|
|
return stateNil, r, nil
|
|
|
|
case isSpace(r):
|
|
|
|
return stateNil, r, nil
|
|
|
|
default:
|
|
|
|
return stateValue, r, nil
|
|
|
|
}
|
|
|
|
case stateParameter:
|
|
|
|
switch {
|
|
|
|
case isAlpha(r), isNumber(r), r == '_':
|
|
|
|
return stateParameter, r, nil
|
|
|
|
case isSpace(r):
|
|
|
|
return stateValue, 0, nil
|
|
|
|
default:
|
|
|
|
return stateNil, 0, io.ErrUnexpectedEOF
|
|
|
|
}
|
|
|
|
case stateMessage:
|
|
|
|
switch {
|
|
|
|
case isAlpha(r):
|
|
|
|
return stateMessage, r, nil
|
|
|
|
case isSpace(r):
|
|
|
|
return stateValue, 0, nil
|
|
|
|
default:
|
|
|
|
return stateNil, 0, io.ErrUnexpectedEOF
|
|
|
|
}
|
|
|
|
case stateComment:
|
|
|
|
switch {
|
|
|
|
case isNewline(r):
|
|
|
|
return stateNil, 0, nil
|
|
|
|
default:
|
|
|
|
return stateComment, 0, nil
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return stateNil, 0, errors.New("")
|
2023-07-27 16:55:48 +00:00
|
|
|
}
|
2024-04-22 22:37:14 +00:00
|
|
|
}
|
2023-07-27 16:55:48 +00:00
|
|
|
|
2024-04-25 01:49:14 +00:00
|
|
|
// quote returns s in a form that can be written as an instruction value and
// read back unchanged.
//
// A value is left bare unless reading it back would alter it. It is wrapped
// when it:
//   - is empty (a bare empty value cannot be represented),
//   - contains a line break ('\n' or '\r'),
//   - has leading or trailing whitespace, or
//   - starts with a double quote (which would otherwise be read as an opening
//     delimiter).
//
// Wrapped values use single double quotes, or triple double quotes when s
// itself contains a double quote.
func quote(s string) string {
	if s == "" {
		return `""`
	}

	needsQuotes := strings.ContainsAny(s, "\r\n") ||
		strings.TrimSpace(s) != s ||
		strings.HasPrefix(s, `"`)
	if !needsQuotes {
		return s
	}

	if strings.Contains(s, `"`) {
		return `"""` + s + `"""`
	}

	return `"` + s + `"`
}
|
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
// unquote strips one layer of matching double-quote delimiters from s.
//
// It returns the inner text and true when s is a complete value: either a
// bare value (not starting with a double quote), or a value whose opening
// `"` / `"""` delimiter has a matching closing delimiter at the very end.
// It returns "" and false when s is empty or the closing delimiter is
// missing.
func unquote(s string) (string, bool) {
	// TODO: single quotes
	const triple = `"""`

	n := len(s)
	switch {
	case n == 0:
		return "", false

	case n >= len(triple) && s[:len(triple)] == triple:
		// The opening and closing delimiters must not overlap.
		if n < 2*len(triple) || s[n-len(triple):] != triple {
			return "", false
		}
		return s[len(triple) : n-len(triple)], true

	case s[0] == '"':
		if n < 2 || s[n-1] != '"' {
			return "", false
		}
		return s[1 : n-1], true
	}

	return s, true
}
|
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
// isAlpha reports whether r is an ASCII letter (a-z or A-Z).
func isAlpha(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
		return true
	}
	return false
}
|
2023-07-17 21:21:27 +00:00
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
// isNumber reports whether r is an ASCII decimal digit (0-9).
func isNumber(r rune) bool {
	if r < '0' {
		return false
	}
	return r <= '9'
}
|
2023-07-17 21:21:27 +00:00
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
// isSpace reports whether r is a space or a horizontal tab. Line breaks are
// not included; see isNewline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
|
2023-07-25 18:50:23 +00:00
|
|
|
|
2024-04-22 22:37:14 +00:00
|
|
|
// isNewline reports whether r is a carriage return or a line feed.
func isNewline(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
|
2023-07-17 21:21:27 +00:00
|
|
|
|
2024-04-26 22:13:27 +00:00
|
|
|
// isValidMessageRole reports whether role is one of the accepted message
// roles: "system", "user" or "assistant". The comparison is case-sensitive.
func isValidMessageRole(role string) bool {
	switch role {
	case "system", "user", "assistant":
		return true
	}
	return false
}
|
2024-04-27 00:11:47 +00:00
|
|
|
|
|
|
|
// isValidCommand reports whether cmd, compared after lower-casing, is one of
// the recognised instruction keywords.
func isValidCommand(cmd string) bool {
	known := [...]string{
		"from", "license", "template", "system", "adapter", "parameter", "message",
	}

	lowered := strings.ToLower(cmd)
	for _, keyword := range known {
		if lowered == keyword {
			return true
		}
	}
	return false
}
|