ollama/openai/openai.go

// openai package provides middleware for partial compatibility with the OpenAI REST API
package openai

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/jmorganca/ollama/api"
)

type Error struct {
	Message string      `json:"message"`
	Type    string      `json:"type"`
	Param   interface{} `json:"param"`
	Code    *string     `json:"code"`
}

type ErrorResponse struct {
	Error Error `json:"error"`
}

type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type Choice struct {
	Index        int     `json:"index"`
	Message      Message `json:"message"`
	FinishReason *string `json:"finish_reason"`
}

type ChunkChoice struct {
	Index        int     `json:"index"`
	Delta        Message `json:"delta"`
	FinishReason *string `json:"finish_reason"`
}

type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

type ResponseFormat struct {
	Type string `json:"type"`
}

type ChatCompletionRequest struct {
	Model            string          `json:"model"`
	Messages         []Message       `json:"messages"`
	Stream           bool            `json:"stream"`
	MaxTokens        *int            `json:"max_tokens"`
	Seed             *int            `json:"seed"`
	Stop             any             `json:"stop"`
	Temperature      *float64        `json:"temperature"`
	FrequencyPenalty *float64        `json:"frequency_penalty"`
	PresencePenalty  *float64        `json:"presence_penalty_penalty"`
	TopP             *float64        `json:"top_p"`
	ResponseFormat   *ResponseFormat `json:"response_format"`
}

type ChatCompletion struct {
	Id                string   `json:"id"`
	Object            string   `json:"object"`
	Created           int64    `json:"created"`
	Model             string   `json:"model"`
	SystemFingerprint string   `json:"system_fingerprint"`
	Choices           []Choice `json:"choices"`
	Usage             Usage    `json:"usage,omitempty"`
}

type ChatCompletionChunk struct {
	Id                string        `json:"id"`
	Object            string        `json:"object"`
	Created           int64         `json:"created"`
	Model             string        `json:"model"`
	SystemFingerprint string        `json:"system_fingerprint"`
	Choices           []ChunkChoice `json:"choices"`
}

func NewError(code int, message string) ErrorResponse {
	var etype string
	switch code {
	case http.StatusBadRequest:
		etype = "invalid_request_error"
	case http.StatusNotFound:
		etype = "not_found_error"
	default:
		etype = "api_error"
	}

	return ErrorResponse{Error{Type: etype, Message: message}}
}

func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
	return ChatCompletion{
		Id:                id,
		Object:            "chat.completion",
		Created:           r.CreatedAt.Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []Choice{{
			Index:   0,
			Message: Message{Role: r.Message.Role, Content: r.Message.Content},
			FinishReason: func(done bool) *string {
				if done {
					reason := "stop"
					return &reason
				}
				return nil
			}(r.Done),
		}},
		Usage: Usage{
			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
			PromptTokens:     r.PromptEvalCount,
			CompletionTokens: r.EvalCount,
			TotalTokens:      r.PromptEvalCount + r.EvalCount,
		},
	}
}

func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
	return ChatCompletionChunk{
		Id:                id,
		Object:            "chat.completion.chunk",
		Created:           time.Now().Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []ChunkChoice{
			{
				Index: 0,
				Delta: Message{Role: "assistant", Content: r.Message.Content},
				FinishReason: func(done bool) *string {
					if done {
						reason := "stop"
						return &reason
					}
					return nil
				}(r.Done),
			},
		},
	}
}

func fromRequest(r ChatCompletionRequest) api.ChatRequest {
	var messages []api.Message
	for _, msg := range r.Messages {
		messages = append(messages, api.Message{Role: msg.Role, Content: msg.Content})
	}

	options := make(map[string]interface{})

	switch stop := r.Stop.(type) {
	case string:
		options["stop"] = []string{stop}
	case []interface{}:
		var stops []string
		for _, s := range stop {
			if str, ok := s.(string); ok {
				stops = append(stops, str)
			}
		}
		options["stop"] = stops
	}

	if r.MaxTokens != nil {
		options["num_predict"] = *r.MaxTokens
	}

	if r.Temperature != nil {
		options["temperature"] = *r.Temperature * 2.0
	} else {
		options["temperature"] = 1.0
	}

	if r.Seed != nil {
		options["seed"] = *r.Seed

		// temperature=0 is required for reproducible outputs
		options["temperature"] = 0.0
	}

	if r.FrequencyPenalty != nil {
		options["frequency_penalty"] = *r.FrequencyPenalty * 2.0
	}

	if r.PresencePenalty != nil {
		options["presence_penalty"] = *r.PresencePenalty * 2.0
	}

	if r.TopP != nil {
		options["top_p"] = *r.TopP
	} else {
		options["top_p"] = 1.0
	}

	var format string
	if r.ResponseFormat != nil && r.ResponseFormat.Type == "json_object" {
		format = "json"
	}

	return api.ChatRequest{
		Model:    r.Model,
		Messages: messages,
		Format:   format,
		Options:  options,
		Stream:   &r.Stream,
	}
}

type writer struct {
	stream bool
	id     string
	gin.ResponseWriter
}

func (w *writer) writeError(code int, data []byte) (int, error) {
	var serr api.StatusError
	err := json.Unmarshal(data, &serr)
	if err != nil {
		return 0, err
	}

	w.ResponseWriter.Header().Set("Content-Type", "application/json")
	err = json.NewEncoder(w.ResponseWriter).Encode(NewError(http.StatusInternalServerError, serr.Error()))
	if err != nil {
		return 0, err
	}

	return len(data), nil
}

func (w *writer) writeResponse(data []byte) (int, error) {
	var chatResponse api.ChatResponse
	err := json.Unmarshal(data, &chatResponse)
	if err != nil {
		return 0, err
	}

	// chat chunk
	if w.stream {
		d, err := json.Marshal(toChunk(w.id, chatResponse))
		if err != nil {
			return 0, err

		}

		w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")
		_, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d)))
		if err != nil {
			return 0, err
		}

		if chatResponse.Done {
			_, err = w.ResponseWriter.Write([]byte("data: [DONE]\n\n"))
			if err != nil {
				return 0, err
			}
		}

		return len(data), nil
	}

	// chat completion
	w.ResponseWriter.Header().Set("Content-Type", "application/json")
	err = json.NewEncoder(w.ResponseWriter).Encode(toChatCompletion(w.id, chatResponse))
	if err != nil {
		return 0, err
	}

	return len(data), nil
}

func (w *writer) Write(data []byte) (int, error) {
	code := w.ResponseWriter.Status()
	if code != http.StatusOK {
		return w.writeError(code, data)
	}

	return w.writeResponse(data)
}

func Middleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		var req ChatCompletionRequest
		err := c.ShouldBindJSON(&req)
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error()))
			return
		}

		if len(req.Messages) == 0 {
			c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, "[] is too short - 'messages'"))
			return
		}

		var b bytes.Buffer
		if err := json.NewEncoder(&b).Encode(fromRequest(req)); err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))
			return
		}

		c.Request.Body = io.NopCloser(&b)

		w := &writer{
			ResponseWriter: c.Writer,
			stream:         req.Stream,
			id:             fmt.Sprintf("chatcmpl-%d", rand.Intn(999)),
		}

		c.Writer = w

		c.Next()
	}
}
Initial OpenAI `/v1/chat/completions` API compatibility (#2376) 2024-02-07 22:24:29 +00:00			`// openai package provides middleware for partial compatibility with the OpenAI REST API`
			`package openai`

			`import (`
			`"bytes"`
			`"encoding/json"`
			`"fmt"`
			`"io"`
			`"math/rand"`
			`"net/http"`
			`"time"`

			`"github.com/gin-gonic/gin"`
			`"github.com/jmorganca/ollama/api"`
			`)`

			`type Error struct {`
			Message string `json:"message"`
			Type string `json:"type"`
			Param interface{} `json:"param"`
			Code *string `json:"code"`
			`}`

			`type ErrorResponse struct {`
			Error Error `json:"error"`
			`}`

			`type Message struct {`
			Role string `json:"role"`
			Content string `json:"content"`
			`}`

			`type Choice struct {`
			Index int `json:"index"`
			Message Message `json:"message"`
			FinishReason *string `json:"finish_reason"`
			`}`

			`type ChunkChoice struct {`
			Index int `json:"index"`
			Delta Message `json:"delta"`
			FinishReason *string `json:"finish_reason"`
			`}`

			`type Usage struct {`
			PromptTokens int `json:"prompt_tokens"`
			CompletionTokens int `json:"completion_tokens"`
			TotalTokens int `json:"total_tokens"`
			`}`

			`type ResponseFormat struct {`
			Type string `json:"type"`
			`}`

			`type ChatCompletionRequest struct {`
			Model string `json:"model"`
			Messages []Message `json:"messages"`
			Stream bool `json:"stream"`
			MaxTokens *int `json:"max_tokens"`
			Seed *int `json:"seed"`
			Stop any `json:"stop"`
			Temperature *float64 `json:"temperature"`
			FrequencyPenalty *float64 `json:"frequency_penalty"`
			PresencePenalty *float64 `json:"presence_penalty_penalty"`
			TopP *float64 `json:"top_p"`
			ResponseFormat *ResponseFormat `json:"response_format"`
			`}`

			`type ChatCompletion struct {`
			Id string `json:"id"`
			Object string `json:"object"`
			Created int64 `json:"created"`
			Model string `json:"model"`
			SystemFingerprint string `json:"system_fingerprint"`
			Choices []Choice `json:"choices"`
			Usage Usage `json:"usage,omitempty"`
			`}`

			`type ChatCompletionChunk struct {`
			Id string `json:"id"`
			Object string `json:"object"`
			Created int64 `json:"created"`
			Model string `json:"model"`
			SystemFingerprint string `json:"system_fingerprint"`
			Choices []ChunkChoice `json:"choices"`
			`}`

			`func NewError(code int, message string) ErrorResponse {`
			`var etype string`
			`switch code {`
			`case http.StatusBadRequest:`
			`etype = "invalid_request_error"`
			`case http.StatusNotFound:`
			`etype = "not_found_error"`
			`default:`
			`etype = "api_error"`
			`}`

			`return ErrorResponse{Error{Type: etype, Message: message}}`
			`}`

			`func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {`
			`return ChatCompletion{`
			`Id: id,`
			`Object: "chat.completion",`
			`Created: r.CreatedAt.Unix(),`
			`Model: r.Model,`
			`SystemFingerprint: "fp_ollama",`
			`Choices: []Choice{{`
			`Index: 0,`
			`Message: Message{Role: r.Message.Role, Content: r.Message.Content},`
			`FinishReason: func(done bool) *string {`
			`if done {`
			`reason := "stop"`
			`return &reason`
			`}`
			`return nil`
			`}(r.Done),`
			`}},`
			`Usage: Usage{`
			`// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count`
			`PromptTokens: r.PromptEvalCount,`
			`CompletionTokens: r.EvalCount,`
			`TotalTokens: r.PromptEvalCount + r.EvalCount,`
			`},`
			`}`
			`}`

			`func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {`
			`return ChatCompletionChunk{`
			`Id: id,`
			`Object: "chat.completion.chunk",`
			`Created: time.Now().Unix(),`
			`Model: r.Model,`
			`SystemFingerprint: "fp_ollama",`
			`Choices: []ChunkChoice{`
			`{`
			`Index: 0,`
			`Delta: Message{Role: "assistant", Content: r.Message.Content},`
			`FinishReason: func(done bool) *string {`
			`if done {`
			`reason := "stop"`
			`return &reason`
			`}`
			`return nil`
			`}(r.Done),`
			`},`
			`},`
			`}`
			`}`

			`func fromRequest(r ChatCompletionRequest) api.ChatRequest {`
			`var messages []api.Message`
			`for _, msg := range r.Messages {`
			`messages = append(messages, api.Message{Role: msg.Role, Content: msg.Content})`
			`}`

			`options := make(map[string]interface{})`

			`switch stop := r.Stop.(type) {`
			`case string:`
			`options["stop"] = []string{stop}`
			`case []interface{}:`
			`var stops []string`
			`for _, s := range stop {`
			`if str, ok := s.(string); ok {`
			`stops = append(stops, str)`
			`}`
			`}`
			`options["stop"] = stops`
			`}`

			`if r.MaxTokens != nil {`
			`options["num_predict"] = *r.MaxTokens`
			`}`

			`if r.Temperature != nil {`
			`options["temperature"] = r.Temperature 2.0`
			`} else {`
			`options["temperature"] = 1.0`
			`}`

			`if r.Seed != nil {`
			`options["seed"] = *r.Seed`

			`// temperature=0 is required for reproducible outputs`
			`options["temperature"] = 0.0`
			`}`

			`if r.FrequencyPenalty != nil {`
			`options["frequency_penalty"] = r.FrequencyPenalty 2.0`
			`}`

			`if r.PresencePenalty != nil {`
			`options["presence_penalty"] = r.PresencePenalty 2.0`
			`}`

			`if r.TopP != nil {`
			`options["top_p"] = *r.TopP`
			`} else {`
			`options["top_p"] = 1.0`
			`}`

			`var format string`
			`if r.ResponseFormat != nil && r.ResponseFormat.Type == "json_object" {`
			`format = "json"`
			`}`

			`return api.ChatRequest{`
			`Model: r.Model,`
			`Messages: messages,`
			`Format: format,`
			`Options: options,`
			`Stream: &r.Stream,`
			`}`
			`}`

			`type writer struct {`
			`stream bool`
			`id string`
			`gin.ResponseWriter`
			`}`

			`func (w *writer) writeError(code int, data []byte) (int, error) {`
			`var serr api.StatusError`
			`err := json.Unmarshal(data, &serr)`
			`if err != nil {`
			`return 0, err`
			`}`

			`w.ResponseWriter.Header().Set("Content-Type", "application/json")`
			`err = json.NewEncoder(w.ResponseWriter).Encode(NewError(http.StatusInternalServerError, serr.Error()))`
			`if err != nil {`
			`return 0, err`
			`}`

			`return len(data), nil`
			`}`

			`func (w *writer) writeResponse(data []byte) (int, error) {`
			`var chatResponse api.ChatResponse`
			`err := json.Unmarshal(data, &chatResponse)`
			`if err != nil {`
			`return 0, err`
			`}`

			`// chat chunk`
			`if w.stream {`
			`d, err := json.Marshal(toChunk(w.id, chatResponse))`
			`if err != nil {`
			`return 0, err`

			`}`

			`w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")`
			`_, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("data: %s\n\n", d)))`
			`if err != nil {`
			`return 0, err`
			`}`

			`if chatResponse.Done {`
			`_, err = w.ResponseWriter.Write([]byte("data: [DONE]\n\n"))`
			`if err != nil {`
			`return 0, err`
			`}`
			`}`

			`return len(data), nil`
			`}`

			`// chat completion`
			`w.ResponseWriter.Header().Set("Content-Type", "application/json")`
			`err = json.NewEncoder(w.ResponseWriter).Encode(toChatCompletion(w.id, chatResponse))`
			`if err != nil {`
			`return 0, err`
			`}`

			`return len(data), nil`
			`}`

			`func (w *writer) Write(data []byte) (int, error) {`
			`code := w.ResponseWriter.Status()`
			`if code != http.StatusOK {`
			`return w.writeError(code, data)`
			`}`

			`return w.writeResponse(data)`
			`}`

			`func Middleware() gin.HandlerFunc {`
			`return func(c *gin.Context) {`
			`var req ChatCompletionRequest`
			`err := c.ShouldBindJSON(&req)`
			`if err != nil {`
			`c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error()))`
			`return`
			`}`

			`if len(req.Messages) == 0 {`
			`c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, "[] is too short - 'messages'"))`
			`return`
			`}`

			`var b bytes.Buffer`
			`if err := json.NewEncoder(&b).Encode(fromRequest(req)); err != nil {`
			`c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))`
			`return`
			`}`

			`c.Request.Body = io.NopCloser(&b)`

			`w := &writer{`
			`ResponseWriter: c.Writer,`
			`stream: req.Stream,`
			`id: fmt.Sprintf("chatcmpl-%d", rand.Intn(999)),`
			`}`

			`c.Writer = w`

			`c.Next()`
			`}`
			`}`