add done_reason to the api (#4235)
parent 1580ed4c06 · commit cfa84b8470
4 changed files with 44 additions and 40 deletions
```diff
@@ -117,6 +117,7 @@ type ChatResponse struct {
 	Model      string    `json:"model"`
 	CreatedAt  time.Time `json:"created_at"`
 	Message    Message   `json:"message"`
+	DoneReason string    `json:"done_reason"`
 
 	Done bool `json:"done"`
 
@@ -309,6 +310,9 @@ type GenerateResponse struct {
 	// Done specifies if the response is complete.
 	Done bool `json:"done"`
 
+	// DoneReason is the reason the model stopped generating text.
+	DoneReason string `json:"done_reason"`
+
 	// Context is an encoding of the conversation used in this response; this
 	// can be sent in the next request to keep a conversational memory.
 	Context []int `json:"context,omitempty"`
```
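On the wire the new field rides alongside `done` under the `done_reason` tag shown above. A minimal sketch of reading it back out of a final chat message; the literal JSON and the model name are illustrative, and the module path is assumed. The values `"stop"`, `"length"`, and `"load"` are the reasons introduced elsewhere in this commit.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/api" // module path assumed
)

func main() {
	// Final message of a streamed chat, as the server would emit it.
	final := []byte(`{"model":"llama3","message":{"role":"assistant","content":""},"done":true,"done_reason":"stop"}`)

	var r api.ChatResponse
	if err := json.Unmarshal(final, &r); err != nil {
		panic(err)
	}

	switch r.DoneReason {
	case "stop":
		fmt.Println("model finished naturally")
	case "length":
		fmt.Println("hit the prediction limit")
	case "load":
		fmt.Println("request only loaded the model")
	}
}
```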
```diff
@@ -580,6 +580,7 @@ type completion struct {
 	Model        string `json:"model"`
 	Prompt       string `json:"prompt"`
 	Stop         bool   `json:"stop"`
+	StoppedLimit bool   `json:"stopped_limit"`
 
 	Timings struct {
 		PredictedN int `json:"predicted_n"`
@@ -598,6 +599,7 @@ type CompletionRequest struct {
 
 type CompletionResponse struct {
 	Content            string
+	DoneReason         string
 	Done               bool
 	PromptEvalCount    int
 	PromptEvalDuration time.Duration
@@ -739,8 +741,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		}
 
 		if c.Stop {
+			doneReason := "stop"
+			if c.StoppedLimit {
+				doneReason = "length"
+			}
+
 			fn(CompletionResponse{
 				Done:               true,
+				DoneReason:         doneReason,
 				PromptEvalCount:    c.Timings.PromptN,
 				PromptEvalDuration: parseDurationMs(c.Timings.PromptMS),
 				EvalCount:          c.Timings.PredictedN,
```
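The `Completion` hunk above derives the reason from two flags reported by the underlying llama.cpp server: `stop` (generation ended) and `stopped_limit` (the prediction limit was hit). Restated as a standalone helper purely for illustration; this function does not exist in the repo.

```go
// doneReason mirrors the mapping performed inline in Completion above:
// an unfinished response has no reason yet, a response truncated by the
// prediction limit reports "length", and everything else reports "stop".
func doneReason(stop, stoppedLimit bool) string {
	if !stop {
		return ""
	}
	if stoppedLimit {
		return "length"
	}
	return "stop"
}
```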
```diff
@@ -109,13 +109,7 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 		Choices: []Choice{{
 			Index:   0,
 			Message: Message{Role: r.Message.Role, Content: r.Message.Content},
-			FinishReason: func(done bool) *string {
-				if done {
-					reason := "stop"
-					return &reason
-				}
-				return nil
-			}(r.Done),
+			FinishReason: &r.DoneReason,
 		}},
 		Usage: Usage{
 			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
@@ -137,13 +131,7 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
 			{
 				Index: 0,
 				Delta: Message{Role: "assistant", Content: r.Message.Content},
-				FinishReason: func(done bool) *string {
-					if done {
-						reason := "stop"
-						return &reason
-					}
-					return nil
-				}(r.Done),
+				FinishReason: &r.DoneReason,
 			},
 		},
 	}
```
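In the OpenAI-compatibility layer, the hand-rolled closure that returned `"stop"` or `nil` is dropped in favor of pointing `FinishReason` at the new field. The sketch below only illustrates how a `*string` field marshals, which is what makes the pointer-to-field pattern work; the `choice` struct is made up for the example and is not the repo's type.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// choice stands in for the OpenAI-style response types, which expose
// finish_reason as *string so it can serialize as null until generation completes.
type choice struct {
	FinishReason *string `json:"finish_reason"`
}

func main() {
	mid, _ := json.Marshal(choice{}) // {"finish_reason":null}

	reason := "stop"
	last, _ := json.Marshal(choice{FinishReason: &reason}) // {"finish_reason":"stop"}

	fmt.Println(string(mid))
	fmt.Println(string(last))
}
```

One visible consequence: `&r.DoneReason` is never nil, so as far as these hunks show, mid-stream chunks would carry the current value (an empty string until the final message) rather than `null`, unless code outside these hunks intervenes.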
```diff
@@ -155,6 +155,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Model:      req.Model,
 			Done:       true,
+			DoneReason: "load",
 		})
 		return
 	}
@@ -226,6 +227,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Done:       r.Done,
 			Response:   r.Content,
+			DoneReason: r.DoneReason,
 			Metrics: api.Metrics{
 				PromptEvalCount:    r.PromptEvalCount,
 				PromptEvalDuration: r.PromptEvalDuration,
```
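With the handler changes above, every `/api/generate` response object carries `done_reason` once `done` is true, either the backend's reason or `"load"` when the request only loads a model. A dependency-free sketch of a streaming client that reads it from the final NDJSON line; the localhost URL, default port, and model name are assumptions for the example.

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body := []byte(`{"model":"llama3","prompt":"Why is the sky blue?"}`)
	resp, err := http.Post("http://127.0.0.1:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The streaming response is newline-delimited JSON; only the last
	// object has done=true and, after this change, a done_reason.
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		var chunk struct {
			Response   string `json:"response"`
			Done       bool   `json:"done"`
			DoneReason string `json:"done_reason"`
		}
		if err := json.Unmarshal(sc.Bytes(), &chunk); err != nil {
			panic(err)
		}
		fmt.Print(chunk.Response)
		if chunk.Done {
			fmt.Printf("\n[done_reason=%s]\n", chunk.DoneReason)
		}
	}
}
```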
```diff
@@ -1218,6 +1220,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Model:      req.Model,
 			Done:       true,
+			DoneReason: "load",
 			Message:    api.Message{Role: "assistant"},
 		}
 		c.JSON(http.StatusOK, resp)
@@ -1255,6 +1258,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Message:    api.Message{Role: "assistant", Content: r.Content},
 			Done:       r.Done,
+			DoneReason: r.DoneReason,
 			Metrics: api.Metrics{
 				PromptEvalCount:    r.PromptEvalCount,
 				PromptEvalDuration: r.PromptEvalDuration,
```
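The `"load"` reason covers chat requests that only load a model (no messages), which previously returned `done: true` with no explanation. A sketch of preloading a model through the Go client and checking for it; the module path and model name are assumptions, as is the expectation that the single load response is delivered through the streaming callback.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api" // module path assumed
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// No messages: per the ChatHandler hunk above, the server replies
	// once with done=true and done_reason="load".
	req := &api.ChatRequest{Model: "llama3"}
	err = client.Chat(context.Background(), req, func(r api.ChatResponse) error {
		fmt.Printf("done=%v done_reason=%q\n", r.Done, r.DoneReason)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```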