add done_reason to the api (#4235)
parent 1580ed4c06 · commit cfa84b8470
4 changed files with 44 additions and 40 deletions
```diff
@@ -117,6 +117,7 @@ type ChatResponse struct {
 	Model      string    `json:"model"`
 	CreatedAt  time.Time `json:"created_at"`
 	Message    Message   `json:"message"`
+	DoneReason string    `json:"done_reason"`
 
 	Done bool `json:"done"`
 
@@ -309,6 +310,9 @@ type GenerateResponse struct {
 	// Done specifies if the response is complete.
 	Done bool `json:"done"`
 
+	// DoneReason is the reason the model stopped generating text.
+	DoneReason string `json:"done_reason"`
+
 	// Context is an encoding of the conversation used in this response; this
 	// can be sent in the next request to keep a conversational memory.
 	Context []int `json:"context,omitempty"`
```
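On the wire the new field rides alongside `done` under the `done_reason` tag shown above. A minimal sketch of reading it back out of a final chat message; the literal JSON and the model name are illustrative, and the module path is assumed. The values `"stop"`, `"length"`, and `"load"` are the reasons introduced elsewhere in this commit.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/api" // module path assumed
)

func main() {
	// Final message of a streamed chat, as the server would emit it.
	final := []byte(`{"model":"llama3","message":{"role":"assistant","content":""},"done":true,"done_reason":"stop"}`)

	var r api.ChatResponse
	if err := json.Unmarshal(final, &r); err != nil {
		panic(err)
	}

	switch r.DoneReason {
	case "stop":
		fmt.Println("model finished naturally")
	case "length":
		fmt.Println("hit the prediction limit")
	case "load":
		fmt.Println("request only loaded the model")
	}
}
```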
```diff
@@ -580,6 +580,7 @@ type completion struct {
 	Model        string `json:"model"`
 	Prompt       string `json:"prompt"`
 	Stop         bool   `json:"stop"`
+	StoppedLimit bool   `json:"stopped_limit"`
 
 	Timings struct {
 		PredictedN int `json:"predicted_n"`
@@ -598,6 +599,7 @@ type CompletionRequest struct {
 
 type CompletionResponse struct {
 	Content            string
+	DoneReason         string
 	Done               bool
 	PromptEvalCount    int
 	PromptEvalDuration time.Duration
@@ -739,8 +741,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		}
 
 		if c.Stop {
+			doneReason := "stop"
+			if c.StoppedLimit {
+				doneReason = "length"
+			}
+
 			fn(CompletionResponse{
 				Done:               true,
+				DoneReason:         doneReason,
 				PromptEvalCount:    c.Timings.PromptN,
 				PromptEvalDuration: parseDurationMs(c.Timings.PromptMS),
 				EvalCount:          c.Timings.PredictedN,
```
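The `Completion` hunk above derives the reason from two flags reported by the underlying llama.cpp server: `stop` (generation ended) and `stopped_limit` (the prediction limit was hit). Restated as a standalone helper purely for illustration; this function does not exist in the repo.

```go
// doneReason mirrors the mapping performed inline in Completion above:
// an unfinished response has no reason yet, a response truncated by the
// prediction limit reports "length", and everything else reports "stop".
func doneReason(stop, stoppedLimit bool) string {
	if !stop {
		return ""
	}
	if stoppedLimit {
		return "length"
	}
	return "stop"
}
```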
```diff
@@ -109,13 +109,7 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 		Choices: []Choice{{
 			Index:   0,
 			Message: Message{Role: r.Message.Role, Content: r.Message.Content},
-			FinishReason: func(done bool) *string {
-				if done {
-					reason := "stop"
-					return &reason
-				}
-				return nil
-			}(r.Done),
+			FinishReason: &r.DoneReason,
 		}},
 		Usage: Usage{
 			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
@@ -137,13 +131,7 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
 			{
 				Index: 0,
 				Delta: Message{Role: "assistant", Content: r.Message.Content},
-				FinishReason: func(done bool) *string {
-					if done {
-						reason := "stop"
-						return &reason
-					}
-					return nil
-				}(r.Done),
+				FinishReason: &r.DoneReason,
 			},
 		},
 	}
```
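In the OpenAI-compatibility layer, the hand-rolled closure that returned `"stop"` or `nil` is dropped in favor of pointing `FinishReason` at the new field. The sketch below only illustrates how a `*string` field marshals, which is what makes the pointer-to-field pattern work; the `choice` struct is made up for the example and is not the repo's type.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// choice stands in for the OpenAI-style response types, which expose
// finish_reason as *string so it can serialize as null until generation completes.
type choice struct {
	FinishReason *string `json:"finish_reason"`
}

func main() {
	mid, _ := json.Marshal(choice{}) // {"finish_reason":null}

	reason := "stop"
	last, _ := json.Marshal(choice{FinishReason: &reason}) // {"finish_reason":"stop"}

	fmt.Println(string(mid))
	fmt.Println(string(last))
}
```

One visible consequence: `&r.DoneReason` is never nil, so as far as these hunks show, mid-stream chunks would carry the current value (an empty string until the final message) rather than `null`, unless code outside these hunks intervenes.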
```diff
@@ -155,6 +155,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Model:      req.Model,
 			Done:       true,
+			DoneReason: "load",
 		})
 		return
 	}
@@ -226,6 +227,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Done:       r.Done,
 			Response:   r.Content,
+			DoneReason: r.DoneReason,
 			Metrics: api.Metrics{
 				PromptEvalCount:    r.PromptEvalCount,
 				PromptEvalDuration: r.PromptEvalDuration,
```
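With the handler changes above, every `/api/generate` response object carries `done_reason` once `done` is true, either the backend's reason or `"load"` when the request only loads a model. A dependency-free sketch of a streaming client that reads it from the final NDJSON line; the localhost URL, default port, and model name are assumptions for the example.

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body := []byte(`{"model":"llama3","prompt":"Why is the sky blue?"}`)
	resp, err := http.Post("http://127.0.0.1:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The streaming response is newline-delimited JSON; only the last
	// object has done=true and, after this change, a done_reason.
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		var chunk struct {
			Response   string `json:"response"`
			Done       bool   `json:"done"`
			DoneReason string `json:"done_reason"`
		}
		if err := json.Unmarshal(sc.Bytes(), &chunk); err != nil {
			panic(err)
		}
		fmt.Print(chunk.Response)
		if chunk.Done {
			fmt.Printf("\n[done_reason=%s]\n", chunk.DoneReason)
		}
	}
}
```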
```diff
@@ -1218,6 +1220,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Model:      req.Model,
 			Done:       true,
+			DoneReason: "load",
 			Message:    api.Message{Role: "assistant"},
 		}
 		c.JSON(http.StatusOK, resp)
@@ -1255,6 +1258,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Message:    api.Message{Role: "assistant", Content: r.Content},
 			Done:       r.Done,
+			DoneReason: r.DoneReason,
 			Metrics: api.Metrics{
 				PromptEvalCount:    r.PromptEvalCount,
 				PromptEvalDuration: r.PromptEvalDuration,
```
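The `"load"` reason covers chat requests that only load a model (no messages), which previously returned `done: true` with no explanation. A sketch of preloading a model through the Go client and checking for it; the module path and model name are assumptions, as is the expectation that the single load response is delivered through the streaming callback.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api" // module path assumed
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// No messages: per the ChatHandler hunk above, the server replies
	// once with done=true and done_reason="load".
	req := &api.ChatRequest{Model: "llama3"}
	err = client.Chat(context.Background(), req, func(r api.ChatResponse) error {
		fmt.Printf("done=%v done_reason=%q\n", r.Done, r.DoneReason)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```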