OpenAI: Add Suffix to v1/completions (#5611)

* add suffix

* remove todo

* remove TODO

* add to test

* rm outdated prompt tokens info md

* fix test

* fix test
This commit is contained in:
royjhan 2024-07-16 20:50:14 -07:00 committed by GitHub
parent 499e87c9ba
commit 0d41623b52
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 7 additions and 6 deletions

View file

@ -103,10 +103,6 @@ curl http://localhost:11434/v1/chat/completions \
- [ ] `user`
- [ ] `n`
#### Notes
- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
## Models
Before using a model, pull it locally `ollama pull`:

View file

@ -111,6 +111,7 @@ type CompletionRequest struct {
Stream bool `json:"stream"`
Temperature *float32 `json:"temperature"`
TopP float32 `json:"top_p"`
Suffix string `json:"suffix"`
}
type Completion struct {
@ -188,7 +189,6 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
}(r.DoneReason),
}},
Usage: Usage{
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
PromptTokens: r.PromptEvalCount,
CompletionTokens: r.EvalCount,
TotalTokens: r.PromptEvalCount + r.EvalCount,
@ -234,7 +234,6 @@ func toCompletion(id string, r api.GenerateResponse) Completion {
}(r.DoneReason),
}},
Usage: Usage{
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
PromptTokens: r.PromptEvalCount,
CompletionTokens: r.EvalCount,
TotalTokens: r.PromptEvalCount + r.EvalCount,
@ -475,6 +474,7 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
Prompt: r.Prompt,
Options: options,
Stream: &r.Stream,
Suffix: r.Suffix,
}, nil
}

View file

@ -85,6 +85,7 @@ func TestMiddlewareRequests(t *testing.T) {
Prompt: "Hello",
Temperature: &temp,
Stop: []string{"\n", "stop"},
Suffix: "suffix",
}
bodyBytes, _ := json.Marshal(body)
@ -115,6 +116,10 @@ func TestMiddlewareRequests(t *testing.T) {
if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
}
if genReq.Suffix != "suffix" {
t.Fatalf("expected 'suffix', got %s", genReq.Suffix)
}
},
},
{