JSON mode: add `"format"` as an API parameter (#1051)

* add `"format": "json"` as an API parameter

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

parent 5b39503bcd
commit 5cba29b9d6

5 changed files with 97 additions and 9 deletions

@@ -38,6 +38,7 @@ type GenerateRequest struct {
 	Context []int  `json:"context,omitempty"`
 	Stream  *bool  `json:"stream,omitempty"`
 	Raw     bool   `json:"raw,omitempty"`
+	Format  string `json:"format"`

 	Options map[string]interface{} `json:"options"`
 }
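
For orientation, a client fills the new field like any other; a minimal sketch, assuming the struct also carries `Model` and `Prompt` fields as the API docs below imply:

```go
// Hypothetical client-side construction; only Format is new in this commit.
stream := false
req := api.GenerateRequest{
	Model:  "llama2",
	Prompt: "What color is the sky? Respond using JSON",
	Format: "json",  // constrain the output to valid JSON
	Stream: &stream, // Stream is *bool, so take its address
}
```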

docs/api.md | 64

@@ -38,6 +38,7 @@ Generate a response for a given prompt with a provided model. This is a streaming

 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
+- `format`: the format to return a response in. Currently the only accepted value is `json`

 Advanced parameters (optional):

@@ -48,13 +49,17 @@ Advanced parameters (optional):
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 - `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.

+### JSON mode
+
+Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+
 ### Examples

 #### Request

 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+  "model": "llama2",
   "prompt": "Why is the sky blue?"
 }'
 ```

@@ -65,7 +70,7 @@ A stream of JSON objects is returned:

 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "response": "The",
   "done": false

@@ -89,7 +94,7 @@ To calculate how fast the response is generated in tokens per second (token/s),

 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "",
   "context": [1, 2, 3],

@@ -105,7 +110,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 }
 ```

-#### Request
+#### Request (No streaming)

 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{

@@ -137,7 +142,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```

-#### Request
+#### Request (Raw mode)

 In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.

@@ -167,7 +172,54 @@ curl -X POST http://localhost:11434/api/generate -d '{
 }
 ```

-#### Request
+#### Request (JSON mode)
+
+```shell
+curl -X POST http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "What color is the sky at different times of the day? Respond using JSON",
+  "format": "json",
+  "stream": false
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-11-09T21:07:55.186497Z",
+  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
+  "done": true,
+  "total_duration": 4661289125,
+  "load_duration": 1714434500,
+  "prompt_eval_count": 36,
+  "prompt_eval_duration": 264132000,
+  "eval_count": 75,
+  "eval_duration": 2112149000
+}
+```
+
+The value of `response` will be a string containing JSON similar to:
+
+```json
+{
+  "morning": {
+    "color": "blue"
+  },
+  "noon": {
+    "color": "blue-gray"
+  },
+  "afternoon": {
+    "color": "warm gray"
+  },
+  "evening": {
+    "color": "orange"
+  }
+}
+```
+
+#### Request (With options)

 If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
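
Since `response` in JSON mode is a string containing JSON rather than a nested object, a client parses twice: once for the API envelope and once for the model's payload. A minimal end-to-end sketch against the endpoint documented above (model name and prompt are illustrative):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Build a non-streaming JSON-mode request, mirroring the curl example.
	body, _ := json.Marshal(map[string]interface{}{
		"model":  "llama2",
		"prompt": "What color is the sky at different times of the day? Respond using JSON",
		"format": "json",
		"stream": false,
	})
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// First parse: the API envelope.
	var out struct {
		Response string `json:"response"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}

	// Second parse: the JSON text the model produced inside `response`.
	var payload map[string]interface{}
	if err := json.Unmarshal([]byte(out.Response), &payload); err != nil {
		panic(err)
	}
	fmt.Println(payload)
}
```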

llm/llama.go | 34

@@ -27,6 +27,34 @@ import (
 	"github.com/jmorganca/ollama/format"
 )

+const jsonGrammar = `
+root ::= object
+value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+    string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array ::=
+  "[" ws (
+    value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
+`
+
 //go:embed llama.cpp/*/build/*/bin/*
 var llamaCppEmbed embed.FS
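
The grammar above is llama.cpp's GBNF notation. Since `root ::= object`, grammar-constrained sampling can only ever emit a JSON object, so the decoded `response` should always unmarshal cleanly. A hypothetical sanity check (not part of this diff) of what that guarantees:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// checkObject verifies that grammar-constrained output is a JSON object,
// which root ::= object is meant to guarantee.
func checkObject(response string) error {
	var obj map[string]interface{}
	if err := json.Unmarshal([]byte(response), &obj); err != nil {
		return fmt.Errorf("expected a JSON object: %w", err)
	}
	return nil
}

func main() {
	fmt.Println(checkObject(`{"morning": {"color": "blue"}}`)) // <nil>
}
```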

@@ -497,7 +525,7 @@ type prediction struct {

 const maxBufferSize = 512 * format.KiloByte

-func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
+func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 	prevConvo, err := llm.Decode(ctx, prevContext)
 	if err != nil {
 		return err

@@ -532,6 +560,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
 		"stop": llm.Stop,
 	}

+	if format == "json" {
+		request["grammar"] = jsonGrammar
+	}
+
 	// Handling JSON marshaling with special characters unescaped.
 	buffer := &bytes.Buffer{}
 	enc := json.NewEncoder(buffer)
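
The grammar thus rides along as one more key in the prediction request sent to the llama.cpp server. For context, a sketch of the encoder setup the trailing comment refers to, under the assumption that "special characters unescaped" means HTML escaping is disabled:

```go
import (
	"bytes"
	"encoding/json"
	"fmt"
)

// encodeRequest is a hypothetical helper: disabling HTML escaping keeps
// <, >, and & in the prompt intact on their way to llama.cpp.
func encodeRequest(request map[string]interface{}) (*bytes.Buffer, error) {
	buffer := &bytes.Buffer{}
	enc := json.NewEncoder(buffer)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(request); err != nil {
		return nil, fmt.Errorf("failed to marshal data: %w", err)
	}
	return buffer, nil
}
```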

@@ -14,7 +14,7 @@ import (
 )

 type LLM interface {
-	Predict(context.Context, []int, string, func(api.GenerateResponse)) error
+	Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
 	Embedding(context.Context, string) ([]float64, error)
 	Encode(context.Context, string) ([]int, error)
 	Decode(context.Context, []int) (string, error)

@@ -163,6 +163,9 @@ func GenerateHandler(c *gin.Context) {
 	case req.Model == "":
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
 		return
+	case len(req.Format) > 0 && req.Format != "json":
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
+		return
 	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
 		return
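
In other words, the only accepted values are the empty string (no constraint) and `json`; anything else fails fast with HTTP 400 before a model is loaded. Restated as a standalone helper (hypothetical, not in the diff):

```go
// validFormat mirrors the switch case above.
func validFormat(format string) error {
	if format != "" && format != "json" {
		return fmt.Errorf("format must be json")
	}
	return nil
}
```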

@@ -231,7 +234,7 @@ func GenerateHandler(c *gin.Context) {
 			ch <- r
 		}

-		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, fn); err != nil {
+		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, req.Format, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
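
With the format threaded through the handler, any implementation of the widened `LLM` interface receives it as the fourth argument. A hypothetical call site, assuming `llm` is an `LLM` and `req` the incoming `api.GenerateRequest`:

```go
// An empty req.Format leaves decoding unconstrained; "json" switches on
// the grammar inside the runner.
err := llm.Predict(ctx, req.Context, prompt, req.Format, func(r api.GenerateResponse) {
	fmt.Print(r.Response) // stream partial responses as they arrive
})
```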