JSON mode: add `"format"` as an API parameter (#1051)
* add `"format": "json"` as an API parameter --------- Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
parent 5b39503bcd
commit 5cba29b9d6

5 changed files with 97 additions and 9 deletions
@@ -38,6 +38,7 @@ type GenerateRequest struct {
 	Context []int  `json:"context,omitempty"`
 	Stream  *bool  `json:"stream,omitempty"`
 	Raw     bool   `json:"raw,omitempty"`
+	Format  string `json:"format"`
 
 	Options map[string]interface{} `json:"options"`
 }
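As a usage sketch (not part of the commit): a minimal standalone client that exercises the new field, assuming an Ollama server is listening on the default `localhost:11434` with a `llama2` model available. It defines its own request type rather than importing the repo's `api` package.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// generateRequest mirrors the subset of GenerateRequest used here,
// including the Format field added by this commit.
type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Format string `json:"format,omitempty"`
	Stream *bool  `json:"stream,omitempty"`
}

func main() {
	stream := false
	body, err := json.Marshal(generateRequest{
		Model:  "llama2",
		Prompt: "What color is the sky at different times of the day? Respond using JSON",
		Format: "json", // new parameter added by this commit
		Stream: &stream,
	})
	if err != nil {
		panic(err)
	}

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Only the field we care about from the response envelope.
	var out struct {
		Response string `json:"response"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Response) // a string containing JSON
}
```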
docs/api.md | 64
@@ -38,6 +38,7 @@ Generate a response for a given prompt with a provided model. This is a streaming
 
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
+- `format`: the format to return a response in. Currently the only accepted value is `json`
 
 Advanced parameters (optional):
 
@@ -48,13 +49,17 @@ Advanced parameters (optional):
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 - `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
 
+### JSON mode
+
+Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+
 ### Examples
 
 #### Request
 
 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+  "model": "llama2",
   "prompt": "Why is the sky blue?"
 }'
 ```
@@ -65,7 +70,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "response": "The",
   "done": false
@@ -89,7 +94,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 
 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "",
   "context": [1, 2, 3],
@@ -105,7 +110,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 }
 ```
 
-#### Request
+#### Request (No streaming)
 
 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{
@@ -137,7 +142,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 
-#### Request
+#### Request (Raw mode)
 
 In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
 
@@ -167,7 +172,54 @@ curl -X POST http://localhost:11434/api/generate -d '{
 }
 ```
 
-#### Request
+#### Request (JSON mode)
 
+```shell
+curl -X POST http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "What color is the sky at different times of the day? Respond using JSON",
+  "format": "json",
+  "stream": false
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-11-09T21:07:55.186497Z",
+  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
+  "done": true,
+  "total_duration": 4661289125,
+  "load_duration": 1714434500,
+  "prompt_eval_count": 36,
+  "prompt_eval_duration": 264132000,
+  "eval_count": 75,
+  "eval_duration": 2112149000
+}
+```
+
+The value of `response` will be a string containing JSON similar to:
+
+```json
+{
+  "morning": {
+    "color": "blue"
+  },
+  "noon": {
+    "color": "blue-gray"
+  },
+  "afternoon": {
+    "color": "warm gray"
+  },
+  "evening": {
+    "color": "orange"
+  }
+}
+```
+
+#### Request (With options)
+
 If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
 
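Because `response` is itself a string of JSON, a client decodes twice: once for the API envelope and once for the model output. A minimal sketch of that second step (illustrative only, not part of the commit), using the example payload above:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// The "response" field from the JSON mode example above:
	// a string whose contents are themselves JSON.
	raw := "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n"

	// Unmarshal the inner document into a typed structure.
	var skyColors map[string]struct {
		Color string `json:"color"`
	}
	if err := json.Unmarshal([]byte(raw), &skyColors); err != nil {
		panic(err)
	}
	fmt.Println(skyColors["evening"].Color) // orange
}
```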
llm/llama.go | 34
@@ -27,6 +27,34 @@ import (
 	"github.com/jmorganca/ollama/format"
 )
 
+const jsonGrammar = `
+root   ::= object
+value  ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+            string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array  ::=
+  "[" ws (
+            value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
+`
+
 //go:embed llama.cpp/*/build/*/bin/*
 var llamaCppEmbed embed.FS
 
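The constant appears to be a JSON grammar in llama.cpp's GBNF format, with `root ::= object` restricting completions to a single JSON object rather than any JSON value. A rough way to express that invariant on the Go side (an illustrative snippet, not code from this commit):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// rootIsObject roughly checks the property the grammar above is meant to
// guarantee for model output: the text parses as a JSON object at the root.
func rootIsObject(s string) bool {
	var obj map[string]json.RawMessage
	return json.Unmarshal([]byte(s), &obj) == nil
}

func main() {
	fmt.Println(rootIsObject(`{"morning": {"color": "blue"}}`)) // true
	fmt.Println(rootIsObject(`"blue"`))                         // false: valid JSON, but not an object
}
```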
@@ -497,7 +525,7 @@ type prediction struct {
 
 const maxBufferSize = 512 * format.KiloByte
 
-func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
+func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 	prevConvo, err := llm.Decode(ctx, prevContext)
 	if err != nil {
 		return err
@@ -532,6 +560,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
 		"stop":   llm.Stop,
 	}
 
+	if format == "json" {
+		request["grammar"] = jsonGrammar
+	}
+
 	// Handling JSON marshaling with special characters unescaped.
 	buffer := &bytes.Buffer{}
 	enc := json.NewEncoder(buffer)
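To make the wire format concrete, here is a small self-contained sketch of how the request map gains a `grammar` field and is then encoded (illustrative only; the real code uses the full `jsonGrammar` constant, and the hunk does not show the encoder's escaping setup, so the `SetEscapeHTML(false)` call below is an assumption suggested by the comment about unescaped characters):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

func main() {
	// Illustrative subset of the prediction request built in Predict above.
	request := map[string]interface{}{
		"prompt": "Respond using JSON",
		"stream": true,
	}

	format := "json"
	if format == "json" {
		// The GBNF grammar constrains llama.cpp's sampling to JSON objects.
		request["grammar"] = "root ::= object ..." // stands in for jsonGrammar
	}

	// Encode without HTML escaping so grammar/prompt characters are sent as-is
	// (assumed; the diff only shows the buffer and encoder being created).
	buffer := &bytes.Buffer{}
	enc := json.NewEncoder(buffer)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(request); err != nil {
		panic(err)
	}
	fmt.Print(buffer.String())
}
```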
@@ -14,7 +14,7 @@ import (
 )
 
 type LLM interface {
-	Predict(context.Context, []int, string, func(api.GenerateResponse)) error
+	Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
 	Embedding(context.Context, string) ([]float64, error)
 	Encode(context.Context, string) ([]int, error)
 	Decode(context.Context, []int) (string, error)
@@ -163,6 +163,9 @@ func GenerateHandler(c *gin.Context) {
 	case req.Model == "":
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
 		return
+	case len(req.Format) > 0 && req.Format != "json":
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
+		return
 	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
 		return
@@ -231,7 +234,7 @@ func GenerateHandler(c *gin.Context) {
 			ch <- r
 		}
 
-		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, fn); err != nil {
+		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, req.Format, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()