support raw generation requests (#952)

- add the optional `raw` generate request parameter to bypass prompt formatting and response context
- add raw request to docs
Bruce MacDonald 2023-11-08 14:05:02 -08:00 committed by GitHub
parent ec84c02d54
commit ec2a31e9b3
3 changed files with 50 additions and 5 deletions


@@ -37,6 +37,7 @@ type GenerateRequest struct {
	Template string                 `json:"template"`
	Context  []int                  `json:"context,omitempty"`
	Stream   *bool                  `json:"stream,omitempty"`
	Raw      bool                   `json:"raw,omitempty"`
	Options  map[string]interface{} `json:"options"`
}
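A quick note on the tag: with `omitempty`, `raw` is only serialized when it is `true`. A minimal standalone sketch of that behavior (the struct below is a pared-down stand-in for `GenerateRequest`, not the real type):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// generateRequest is a pared-down stand-in for the real GenerateRequest,
// kept only to show how the json tags serialize.
type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Raw    bool   `json:"raw,omitempty"`
}

func main() {
	// With omitempty, "raw" is dropped from the payload when it is false.
	for _, raw := range []bool{false, true} {
		b, _ := json.Marshal(generateRequest{Model: "mistral", Prompt: "[INST] hi [/INST]", Raw: raw})
		fmt.Println(string(b))
	}
}
```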


@@ -46,6 +46,7 @@ Advanced parameters (optional):
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
- `stream`: if `false`, the response will be returned as a single response object rather than a stream of objects
- `raw`: if `true`, no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API and are managing history yourself.
### Examples
@@ -136,6 +137,36 @@ If `stream` is set to `false`, the response will be a single JSON object:
}
```
#### Request
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "mistral",
"prompt": "[INST] why is the sky blue? [/INST]",
"raw": true,
"stream": false
}'
```
#### Response
```json
{
"model": "mistral",
"created_at": "2023-11-03T15:36:02.583064Z",
"response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
"done": true,
"total_duration": 14648695333,
"load_duration": 3302671417,
"prompt_eval_count": 14,
"prompt_eval_duration": 286243000,
"eval_count": 129,
"eval_duration": 10931424000
}
```
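For reference, the same raw, non-streaming request can be sent from Go with a plain HTTP POST; this sketch assumes the default local endpoint and only decodes the fields it prints:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Same raw, non-streaming request as the curl example above.
	payload, _ := json.Marshal(map[string]any{
		"model":  "mistral",
		"prompt": "[INST] why is the sky blue? [/INST]",
		"raw":    true,
		"stream": false,
	})

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Only decode the fields this example cares about.
	var out struct {
		Response string `json:"response"`
		Done     bool   `json:"done"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Response)
}
```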
## Create a Model
```shell


@@ -158,9 +158,14 @@ func GenerateHandler(c *gin.Context) {
		return
	}

	// validate the request
	switch {
	case req.Model == "":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
	}

	model, err := GetModel(req.Model)
@@ -189,10 +194,13 @@ func GenerateHandler(c *gin.Context) {
	checkpointLoaded := time.Now()

	prompt := req.Prompt
	if !req.Raw {
		prompt, err = model.Prompt(req)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
	}

	ch := make(chan any)
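For intuition about what the `!req.Raw` branch skips: `model.Prompt` renders the prompt through the model's template, while raw mode forwards `req.Prompt` untouched. A toy illustration (the template string here is made up for the example and this is not the real `model.Prompt` implementation):

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

func main() {
	// Toy stand-in for a Modelfile TEMPLATE; not the real model.Prompt implementation.
	tmpl := template.Must(template.New("prompt").Parse("[INST] {{ .Prompt }} [/INST]"))

	var formatted strings.Builder
	if err := tmpl.Execute(&formatted, struct{ Prompt string }{Prompt: "why is the sky blue?"}); err != nil {
		panic(err)
	}
	fmt.Println(formatted.String()) // what the server would normally build from the template

	// With raw=true the server skips templating and uses req.Prompt verbatim.
	fmt.Println("[INST] why is the sky blue? [/INST]")
}
```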
@@ -215,6 +223,11 @@ func GenerateHandler(c *gin.Context) {
		r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
	}

	if req.Raw {
		// in raw mode the client must manage history on their own
		r.Context = nil
	}

	ch <- r
}
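Because `r.Context` is cleared in raw mode, keeping a conversation going becomes the caller's job. One possible client-side convention, sketched here (the `[INST]` framing is an assumption matching the Mistral example above, not something the API requires):

```go
package main

import (
	"fmt"
	"strings"
)

// buildRawPrompt folds earlier turns into the next raw prompt.
// This is one possible client-side convention, not part of the API.
func buildRawPrompt(history [][2]string, question string) string {
	var b strings.Builder
	for _, turn := range history {
		fmt.Fprintf(&b, "[INST] %s [/INST] %s ", turn[0], turn[1])
	}
	fmt.Fprintf(&b, "[INST] %s [/INST]", question)
	return b.String()
}

func main() {
	history := [][2]string{{"why is the sky blue?", "Rayleigh scattering."}}
	fmt.Println(buildRawPrompt(history, "and why are sunsets red?"))
}
```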