support raw generation requests (#952)
- add the optional `raw` generate request parameter to bypass prompt formatting and response context -add raw request to docs
This commit is contained in:
parent
ec84c02d54
commit
ec2a31e9b3
3 changed files with 50 additions and 5 deletions
|
@ -37,6 +37,7 @@ type GenerateRequest struct {
|
|||
Template string `json:"template"`
|
||||
Context []int `json:"context,omitempty"`
|
||||
Stream *bool `json:"stream,omitempty"`
|
||||
Raw bool `json:"raw,omitempty"`
|
||||
|
||||
Options map[string]interface{} `json:"options"`
|
||||
}
|
||||
|
|
31
docs/api.md
31
docs/api.md
|
@ -46,6 +46,7 @@ Advanced parameters (optional):
|
|||
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
|
||||
- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
|
||||
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
|
||||
- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
|
||||
|
||||
### Examples
|
||||
|
||||
|
@ -136,6 +137,36 @@ If `stream` is set to `false`, the response will be a single JSON object:
|
|||
}
|
||||
```
|
||||
|
||||
#### Request
|
||||
|
||||
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
|
||||
|
||||
```shell
|
||||
curl -X POST http://localhost:11434/api/generate -d '{
|
||||
"model": "mistral",
|
||||
"prompt": "[INST] why is the sky blue? [/INST]",
|
||||
"raw": true,
|
||||
"stream": false
|
||||
}'
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mistral",
|
||||
"created_at": "2023-11-03T15:36:02.583064Z",
|
||||
"response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
|
||||
"done": true,
|
||||
"total_duration": 14648695333,
|
||||
"load_duration": 3302671417,
|
||||
"prompt_eval_count": 14,
|
||||
"prompt_eval_duration": 286243000,
|
||||
"eval_count": 129,
|
||||
"eval_duration": 10931424000
|
||||
}
|
||||
```
|
||||
|
||||
## Create a Model
|
||||
|
||||
```shell
|
||||
|
|
|
@ -158,9 +158,14 @@ func GenerateHandler(c *gin.Context) {
|
|||
return
|
||||
}
|
||||
|
||||
if req.Model == "" {
|
||||
// validate the request
|
||||
switch {
|
||||
case req.Model == "":
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
||||
return
|
||||
case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
|
||||
return
|
||||
}
|
||||
|
||||
model, err := GetModel(req.Model)
|
||||
|
@ -189,10 +194,13 @@ func GenerateHandler(c *gin.Context) {
|
|||
|
||||
checkpointLoaded := time.Now()
|
||||
|
||||
prompt, err := model.Prompt(req)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
prompt := req.Prompt
|
||||
if !req.Raw {
|
||||
prompt, err = model.Prompt(req)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ch := make(chan any)
|
||||
|
@ -215,6 +223,11 @@ func GenerateHandler(c *gin.Context) {
|
|||
r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
}
|
||||
|
||||
if req.Raw {
|
||||
// in raw mode the client must manage history on their own
|
||||
r.Context = nil
|
||||
}
|
||||
|
||||
ch <- r
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue