support raw generation requests (#952)
- add the optional `raw` generate request parameter to bypass prompt formatting and response context
- add raw request to docs
This commit is contained in:
parent
ec84c02d54
commit
ec2a31e9b3
3 changed files with 50 additions and 5 deletions
|
@ -37,6 +37,7 @@ type GenerateRequest struct {
|
||||||
Template string `json:"template"`
|
Template string `json:"template"`
|
||||||
Context []int `json:"context,omitempty"`
|
Context []int `json:"context,omitempty"`
|
||||||
Stream *bool `json:"stream,omitempty"`
|
Stream *bool `json:"stream,omitempty"`
|
||||||
|
Raw bool `json:"raw,omitempty"`
|
||||||
|
|
||||||
Options map[string]interface{} `json:"options"`
|
Options map[string]interface{} `json:"options"`
|
||||||
}
|
}
|
||||||
|
|
31
docs/api.md
31
docs/api.md
|
@ -46,6 +46,7 @@ Advanced parameters (optional):
|
||||||
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
|
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
|
||||||
- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
|
- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
|
||||||
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
|
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
|
||||||
|
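- `raw`: if `true`, no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API and are managing history yourself. `raw` cannot be combined with `template`, `system`, or `context`; such requests are rejected with a `400 Bad Request` error.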
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
|
@ -136,6 +137,36 @@ If `stream` is set to `false`, the response will be a single JSON object:
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Request
|
||||||
|
|
||||||
|
In some cases, you may wish to bypass the templating system and provide a full prompt yourself. In this case, set the `raw` parameter to `true`: the prompt is sent to the model without any formatting applied, and the response does not include a `context`.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl -X POST http://localhost:11434/api/generate -d '{
|
||||||
|
"model": "mistral",
|
||||||
|
"prompt": "[INST] why is the sky blue? [/INST]",
|
||||||
|
"raw": true,
|
||||||
|
"stream": false
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "mistral",
|
||||||
|
"created_at": "2023-11-03T15:36:02.583064Z",
|
||||||
|
"response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
|
||||||
|
"done": true,
|
||||||
|
"total_duration": 14648695333,
|
||||||
|
"load_duration": 3302671417,
|
||||||
|
"prompt_eval_count": 14,
|
||||||
|
"prompt_eval_duration": 286243000,
|
||||||
|
"eval_count": 129,
|
||||||
|
"eval_duration": 10931424000
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Create a Model
|
## Create a Model
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
|
@ -158,9 +158,14 @@ func GenerateHandler(c *gin.Context) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.Model == "" {
|
// validate the request
|
||||||
|
switch {
|
||||||
|
case req.Model == "":
|
||||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
||||||
return
|
return
|
||||||
|
case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
|
||||||
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := GetModel(req.Model)
|
model, err := GetModel(req.Model)
|
||||||
|
@ -189,11 +194,14 @@ func GenerateHandler(c *gin.Context) {
|
||||||
|
|
||||||
checkpointLoaded := time.Now()
|
checkpointLoaded := time.Now()
|
||||||
|
|
||||||
prompt, err := model.Prompt(req)
|
prompt := req.Prompt
|
||||||
|
if !req.Raw {
|
||||||
|
prompt, err = model.Prompt(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ch := make(chan any)
|
ch := make(chan any)
|
||||||
go func() {
|
go func() {
|
||||||
|
@ -215,6 +223,11 @@ func GenerateHandler(c *gin.Context) {
|
||||||
r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if req.Raw {
|
||||||
|
// in raw mode the client must manage history on their own
|
||||||
|
r.Context = nil
|
||||||
|
}
|
||||||
|
|
||||||
ch <- r
|
ch <- r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue