support raw generation requests (#952)

- add the optional `raw` generate request parameter to bypass prompt formatting and response context
- add the `raw` request parameter to the docs
Bruce MacDonald 2023-11-08 14:05:02 -08:00 committed by GitHub
parent ec84c02d54
commit ec2a31e9b3
3 changed files with 50 additions and 5 deletions


@@ -37,6 +37,7 @@ type GenerateRequest struct {
 	Template string                 `json:"template"`
 	Context  []int                  `json:"context,omitempty"`
 	Stream   *bool                  `json:"stream,omitempty"`
+	Raw      bool                   `json:"raw,omitempty"`
 	Options  map[string]interface{} `json:"options"`
 }
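Since the new field is tagged `raw,omitempty`, it only shows up in the request body when it is set. A minimal sketch of that behavior, using a trimmed-down copy of the struct above (the local type name is illustrative, not the real `api.GenerateRequest`):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// generateRequest is a trimmed-down, illustrative copy of the request type
// above; only the fields needed to show the raw flag are included.
type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Raw    bool   `json:"raw,omitempty"`
}

func main() {
	// With Raw set, the field is serialized into the JSON body.
	withRaw, _ := json.Marshal(generateRequest{Model: "mistral", Prompt: "[INST] hi [/INST]", Raw: true})
	fmt.Println(string(withRaw)) // {"model":"mistral","prompt":"[INST] hi [/INST]","raw":true}

	// With the zero value, omitempty drops it and the server formats the prompt as before.
	withoutRaw, _ := json.Marshal(generateRequest{Model: "mistral", Prompt: "hi"})
	fmt.Println(string(withoutRaw)) // {"model":"mistral","prompt":"hi"}
}
```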


@@ -46,6 +46,7 @@ Advanced parameters (optional):
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.

 ### Examples

@@ -136,6 +137,36 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
+
+#### Request
+
+In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
+
+```shell
+curl -X POST http://localhost:11434/api/generate -d '{
+  "model": "mistral",
+  "prompt": "[INST] why is the sky blue? [/INST]",
+  "raw": true,
+  "stream": false
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "mistral",
+  "created_at": "2023-11-03T15:36:02.583064Z",
+  "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
+  "done": true,
+  "total_duration": 14648695333,
+  "load_duration": 3302671417,
+  "prompt_eval_count": 14,
+  "prompt_eval_duration": 286243000,
+  "eval_count": 129,
+  "eval_duration": 10931424000
+}
+```

 ## Create a Model

 ```shell
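The curl example above can also be reproduced from Go with only the standard library. This is a sketch, assuming an Ollama server on the default `localhost:11434` port with the `mistral` model available; it sends the same raw, non-streaming request and prints the `response` field:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Build the same payload as the curl example: a fully templated
	// prompt, raw mode enabled, and streaming disabled.
	body, _ := json.Marshal(map[string]any{
		"model":  "mistral",
		"prompt": "[INST] why is the sky blue? [/INST]",
		"raw":    true,
		"stream": false,
	})

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// With stream set to false the server replies with a single JSON object.
	var out struct {
		Response string `json:"response"`
		Done     bool   `json:"done"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Response)
}
```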


@@ -158,9 +158,14 @@ func GenerateHandler(c *gin.Context) {
 		return
 	}

-	if req.Model == "" {
+	// validate the request
+	switch {
+	case req.Model == "":
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
 		return
+	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
+		return
 	}

 	model, err := GetModel(req.Model)
@@ -189,11 +194,14 @@ func GenerateHandler(c *gin.Context) {
 	checkpointLoaded := time.Now()

-	prompt, err := model.Prompt(req)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
+	prompt := req.Prompt
+	if !req.Raw {
+		prompt, err = model.Prompt(req)
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
 	}

 	ch := make(chan any)
 	go func() {
@@ -215,6 +223,11 @@ func GenerateHandler(c *gin.Context) {
 			r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 		}

+		if req.Raw {
+			// in raw mode the client must manage history on their own
+			r.Context = nil
+		}
+
 		ch <- r
 	}