optional parameter to not stream response (#639)

* update streaming request accept header
* add optional stream param to request bodies
This commit is contained in:
Bruce MacDonald 2023-10-11 12:54:27 -04:00 committed by GitHub
parent 77295f716e
commit 274d5a5fdf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 94 additions and 18 deletions

View file

@ -17,6 +17,10 @@ import (
"github.com/jmorganca/ollama/version" "github.com/jmorganca/ollama/version"
) )
const DefaultHost = "127.0.0.1:11434"
var envHost = os.Getenv("OLLAMA_HOST")
type Client struct { type Client struct {
base *url.URL base *url.URL
http http.Client http http.Client
@ -143,7 +147,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
} }
request.Header.Set("Content-Type", "application/json") request.Header.Set("Content-Type", "application/json")
request.Header.Set("Accept", "application/json") request.Header.Set("Accept", "application/x-ndjson")
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version())) request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
response, err := c.http.Do(request) response, err := c.http.Do(request)

View file

@ -37,6 +37,7 @@ type GenerateRequest struct {
System string `json:"system"` System string `json:"system"`
Template string `json:"template"` Template string `json:"template"`
Context []int `json:"context,omitempty"` Context []int `json:"context,omitempty"`
Stream *bool `json:"stream,omitempty"`
Options map[string]interface{} `json:"options"` Options map[string]interface{} `json:"options"`
} }
@ -55,6 +56,7 @@ type EmbeddingResponse struct {
type CreateRequest struct { type CreateRequest struct {
Name string `json:"name"` Name string `json:"name"`
Path string `json:"path"` Path string `json:"path"`
Stream *bool `json:"stream,omitempty"`
} }
type DeleteRequest struct { type DeleteRequest struct {
@ -81,6 +83,9 @@ type CopyRequest struct {
type PullRequest struct { type PullRequest struct {
Name string `json:"name"` Name string `json:"name"`
Insecure bool `json:"insecure,omitempty"` Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
} }
type ProgressResponse struct { type ProgressResponse struct {
@ -93,6 +98,9 @@ type ProgressResponse struct {
type PushRequest struct { type PushRequest struct {
Name string `json:"name"` Name string `json:"name"`
Insecure bool `json:"insecure,omitempty"` Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
} }
type ListResponse struct { type ListResponse struct {
@ -113,7 +121,7 @@ type TokenResponse struct {
type GenerateResponse struct { type GenerateResponse struct {
Model string `json:"model"` Model string `json:"model"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
Response string `json:"response,omitempty"` Response string `json:"response"`
Done bool `json:"done"` Done bool `json:"done"`
Context []int `json:"context,omitempty"` Context []int `json:"context,omitempty"`

View file

@ -12,7 +12,6 @@
- [Push a Model](#push-a-model) - [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings) - [Generate Embeddings](#generate-embeddings)
## Conventions ## Conventions
### Model names ### Model names
@ -40,12 +39,13 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names) - `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for - `prompt`: the prompt to generate a response for
Advanced parameters: Advanced parameters (optional):
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature` - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `system`: system prompt to (overrides what is defined in the `Modelfile`) - `system`: system prompt to (overrides what is defined in the `Modelfile`)
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`) - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
### Request ### Request
@ -80,6 +80,7 @@ The final response in the stream also includes additional data about the generat
- `eval_count`: number of tokens the response - `eval_count`: number of tokens the response
- `eval_duration`: time in nanoseconds spent generating the response - `eval_duration`: time in nanoseconds spent generating the response
- `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory - `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory
- `response`: empty if the response was streamed; if not streamed, this will contain the full response
To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`. To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`.
@ -87,6 +88,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
{ {
"model": "llama2:7b", "model": "llama2:7b",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"response": "",
"context": [1, 2, 3], "context": [1, 2, 3],
"done": true, "done": true,
"total_duration": 5589157167, "total_duration": 5589157167,
@ -112,6 +114,7 @@ Create a model from a [`Modelfile`](./modelfile.md)
- `name`: name of the model to create - `name`: name of the model to create
- `path`: path to the Modelfile - `path`: path to the Modelfile
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request ### Request
@ -245,6 +248,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
- `name`: name of the model to pull - `name`: name of the model to pull
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development. - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request ### Request
@ -276,6 +280,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>` - `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development. - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request ### Request
@ -297,7 +302,8 @@ and then:
```json ```json
{ {
"status":"starting upload","digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab", "status": "starting upload",
"digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total": 1928429856 "total": 1928429856
} }
``` ```
@ -308,7 +314,8 @@ Then there is a series of uploading responses:
{ {
"status": "starting upload", "status": "starting upload",
"digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab", "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total":1928429856} "total": 1928429856
}
``` ```
Finally, when the upload is complete: Finally, when the upload is complete:

View file

@ -240,6 +240,23 @@ func GenerateHandler(c *gin.Context) {
} }
}() }()
if req.Stream != nil && !*req.Stream {
var response api.GenerateResponse
generated := ""
for resp := range ch {
if r, ok := resp.(api.GenerateResponse); ok {
generated += r.Response
response = r
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
}
response.Response = generated
c.JSON(http.StatusOK, response)
return
}
streamResponse(c, ch) streamResponse(c, ch)
} }
@ -309,6 +326,11 @@ func PullModelHandler(c *gin.Context) {
} }
}() }()
if req.Stream != nil && !*req.Stream {
waitForStream(c, ch)
return
}
streamResponse(c, ch) streamResponse(c, ch)
} }
@ -336,6 +358,11 @@ func PushModelHandler(c *gin.Context) {
} }
}() }()
if req.Stream != nil && !*req.Stream {
waitForStream(c, ch)
return
}
streamResponse(c, ch) streamResponse(c, ch)
} }
@ -363,6 +390,11 @@ func CreateModelHandler(c *gin.Context) {
} }
}() }()
if req.Stream != nil && !*req.Stream {
waitForStream(c, ch)
return
}
streamResponse(c, ch) streamResponse(c, ch)
} }
@ -603,6 +635,31 @@ func Serve(ln net.Listener, allowOrigins []string) error {
return s.Serve(ln) return s.Serve(ln)
} }
// waitForStream drains ch and writes exactly one JSON response, used when a
// request sets "stream": false instead of consuming an NDJSON stream.
//
// Terminal conditions:
//   - an api.ProgressResponse with Status "success" -> 200 with that response
//   - a gin.H carrying an "error" string            -> 500 with that message
//   - any other payload, or channel close without
//     a success message                             -> 500
func waitForStream(c *gin.Context, ch chan any) {
	c.Header("Content-Type", "application/json")
	for resp := range ch {
		switch r := resp.(type) {
		case api.ProgressResponse:
			// Intermediate progress updates are discarded; only the
			// final "success" status is reported to the caller.
			if r.Status == "success" {
				c.JSON(http.StatusOK, r)
				return
			}
		case gin.H:
			// Producers send errors as gin.H{"error": "..."}.
			if errorMsg, ok := r["error"].(string); ok {
				c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
			} else {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in progress response"})
			}
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}
	}
	// Channel closed without ever reporting success.
	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}
func streamResponse(c *gin.Context, ch chan any) { func streamResponse(c *gin.Context, ch chan any) {
c.Header("Content-Type", "application/x-ndjson") c.Header("Content-Type", "application/x-ndjson")
c.Stream(func(w io.Writer) bool { c.Stream(func(w io.Writer) bool {