diff --git a/api/client.go b/api/client.go
index 961cd417..4a5b97c9 100644
--- a/api/client.go
+++ b/api/client.go
@@ -17,6 +17,10 @@ import (
 	"github.com/jmorganca/ollama/version"
 )
 
+const DefaultHost = "127.0.0.1:11434"
+
+var envHost = os.Getenv("OLLAMA_HOST")
+
 type Client struct {
 	base *url.URL
 	http http.Client
@@ -143,7 +147,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	}
 
 	request.Header.Set("Content-Type", "application/json")
-	request.Header.Set("Accept", "application/json")
+	request.Header.Set("Accept", "application/x-ndjson")
 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
 
 	response, err := c.http.Do(request)
diff --git a/api/types.go b/api/types.go
index c1bb1471..002db000 100644
--- a/api/types.go
+++ b/api/types.go
@@ -37,6 +37,7 @@ type GenerateRequest struct {
 	System   string `json:"system"`
 	Template string `json:"template"`
 	Context  []int  `json:"context,omitempty"`
+	Stream   *bool  `json:"stream,omitempty"`
 
 	Options map[string]interface{} `json:"options"`
 }
@@ -53,8 +54,9 @@ type EmbeddingResponse struct {
 }
 
 type CreateRequest struct {
-	Name string `json:"name"`
-	Path string `json:"path"`
+	Name   string `json:"name"`
+	Path   string `json:"path"`
+	Stream *bool  `json:"stream,omitempty"`
 }
 
 type DeleteRequest struct {
@@ -81,6 +83,9 @@ type CopyRequest struct {
 type PullRequest struct {
 	Name     string `json:"name"`
 	Insecure bool   `json:"insecure,omitempty"`
+	Username string `json:"username"`
+	Password string `json:"password"`
+	Stream   *bool  `json:"stream,omitempty"`
 }
 
 type ProgressResponse struct {
@@ -93,6 +98,9 @@ type ProgressResponse struct {
 type PushRequest struct {
 	Name     string `json:"name"`
 	Insecure bool   `json:"insecure,omitempty"`
+	Username string `json:"username"`
+	Password string `json:"password"`
+	Stream   *bool  `json:"stream,omitempty"`
 }
 
 type ListResponse struct {
@@ -113,7 +121,7 @@ type TokenResponse struct {
 type GenerateResponse struct {
 	Model     string    `json:"model"`
 	CreatedAt time.Time `json:"created_at"`
-	Response  string    `json:"response,omitempty"`
+	Response  string    `json:"response"`
 
 	Done    bool  `json:"done"`
 	Context []int `json:"context,omitempty"`
diff --git a/docs/api.md b/docs/api.md
index 59f17fe4..c51b56db 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -12,7 +12,6 @@
 - [Push a Model](#push-a-model)
 - [Generate Embeddings](#generate-embeddings)
-
 
 ## Conventions
 
 ### Model names
@@ -40,12 +39,13 @@ Generate a response for a given prompt with a provided model. This is a streamin
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
 
-Advanced parameters:
+Advanced parameters (optional):
 
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `system`: system prompt to (overrides what is defined in the `Modelfile`)
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -80,6 +80,7 @@ The final response in the stream also includes additional data about the generat
 - `eval_count`: number of tokens the response
 - `eval_duration`: time in nanoseconds spent generating the response
 - `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory
+- `response`: empty if the response was streamed; if not streamed, this will contain the full response
 
 To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`.
 
@@ -87,6 +88,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 {
   "model": "llama2:7b",
   "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "",
   "context": [1, 2, 3],
   "done": true,
   "total_duration": 5589157167,
@@ -112,6 +114,7 @@ Create a model from a [`Modelfile`](./modelfile.md)
 - `name`: name of the model to create
 - `path`: path to the Modelfile
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -179,7 +182,7 @@ Show details about a model including modelfile, template, parameters, license, a
 
 ### Request
 
-```shell 
+```shell
 curl http://localhost:11434/api/show -d '{
   "name": "llama2:7b"
 }'
@@ -189,10 +192,10 @@ curl http://localhost:11434/api/show -d '{
 
 ```json
 {
-"license": "",
-"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
-"parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
-"template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
+  "license": "",
+  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
+  "parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
+  "template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
 }
 ```
 
@@ -245,6 +248,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 - `name`: name of the model to pull
 - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -275,7 +279,8 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
 ### Parameters
 
 - `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
-- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development. 
+- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -290,15 +295,16 @@ curl -X POST http://localhost:11434/api/push -d '{
 
 Streaming response that starts with:
 
 ```json
-{"status":"retrieving manifest"}
+{ "status": "retrieving manifest" }
 ```
 
 and then:
 
 ```json
 {
-"status":"starting upload","digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
-"total":1928429856
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
 }
 ```
@@ -306,9 +312,10 @@ Then there is a series of uploading responses:
 
 ```json
 {
-"status":"starting upload",
-"digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
-"total":1928429856}
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
+}
 ```
 
 Finally, when the upload is complete:
diff --git a/server/routes.go b/server/routes.go
index 3e5fa5e8..34cbb05e 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -240,6 +240,23 @@ func GenerateHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		var response api.GenerateResponse
+		generated := ""
+		for resp := range ch {
+			if r, ok := resp.(api.GenerateResponse); ok {
+				generated += r.Response
+				response = r
+			} else {
+				c.JSON(http.StatusInternalServerError, resp)
+				return
+			}
+		}
+		response.Response = generated
+		c.JSON(http.StatusOK, response)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -309,6 +326,11 @@ func PullModelHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		waitForStream(c, ch)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -336,6 +358,11 @@ func PushModelHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		waitForStream(c, ch)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -363,6 +390,11 @@ func CreateModelHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		waitForStream(c, ch)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -603,6 +635,31 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 	return s.Serve(ln)
 }
 
+func waitForStream(c *gin.Context, ch chan interface{}) {
+	c.Header("Content-Type", "application/json")
+	for resp := range ch {
+		switch r := resp.(type) {
+		case api.ProgressResponse:
+			if r.Status == "success" {
+				c.JSON(http.StatusOK, r)
+				return
+			}
+		case gin.H:
+			if errorMsg, ok := r["error"].(string); ok {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
+				return
+			} else {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in progress response"})
+				return
+			}
+		default:
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
+			return
+		}
+	}
+	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
+}
+
 func streamResponse(c *gin.Context, ch chan any) {
 	c.Header("Content-Type", "application/x-ndjson")
 	c.Stream(func(w io.Writer) bool {