b9f5e16c80
* Initial Batch Embedding * Revert "Initial Batch Embedding" This reverts commit c22d54895a280b54c727279d85a5fc94defb5a29. * Initial Draft * mock up notes * api/embed draft * add server function * check normalization * clean up * normalization * playing around with truncate stuff * Truncation * Truncation * move normalization to go * Integration Test Template * Truncation Integration Tests * Clean up * use float32 * move normalize * move normalize test * refactoring * integration float32 * input handling and handler testing * Refactoring of legacy and new * clear comments * merge conflicts * touches * embedding type 64 * merge conflicts * fix hanging on single string * refactoring * test values * set context length * clean up * testing clean up * testing clean up * remove function closure * Revert "remove function closure" This reverts commit 55d48c6ed17abe42e7a122e69d603ef0c1506787. * remove function closure * remove redundant error check * clean up * more clean up * clean up
385 lines
11 KiB
Go
385 lines
11 KiB
Go
// Package api implements the client-side API for code wishing to interact
|
|
// with the ollama service. The methods of the [Client] type correspond to
|
|
// the ollama REST API as described in [the API documentation].
|
|
// The ollama command-line client itself uses this package to interact with
|
|
// the backend service.
|
|
//
|
|
// # Examples
|
|
//
|
|
// Several examples of using this package are available [in the GitHub
|
|
// repository].
|
|
//
|
|
// [the API documentation]: https://github.com/ollama/ollama/blob/main/docs/api.md
|
|
// [in the GitHub repository]: https://github.com/ollama/ollama/tree/main/examples
|
|
package api
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"runtime"
|
|
|
|
"github.com/ollama/ollama/envconfig"
|
|
"github.com/ollama/ollama/format"
|
|
"github.com/ollama/ollama/version"
|
|
)
|
|
|
|
// Client holds the state needed to talk to the ollama service: the base
// URL that request paths are resolved against and the HTTP client used to
// send them. Use [ClientFromEnvironment] or [NewClient] to create new
// Clients.
type Client struct {
	// base is the server URL onto which each request path is joined.
	base *url.URL
	// http is the underlying HTTP client that performs every request.
	http *http.Client
}
|
|
|
|
func checkError(resp *http.Response, body []byte) error {
|
|
if resp.StatusCode < http.StatusBadRequest {
|
|
return nil
|
|
}
|
|
|
|
apiError := StatusError{StatusCode: resp.StatusCode}
|
|
|
|
err := json.Unmarshal(body, &apiError)
|
|
if err != nil {
|
|
// Use the full body as the message if we fail to decode a response.
|
|
apiError.ErrorMessage = string(body)
|
|
}
|
|
|
|
return apiError
|
|
}
|
|
|
|
// ClientFromEnvironment creates a new [Client] using configuration from the
|
|
// environment variable OLLAMA_HOST, which points to the network host and
|
|
// port on which the ollama service is listenting. The format of this variable
|
|
// is:
|
|
//
|
|
// <scheme>://<host>:<port>
|
|
//
|
|
// If the variable is not specified, a default ollama host and port will be
|
|
// used.
|
|
func ClientFromEnvironment() (*Client, error) {
|
|
ollamaHost := envconfig.Host
|
|
|
|
return &Client{
|
|
base: &url.URL{
|
|
Scheme: ollamaHost.Scheme,
|
|
Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
|
|
},
|
|
http: http.DefaultClient,
|
|
}, nil
|
|
}
|
|
|
|
func NewClient(base *url.URL, http *http.Client) *Client {
|
|
return &Client{
|
|
base: base,
|
|
http: http,
|
|
}
|
|
}
|
|
|
|
func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
|
|
var reqBody io.Reader
|
|
var data []byte
|
|
var err error
|
|
|
|
switch reqData := reqData.(type) {
|
|
case io.Reader:
|
|
// reqData is already an io.Reader
|
|
reqBody = reqData
|
|
case nil:
|
|
// noop
|
|
default:
|
|
data, err = json.Marshal(reqData)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
reqBody = bytes.NewReader(data)
|
|
}
|
|
|
|
requestURL := c.base.JoinPath(path)
|
|
request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), reqBody)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
request.Header.Set("Content-Type", "application/json")
|
|
request.Header.Set("Accept", "application/json")
|
|
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
|
|
|
respObj, err := c.http.Do(request)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer respObj.Body.Close()
|
|
|
|
respBody, err := io.ReadAll(respObj.Body)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := checkError(respObj, respBody); err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(respBody) > 0 && respData != nil {
|
|
if err := json.Unmarshal(respBody, respData); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// maxBufferSize is both the initial capacity and the maximum size of the
// line buffer handed to the bufio.Scanner in stream, i.e. the upper bound on
// the length of a single response line that stream will accept.
const maxBufferSize = 512 * format.KiloByte
|
|
|
|
func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
|
|
var buf *bytes.Buffer
|
|
if data != nil {
|
|
bts, err := json.Marshal(data)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
buf = bytes.NewBuffer(bts)
|
|
}
|
|
|
|
requestURL := c.base.JoinPath(path)
|
|
request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), buf)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
request.Header.Set("Content-Type", "application/json")
|
|
request.Header.Set("Accept", "application/x-ndjson")
|
|
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
|
|
|
response, err := c.http.Do(request)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer response.Body.Close()
|
|
|
|
scanner := bufio.NewScanner(response.Body)
|
|
// increase the buffer size to avoid running out of space
|
|
scanBuf := make([]byte, 0, maxBufferSize)
|
|
scanner.Buffer(scanBuf, maxBufferSize)
|
|
for scanner.Scan() {
|
|
var errorResponse struct {
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
bts := scanner.Bytes()
|
|
if err := json.Unmarshal(bts, &errorResponse); err != nil {
|
|
return fmt.Errorf("unmarshal: %w", err)
|
|
}
|
|
|
|
if errorResponse.Error != "" {
|
|
return fmt.Errorf(errorResponse.Error)
|
|
}
|
|
|
|
if response.StatusCode >= http.StatusBadRequest {
|
|
return StatusError{
|
|
StatusCode: response.StatusCode,
|
|
Status: response.Status,
|
|
ErrorMessage: errorResponse.Error,
|
|
}
|
|
}
|
|
|
|
if err := fn(bts); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GenerateResponseFunc is a function that [Client.Generate] invokes every time
|
|
// a response is received from the service. If this function returns an error,
|
|
// [Client.Generate] will stop generating and return this error.
|
|
type GenerateResponseFunc func(GenerateResponse) error
|
|
|
|
// Generate generates a response for a given prompt. The req parameter should
|
|
// be populated with prompt details. fn is called for each response (there may
|
|
// be multiple responses, e.g. in case streaming is enabled).
|
|
func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error {
|
|
return c.stream(ctx, http.MethodPost, "/api/generate", req, func(bts []byte) error {
|
|
var resp GenerateResponse
|
|
if err := json.Unmarshal(bts, &resp); err != nil {
|
|
return err
|
|
}
|
|
|
|
return fn(resp)
|
|
})
|
|
}
|
|
|
|
// ChatResponseFunc is a function that [Client.Chat] invokes every time
|
|
// a response is received from the service. If this function returns an error,
|
|
// [Client.Chat] will stop generating and return this error.
|
|
type ChatResponseFunc func(ChatResponse) error
|
|
|
|
// Chat generates the next message in a chat. [ChatRequest] may contain a
|
|
// sequence of messages which can be used to maintain chat history with a model.
|
|
// fn is called for each response (there may be multiple responses, e.g. if case
|
|
// streaming is enabled).
|
|
func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
|
|
return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
|
|
var resp ChatResponse
|
|
if err := json.Unmarshal(bts, &resp); err != nil {
|
|
return err
|
|
}
|
|
|
|
return fn(resp)
|
|
})
|
|
}
|
|
|
|
// PullProgressFunc is a function that [Client.Pull] invokes every time there
|
|
// is progress with a "pull" request sent to the service. If this function
|
|
// returns an error, [Client.Pull] will stop the process and return this error.
|
|
type PullProgressFunc func(ProgressResponse) error
|
|
|
|
// Pull downloads a model from the ollama library. fn is called each time
|
|
// progress is made on the request and can be used to display a progress bar,
|
|
// etc.
|
|
func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
|
|
return c.stream(ctx, http.MethodPost, "/api/pull", req, func(bts []byte) error {
|
|
var resp ProgressResponse
|
|
if err := json.Unmarshal(bts, &resp); err != nil {
|
|
return err
|
|
}
|
|
|
|
return fn(resp)
|
|
})
|
|
}
|
|
|
|
// PushProgressFunc is a function that [Client.Push] invokes when progress is
|
|
// made.
|
|
// It's similar to other progress function types like [PullProgressFunc].
|
|
type PushProgressFunc func(ProgressResponse) error
|
|
|
|
// Push uploads a model to the model library; requires registering for ollama.ai
|
|
// and adding a public key first. fn is called each time progress is made on
|
|
// the request and can be used to display a progress bar, etc.
|
|
func (c *Client) Push(ctx context.Context, req *PushRequest, fn PushProgressFunc) error {
|
|
return c.stream(ctx, http.MethodPost, "/api/push", req, func(bts []byte) error {
|
|
var resp ProgressResponse
|
|
if err := json.Unmarshal(bts, &resp); err != nil {
|
|
return err
|
|
}
|
|
|
|
return fn(resp)
|
|
})
|
|
}
|
|
|
|
// CreateProgressFunc is a function that [Client.Create] invokes when progress
|
|
// is made.
|
|
// It's similar to other progress function types like [PullProgressFunc].
|
|
type CreateProgressFunc func(ProgressResponse) error
|
|
|
|
// Create creates a model from a [Modelfile]. fn is a progress function that
|
|
// behaves similarly to other methods (see [Client.Pull]).
|
|
//
|
|
// [Modelfile]: https://github.com/ollama/ollama/blob/main/docs/modelfile.md
|
|
func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgressFunc) error {
|
|
return c.stream(ctx, http.MethodPost, "/api/create", req, func(bts []byte) error {
|
|
var resp ProgressResponse
|
|
if err := json.Unmarshal(bts, &resp); err != nil {
|
|
return err
|
|
}
|
|
|
|
return fn(resp)
|
|
})
|
|
}
|
|
|
|
// List lists models that are available locally.
|
|
func (c *Client) List(ctx context.Context) (*ListResponse, error) {
|
|
var lr ListResponse
|
|
if err := c.do(ctx, http.MethodGet, "/api/tags", nil, &lr); err != nil {
|
|
return nil, err
|
|
}
|
|
return &lr, nil
|
|
}
|
|
|
|
// List running models.
|
|
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
|
|
var lr ProcessResponse
|
|
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
|
|
return nil, err
|
|
}
|
|
return &lr, nil
|
|
}
|
|
|
|
// Copy copies a model - creating a model with another name from an existing
|
|
// model.
|
|
func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
|
|
if err := c.do(ctx, http.MethodPost, "/api/copy", req, nil); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Delete deletes a model and its data.
|
|
func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error {
|
|
if err := c.do(ctx, http.MethodDelete, "/api/delete", req, nil); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Show obtains model information, including details, modelfile, license etc.
|
|
func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) {
|
|
var resp ShowResponse
|
|
if err := c.do(ctx, http.MethodPost, "/api/show", req, &resp); err != nil {
|
|
return nil, err
|
|
}
|
|
return &resp, nil
|
|
}
|
|
|
|
// Hearbeat checks if the server has started and is responsive; if yes, it
|
|
// returns nil, otherwise an error.
|
|
func (c *Client) Heartbeat(ctx context.Context) error {
|
|
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Embed generates embeddings from a model.
|
|
func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
|
|
var resp EmbedResponse
|
|
if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
|
|
return nil, err
|
|
}
|
|
return &resp, nil
|
|
}
|
|
|
|
// Embeddings generates an embedding from a model.
|
|
func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
|
|
var resp EmbeddingResponse
|
|
if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
|
|
return nil, err
|
|
}
|
|
return &resp, nil
|
|
}
|
|
|
|
// CreateBlob creates a blob from a file on the server. digest is the
|
|
// expected SHA256 digest of the file, and r represents the file.
|
|
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
|
|
return c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil)
|
|
}
|
|
|
|
// Version returns the Ollama server version as a string.
|
|
func (c *Client) Version(ctx context.Context) (string, error) {
|
|
var version struct {
|
|
Version string `json:"version"`
|
|
}
|
|
|
|
if err := c.do(ctx, http.MethodGet, "/api/version", nil, &version); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return version.Version, nil
|
|
}
|