diff --git a/.dockerignore b/.dockerignore index a9253852..150c8f6e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,3 +6,4 @@ scripts llm/llama.cpp/ggml llm/llama.cpp/gguf .env +.cache diff --git a/.gitignore b/.gitignore index 1e9ab3f4..feb68d6c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ dist ollama ggml-metal.metal +.cache diff --git a/README.md b/README.md index 875adbd2..3ee49d8d 100644 --- a/README.md +++ b/README.md @@ -206,7 +206,7 @@ Ollama has a REST API for running and managing models. For example, to generate text from a model: ``` -curl -X POST http://localhost:11434/api/generate -d '{ +curl http://localhost:11434/api/generate -d '{ "model": "llama2", "prompt":"Why is the sky blue?" }' @@ -229,6 +229,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [Web UI](https://github.com/ollama-webui/ollama-webui) - [Ollamac](https://github.com/kevinhermawan/Ollamac) - [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md) +- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core) ### Terminal @@ -237,11 +238,13 @@ See the [API documentation](./docs/api.md) for all endpoints. 
- [Emacs client](https://github.com/zweifisch/ollama) - [gen.nvim](https://github.com/David-Kunz/gen.nvim) - [ollama.nvim](https://github.com/nomnivore/ollama.nvim) +- [ogpt.nvim](https://github.com/huynle/ogpt.nvim) - [gptel Emacs client](https://github.com/karthink/gptel) ### Libraries - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) +- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example) - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html) - [LiteLLM](https://github.com/BerriAI/litellm) - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) @@ -250,6 +253,11 @@ See the [API documentation](./docs/api.md) for all endpoints. - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama) - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit) - [Ollama for Dart](https://github.com/breitburg/dart-ollama) +- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel) + +### Mobile + +- [Maid](https://github.com/danemadsen/Maid) (Mobile Artificial Intelligence Distribution) ### Extensions & Plugins @@ -261,3 +269,4 @@ See the [API documentation](./docs/api.md) for all endpoints. 
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot) - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation) +- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama) diff --git a/api/client.go b/api/client.go index 974c08eb..44af222c 100644 --- a/api/client.go +++ b/api/client.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "io" "net" @@ -95,11 +96,19 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData var reqBody io.Reader var data []byte var err error - if reqData != nil { + + switch reqData := reqData.(type) { + case io.Reader: + // reqData is already an io.Reader + reqBody = reqData + case nil: + // noop + default: data, err = json.Marshal(reqData) if err != nil { return err } + reqBody = bytes.NewReader(data) } @@ -287,3 +296,18 @@ func (c *Client) Heartbeat(ctx context.Context) error { } return nil } + +func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error { + if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil { + var statusError StatusError + if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound { + return err + } + + if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil { + return err + } + } + + return nil +} diff --git a/api/types.go b/api/types.go index ffa5b7ca..2a36a1f6 100644 --- a/api/types.go +++ b/api/types.go @@ -99,9 +99,10 @@ type EmbeddingResponse struct { } type CreateRequest struct { - Name string `json:"name"` - Path string `json:"path"` - Stream *bool `json:"stream,omitempty"` + Name string `json:"name"` + Path string `json:"path"` + Modelfile string `json:"modelfile"` + Stream *bool `json:"stream,omitempty"` } type DeleteRequest struct { diff --git a/cmd/cmd.go b/cmd/cmd.go index d7839558..56d3471c 100644 --- a/cmd/cmd.go +++ 
b/cmd/cmd.go @@ -1,9 +1,11 @@ package cmd import ( + "bytes" "context" "crypto/ed25519" "crypto/rand" + "crypto/sha256" "encoding/pem" "errors" "fmt" @@ -20,7 +22,6 @@ import ( "syscall" "time" - "github.com/dustin/go-humanize" "github.com/olekukonko/tablewriter" "github.com/spf13/cobra" "golang.org/x/crypto/ssh" @@ -28,7 +29,8 @@ import ( "github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/format" - "github.com/jmorganca/ollama/progressbar" + "github.com/jmorganca/ollama/parser" + "github.com/jmorganca/ollama/progress" "github.com/jmorganca/ollama/readline" "github.com/jmorganca/ollama/server" "github.com/jmorganca/ollama/version" @@ -46,49 +48,95 @@ func CreateHandler(cmd *cobra.Command, args []string) error { return err } - var spinner *Spinner + p := progress.NewProgress(os.Stderr) + defer p.Stop() - var currentDigest string - var bar *progressbar.ProgressBar + bars := make(map[string]*progress.Bar) + + modelfile, err := os.ReadFile(filename) + if err != nil { + return err + } + + commands, err := parser.Parse(bytes.NewReader(modelfile)) + if err != nil { + return err + } + + home, err := os.UserHomeDir() + if err != nil { + return err + } + + status := "transferring model data" + spinner := progress.NewSpinner(status) + p.Add(status, spinner) + + for _, c := range commands { + switch c.Name { + case "model", "adapter": + path := c.Args + if path == "~" { + path = home + } else if strings.HasPrefix(path, "~/") { + path = filepath.Join(home, path[2:]) + } + + if !filepath.IsAbs(path) { + path = filepath.Join(filepath.Dir(filename), path) + } + + bin, err := os.Open(path) + if errors.Is(err, os.ErrNotExist) && c.Name == "model" { + continue + } else if err != nil { + return err + } + defer bin.Close() + + hash := sha256.New() + if _, err := io.Copy(hash, bin); err != nil { + return err + } + bin.Seek(0, io.SeekStart) + + digest := fmt.Sprintf("sha256:%x", hash.Sum(nil)) + if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil { + return err 
+ } + + modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest)) + } + } - request := api.CreateRequest{Name: args[0], Path: filename} fn := func(resp api.ProgressResponse) error { - if resp.Digest != currentDigest && resp.Digest != "" { - if spinner != nil { - spinner.Stop() + if resp.Digest != "" { + spinner.Stop() + + bar, ok := bars[resp.Digest] + if !ok { + bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed) + bars[resp.Digest] = bar + p.Add(resp.Digest, bar) } - currentDigest = resp.Digest - // pulling - bar = progressbar.DefaultBytes( - resp.Total, - resp.Status, - ) - bar.Set64(resp.Completed) - } else if resp.Digest == currentDigest && resp.Digest != "" { - bar.Set64(resp.Completed) - } else { - currentDigest = "" - if spinner != nil { - spinner.Stop() - } - spinner = NewSpinner(resp.Status) - go spinner.Spin(100 * time.Millisecond) + + bar.Set(resp.Completed) + } else if status != resp.Status { + spinner.Stop() + + status = resp.Status + spinner = progress.NewSpinner(status) + p.Add(status, spinner) } return nil } + request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)} if err := client.Create(context.Background(), &request, fn); err != nil { return err } - if spinner != nil { - spinner.Stop() - if spinner.description != "success" { - return errors.New("unexpected end to create model") - } - } - return nil } @@ -125,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error { return err } - var currentDigest string - var bar *progressbar.ProgressBar + p := progress.NewProgress(os.Stderr) + defer p.Stop() + + bars := make(map[string]*progress.Bar) + var status string + var spinner *progress.Spinner - request := api.PushRequest{Name: args[0], Insecure: insecure} fn := func(resp api.ProgressResponse) error { - if resp.Digest != currentDigest && resp.Digest != "" { - currentDigest = resp.Digest - bar = progressbar.DefaultBytes( - resp.Total, - 
fmt.Sprintf("pushing %s...", resp.Digest[7:19]), - ) + if resp.Digest != "" { + if spinner != nil { + spinner.Stop() + } - bar.Set64(resp.Completed) - } else if resp.Digest == currentDigest && resp.Digest != "" { - bar.Set64(resp.Completed) - } else { - currentDigest = "" - fmt.Println(resp.Status) + bar, ok := bars[resp.Digest] + if !ok { + bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed) + bars[resp.Digest] = bar + p.Add(resp.Digest, bar) + } + + bar.Set(resp.Completed) + } else if status != resp.Status { + if spinner != nil { + spinner.Stop() + } + + status = resp.Status + spinner = progress.NewSpinner(status) + p.Add(status, spinner) } + return nil } + request := api.PushRequest{Name: args[0], Insecure: insecure} if err := client.Push(context.Background(), &request, fn); err != nil { return err } - if bar != nil && !bar.IsFinished() { - return errors.New("unexpected end to push model") - } - + spinner.Stop() return nil } @@ -173,7 +231,7 @@ func ListHandler(cmd *cobra.Command, args []string) error { for _, m := range models.Models { if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) { - data = append(data, []string{m.Name, m.Digest[:12], humanize.Bytes(uint64(m.Size)), format.HumanTime(m.ModifiedAt, "Never")}) + data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), format.HumanTime(m.ModifiedAt, "Never")}) } } @@ -305,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error { return err } - return pull(args[0], insecure) -} - -func pull(model string, insecure bool) error { client, err := api.ClientFromEnvironment() if err != nil { return err } - var currentDigest string - var bar *progressbar.ProgressBar + p := progress.NewProgress(os.Stderr) + defer p.Stop() + + bars := make(map[string]*progress.Bar) + + var status string + var spinner *progress.Spinner - request := api.PullRequest{Name: model, Insecure: insecure} fn := func(resp api.ProgressResponse) error { - if 
resp.Digest != currentDigest && resp.Digest != "" { - currentDigest = resp.Digest - bar = progressbar.DefaultBytes( - resp.Total, - fmt.Sprintf("pulling %s...", resp.Digest[7:19]), - ) + if resp.Digest != "" { + if spinner != nil { + spinner.Stop() + } - bar.Set64(resp.Completed) - } else if resp.Digest == currentDigest && resp.Digest != "" { - bar.Set64(resp.Completed) - } else { - currentDigest = "" - fmt.Println(resp.Status) + bar, ok := bars[resp.Digest] + if !ok { + bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed) + bars[resp.Digest] = bar + p.Add(resp.Digest, bar) + } + + bar.Set(resp.Completed) + } else if status != resp.Status { + if spinner != nil { + spinner.Stop() + } + + status = resp.Status + spinner = progress.NewSpinner(status) + p.Add(status, spinner) } return nil } + request := api.PullRequest{Name: args[0], Insecure: insecure} if err := client.Pull(context.Background(), &request, fn); err != nil { return err } - if bar != nil && !bar.IsFinished() { - return errors.New("unexpected end to pull model") - } - return nil } @@ -397,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st return err } - spinner := NewSpinner("") - go spinner.Spin(60 * time.Millisecond) + p := progress.NewProgress(os.Stderr) + defer p.StopAndClear() + + spinner := progress.NewSpinner("") + p.Add("", spinner) var latest api.GenerateResponse @@ -430,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format} fn := func(response api.GenerateResponse) error { - if !spinner.IsFinished() { - spinner.Finish() - } + p.StopAndClear() latest = response @@ -466,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st if err := client.Generate(cancelCtx, &request, fn); err != nil { if strings.Contains(err.Error(), "context 
canceled") && abort { - spinner.Finish() return nil } return err diff --git a/cmd/spinner.go b/cmd/spinner.go deleted file mode 100644 index 53751c74..00000000 --- a/cmd/spinner.go +++ /dev/null @@ -1,44 +0,0 @@ -package cmd - -import ( - "fmt" - "os" - "time" - - "github.com/jmorganca/ollama/progressbar" -) - -type Spinner struct { - description string - *progressbar.ProgressBar -} - -func NewSpinner(description string) *Spinner { - return &Spinner{ - description: description, - ProgressBar: progressbar.NewOptions(-1, - progressbar.OptionSetWriter(os.Stderr), - progressbar.OptionThrottle(60*time.Millisecond), - progressbar.OptionSpinnerType(14), - progressbar.OptionSetRenderBlankState(true), - progressbar.OptionSetElapsedTime(false), - progressbar.OptionClearOnFinish(), - progressbar.OptionSetDescription(description), - ), - } -} - -func (s *Spinner) Spin(tick time.Duration) { - for range time.Tick(tick) { - if s.IsFinished() { - break - } - - s.Add(1) - } -} - -func (s *Spinner) Stop() { - s.Finish() - fmt.Println(s.description) -} diff --git a/docs/api.md b/docs/api.md index 08402266..99378ac3 100644 --- a/docs/api.md +++ b/docs/api.md @@ -51,14 +51,16 @@ Advanced parameters (optional): ### JSON mode -Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below. +Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below. + +> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace. ### Examples #### Request ```shell -curl -X POST http://localhost:11434/api/generate -d '{ +curl http://localhost:11434/api/generate -d '{ "model": "llama2", "prompt": "Why is the sky blue?" 
}' @@ -113,8 +115,8 @@ To calculate how fast the response is generated in tokens per second (token/s), #### Request (No streaming) ```shell -curl -X POST http://localhost:11434/api/generate -d '{ - "model": "llama2:7b", +curl http://localhost:11434/api/generate -d '{ + "model": "llama2", "prompt": "Why is the sky blue?", "stream": false }' @@ -126,7 +128,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```json { - "model": "llama2:7b", + "model": "llama2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "context": [1, 2, 3], @@ -147,7 +149,7 @@ If `stream` is set to `false`, the response will be a single JSON object: In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context. ```shell -curl -X POST http://localhost:11434/api/generate -d '{ +curl http://localhost:11434/api/generate -d '{ "model": "mistral", "prompt": "[INST] why is the sky blue? [/INST]", "raw": true, @@ -175,7 +177,7 @@ curl -X POST http://localhost:11434/api/generate -d '{ #### Request (JSON mode) ```shell -curl -X POST http://localhost:11434/api/generate -d '{ +curl http://localhost:11434/api/generate -d '{ "model": "llama2", "prompt": "What color is the sky at different times of the day? Respond using JSON", "format": "json", @@ -224,8 +226,8 @@ The value of `response` will be a string containing JSON similar to: If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override. 
```shell -curl -X POST http://localhost:11434/api/generate -d '{ - "model": "llama2:7b", +curl http://localhost:11434/api/generate -d '{ + "model": "llama2", "prompt": "Why is the sky blue?", "stream": false, "options": { @@ -270,7 +272,7 @@ curl -X POST http://localhost:11434/api/generate -d '{ ```json { - "model": "llama2:7b", + "model": "llama2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "context": [1, 2, 3], @@ -292,22 +294,23 @@ curl -X POST http://localhost:11434/api/generate -d '{ POST /api/create ``` -Create a model from a [`Modelfile`](./modelfile.md) +Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using [Create a Blob](#create-a-blob) and the value to the path indicated in the response. ### Parameters - `name`: name of the model to create -- `path`: path to the Modelfile +- `modelfile`: contents of the Modelfile - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects +- `path` (deprecated): path to the Modelfile ### Examples #### Request ```shell -curl -X POST http://localhost:11434/api/create -d '{ +curl http://localhost:11434/api/create -d '{ "name": "mario", - "path": "~/Modelfile" + "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros." }' ``` @@ -321,6 +324,54 @@ A stream of JSON objects. When finished, `status` is `success`. } ``` +### Check if a Blob Exists + +```shell +HEAD /api/blobs/:digest +``` + +Check if a blob is known to the server. 
+ +#### Query Parameters + +- `digest`: the SHA256 digest of the blob + +#### Examples + +##### Request + +```shell +curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2 +``` + +##### Response + +Return 200 OK if the blob exists, 404 Not Found if it does not. + +### Create a Blob + +```shell +POST /api/blobs/:digest +``` + +Create a blob from a file. Returns the server file path. + +#### Query Parameters + +- `digest`: the expected SHA256 digest of the file + +#### Examples + +##### Request + +```shell +curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2 +``` + +##### Response + +Return 201 Created if the blob was successfully created. + ## List Local Models ```shell @@ -345,7 +396,7 @@ A single JSON object will be returned. { "models": [ { - "name": "llama2:7b", + "name": "llama2", "modified_at": "2023-08-02T17:02:23.713454393-07:00", "size": 3791730596 }, @@ -376,7 +427,7 @@ Show details about a model including modelfile, template, parameters, license, a ```shell curl http://localhost:11434/api/show -d '{ - "name": "llama2:7b" + "name": "llama2" }' ``` @@ -405,7 +456,7 @@ Copy a model. Creates a model with another name from an existing model. ```shell curl http://localhost:11434/api/copy -d '{ - "source": "llama2:7b", + "source": "llama2", "destination": "llama2-backup" }' ``` @@ -459,8 +510,8 @@ Download a model from the ollama library. Cancelled pulls are resumed from where #### Request ```shell -curl -X POST http://localhost:11434/api/pull -d '{ - "name": "llama2:7b" +curl http://localhost:11434/api/pull -d '{ + "name": "llama2" }' ``` @@ -531,7 +582,7 @@ Upload a model to a model library. 
Requires registering for ollama.ai and adding #### Request ```shell -curl -X POST http://localhost:11434/api/push -d '{ +curl http://localhost:11434/api/push -d '{ "name": "mattw/pygmalion:latest" }' ``` @@ -599,8 +650,8 @@ Advanced parameters: #### Request ```shell -curl -X POST http://localhost:11434/api/embeddings -d '{ - "model": "llama2:7b", +curl http://localhost:11434/api/embeddings -d '{ + "model": "llama2", "prompt": "Here is an article about llamas..." }' ``` diff --git a/docs/faq.md b/docs/faq.md index 915d0cc0..ce32beb2 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -32,11 +32,11 @@ Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST` ```bash mkdir -p /etc/systemd/system/ollama.service.d -echo "[Service]" >>/etc/systemd/system/ollama.service.d/environment.conf +echo '[Service]' >>/etc/systemd/system/ollama.service.d/environment.conf ``` ```bash -echo "Environment=OLLAMA_HOST=0.0.0.0:11434" >>/etc/systemd/system/ollama.service.d/environment.conf +echo 'Environment="OLLAMA_HOST=0.0.0.0:11434"' >>/etc/systemd/system/ollama.service.d/environment.conf ``` Reload `systemd` and restart Ollama: @@ -59,7 +59,7 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve On Linux: ```bash -echo "Environment=OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com" >>/etc/systemd/system/ollama.service.d/environment.conf +echo 'Environment="OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf ``` Reload `systemd` and restart Ollama: @@ -74,8 +74,6 @@ systemctl restart ollama - macOS: Raw model data is stored under `~/.ollama/models`. - Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models` - - Below the models directory you will find a structure similar to the following: ```shell @@ -96,3 +94,63 @@ The manifest lists all the layers used in this model. You will see a `media type ### How can I change where Ollama stores models? 
To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service. + +## Does Ollama send my prompts and answers back to Ollama.ai to use in any way? + +No. Anything you do with Ollama, such as generate a response from the model, stays with you. We don't collect any data about how you use the model. You are always in control of your own data. + +## How can I use Ollama in Visual Studio Code? + +There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. You can see the list of [extensions & plugins](https://github.com/jmorganca/ollama#extensions--plugins) at the bottom of the main repository readme. + +## How do I use Ollama behind a proxy? + +Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values. + +When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. + +On macOS: + +```bash +HTTPS_PROXY=http://proxy.example.com ollama serve +``` + +On Linux: + +```bash +echo 'Environment="HTTPS_PROXY=https://proxy.example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf +``` + +Reload `systemd` and restart Ollama: + +```bash +systemctl daemon-reload +systemctl restart ollama +``` + +### How do I use Ollama behind a proxy in Docker? + +The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container. + +Alternatively, Docker daemon can be configured to use a proxy. 
Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy). + +Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate. + +```dockerfile +FROM ollama/ollama +COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt +RUN update-ca-certificate +``` + +Build and run this image: + +```shell +docker build -t ollama-with-ca . +docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca +``` + +## How do I use Ollama with GPU acceleration in Docker? + +The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details. + +GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation. diff --git a/docs/modelfile.md b/docs/modelfile.md index 47386b67..6f1d8e88 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -41,6 +41,8 @@ INSTRUCTION arguments ## Examples +### Basic `Modelfile` + An example of a `Modelfile` creating a mario blueprint: ```modelfile @@ -63,6 +65,35 @@ To use this: More examples are available in the [examples directory](../examples). +### `Modelfile`s in [ollama.ai/library][1] + +There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]: + +- Option 1: view a details page from a model's tags page: + 1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags) + 2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b) + 3. 
Scroll down to "Layers" + - Note: if the [`FROM` instruction](#from-required) is not present, + it means the model was created from a local file +- Option 2: use `ollama show` to print the `Modelfile` like so: + + ```bash + > ollama show --modelfile llama2:13b + # Modelfile generated by "ollama show" + # To build a new Modelfile based on this one, replace the FROM line with: + # FROM llama2:13b + + FROM /root/.ollama/models/blobs/sha256:123abc + TEMPLATE """[INST] {{ if and .First .System }}<>{{ .System }}<> + + {{ end }}{{ .Prompt }} [/INST] """ + SYSTEM """""" + PARAMETER stop [INST] + PARAMETER stop [/INST] + PARAMETER stop <> + PARAMETER stop <> + ``` + ## Instructions ### FROM (Required) @@ -177,3 +208,5 @@ LICENSE """ - the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments. - Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable. + +[1]: https://ollama.ai/library diff --git a/docs/tutorials.md b/docs/tutorials.md index bf8adf8d..0f520c95 100644 --- a/docs/tutorials.md +++ b/docs/tutorials.md @@ -4,5 +4,6 @@ Here is a list of ways you can use Ollama with other tools to build interesting - [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md) - [Using LangChain with Ollama in Python](./tutorials/langchainpy.md) +- [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md) -Also be sure to check out the [examples](../examples) directory for more ways to use Ollama. \ No newline at end of file +Also be sure to check out the [examples](../examples) directory for more ways to use Ollama. 
diff --git a/docs/tutorials/nvidia-jetson.md b/docs/tutorials/nvidia-jetson.md new file mode 100644 index 00000000..85cf741c --- /dev/null +++ b/docs/tutorials/nvidia-jetson.md @@ -0,0 +1,38 @@ +# Running Ollama on NVIDIA Jetson Devices + +With some minor configuration, Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/). The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack). + +NVIDIA Jetson devices are Linux-based embedded AI computers that are purpose-built for AI applications. + +Jetsons have an integrated GPU that is wired directly to the memory controller of the machine. For this reason, the `nvidia-smi` command is unrecognized, and Ollama proceeds to operate in "CPU only" +mode. This can be verified by using a monitoring tool like jtop. + +In order to address this, we simply pass the path to the Jetson's pre-installed CUDA libraries into `ollama serve` (while in a tmux session). We then hardcode the num_gpu parameters into a cloned +version of our target model. + +Prerequisites: + +- curl +- tmux + +Here are the steps: + +- Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.ai/install.sh | sh` +- Stop the Ollama service: `sudo systemctl stop ollama` +- Start Ollama serve in a tmux session called ollama_jetson and reference the CUDA libraries path: `tmux has-session -t ollama_jetson 2>/dev/null || tmux new-session -d -s ollama_jetson +'LD_LIBRARY_PATH=/usr/local/cuda/lib64 ollama serve'` +- Pull the model you want to use (e.g. 
mistral): `ollama pull mistral` +- Create a new Modelfile specifically for enabling GPU support on the Jetson: `touch ModelfileMistralJetson` +- In the ModelfileMistralJetson file, specify the FROM model and the num_gpu PARAMETER as shown below: + +``` +FROM mistral +PARAMETER num_gpu 999 +``` + +- Create a new model from your Modelfile: `ollama create mistral-jetson -f ./ModelfileMistralJetson` +- Run the new model: `ollama run mistral-jetson` + +If you run a monitoring tool like jtop you should now see that Ollama is using the Jetson's integrated GPU. + +And that's it! diff --git a/examples/jupyter-notebook/README.md b/examples/jupyter-notebook/README.md new file mode 100644 index 00000000..fba6802f --- /dev/null +++ b/examples/jupyter-notebook/README.md @@ -0,0 +1,5 @@ +# Ollama Jupyter Notebook + +This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely. + +For best results, use an instance with GPU accelerator. 
diff --git a/examples/jupyter-notebook/ollama.ipynb b/examples/jupyter-notebook/ollama.ipynb new file mode 100644 index 00000000..d57e2057 --- /dev/null +++ b/examples/jupyter-notebook/ollama.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "93f59dcb-c588-41b8-a792-55d88ade739c", + "metadata": {}, + "outputs": [], + "source": [ + "# Download and run the Ollama Linux install script\n", + "!curl https://ollama.ai/install.sh | sh\n", + "!command -v systemctl >/dev/null && sudo systemctl stop ollama" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "658c147e-c7f8-490e-910e-62b80f577dda", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install aiohttp pyngrok\n", + "\n", + "import os\n", + "import asyncio\n", + "from aiohttp import ClientSession\n", + "\n", + "# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n", + "# over the built-in library. This is particularly important for \n", + "# Google Colab which installs older drivers\n", + "os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n", + "\n", + "async def run(cmd):\n", + " '''\n", + " run is a helper function to run subcommands asynchronously.\n", + " '''\n", + " print('>>> starting', *cmd)\n", + " p = await asyncio.subprocess.create_subprocess_exec(\n", + " *cmd,\n", + " stdout=asyncio.subprocess.PIPE,\n", + " stderr=asyncio.subprocess.PIPE,\n", + " )\n", + "\n", + " async def pipe(lines):\n", + " async for line in lines:\n", + " print(line.strip().decode('utf-8'))\n", + "\n", + " await asyncio.gather(\n", + " pipe(p.stdout),\n", + " pipe(p.stderr),\n", + " )\n", + "\n", + "\n", + "await asyncio.gather(\n", + " run(['ollama', 'serve']),\n", + " run(['ngrok', 'http', '--log', 'stderr', '11434']),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e7735a55-9aad-4caf-8683-52e2163ba53b", + "metadata": {}, + "source": [ + "The previous cell starts two processes, `ollama` and `ngrok`. 
The log output will show a line like the following which describes the external address.\n", + "\n", + "```\n", + "t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n", + "```\n", + "\n", + "The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n", + "\n", + "```bash\n", + "export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n", + "ollama list\n", + "ollama run mistral\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py new file mode 100644 index 00000000..abc399c4 --- /dev/null +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -0,0 +1,31 @@ +import requests +import json +import random + +model = "llama2" +template = { + "firstName": "", + "lastName": "", + "address": { + "street": "", + "city": "", + "state": "", + "zipCode": "" + }, + "phoneNumber": "" +} + +prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}." 
+ +data = { + "prompt": prompt, + "model": model, + "format": "json", + "stream": False, + "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, +} + +print(f"Generating a sample user") +response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) +json_data = json.loads(response.text) +print(json.dumps(json.loads(json_data["response"]), indent=2)) diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py new file mode 100644 index 00000000..5f27448f --- /dev/null +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -0,0 +1,31 @@ +import requests +import json +import random + +countries = [ + "United States", + "United Kingdom", + "the Netherlands", + "Germany", + "Mexico", + "Canada", + "France", +] +country = random.choice(countries) +model = "llama2" + +prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." + +data = { + "prompt": prompt, + "model": model, + "format": "json", + "stream": False, + "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, +} + +print(f"Generating a sample user in {country}") +response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) +json_data = json.loads(response.text) + +print(json.dumps(json.loads(json_data["response"]), indent=2)) diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md new file mode 100644 index 00000000..2dc958e7 --- /dev/null +++ b/examples/python-json-datagenerator/readme.md @@ -0,0 +1,34 @@ +# JSON Output Example + +![llmjson 2023-11-10 15_31_31](https://github.com/jmorganca/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25) + +There are two python scripts in this example. 
`randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in. + +## Review the Code + +Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body. + +```python +prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." + +data = { + "prompt": prompt, + "model": model, + "format": "json", + "stream": False, + "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, +} +``` + +When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country. + +In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with. + +Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read. 
+ +```python +response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) +json_data = json.loads(response.text) + +print(json.dumps(json.loads(json_data["response"]), indent=2)) +``` diff --git a/examples/python-json-datagenerator/requirements.txt b/examples/python-json-datagenerator/requirements.txt new file mode 100644 index 00000000..9688b8ec --- /dev/null +++ b/examples/python-json-datagenerator/requirements.txt @@ -0,0 +1 @@ +Requests==2.31.0 diff --git a/examples/python-loganalysis/Modelfile b/examples/python-loganalysis/Modelfile new file mode 100644 index 00000000..5237cb6e --- /dev/null +++ b/examples/python-loganalysis/Modelfile @@ -0,0 +1,8 @@ +FROM codebooga:latest + +SYSTEM """ +You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. 
+""" + +PARAMETER temperature 0.3 + diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py new file mode 100644 index 00000000..2b7ddd48 --- /dev/null +++ b/examples/python-loganalysis/loganalysis.py @@ -0,0 +1,42 @@ +import sys +import re +import requests +import json + +# prelines and postlines represent the number of lines of context to include in the output around the error +prelines = 10 +postlines = 10 + +def find_errors_in_log_file(): + if len(sys.argv) < 2: + print("Usage: python loganalysis.py ") + return + + log_file_path = sys.argv[1] + with open(log_file_path, 'r') as log_file: + log_lines = log_file.readlines() + + error_logs = [] + for i, line in enumerate(log_lines): + if "error" in line.lower(): + start_index = max(0, i - prelines) + end_index = min(len(log_lines), i + postlines + 1) + error_logs.extend(log_lines[start_index:end_index]) + + return error_logs + +error_logs = find_errors_in_log_file() + +data = { + "prompt": "\n".join(error_logs), + "model": "mattw/loganalyzer" +} + + +response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) +for line in response.iter_lines(): + if line: + json_data = json.loads(line) + if json_data['done'] == False: + print(json_data['response'], end='', flush=True) + diff --git a/examples/python-loganalysis/logtest.logfile b/examples/python-loganalysis/logtest.logfile new file mode 100644 index 00000000..e4181bfe --- /dev/null +++ b/examples/python-loganalysis/logtest.logfile @@ -0,0 +1,32 @@ +2023-11-10 07:17:40 /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration +2023-11-10 07:17:40 /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/ +2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh +2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf +2023-11-10 07:17:40 
10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf +2023-11-10 07:17:40 /docker-entrypoint.sh: Sourcing /docker-entrypoint.d/15-local-resolvers.envsh +2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh +2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh +2023-11-10 07:17:40 /docker-entrypoint.sh: Configuration complete; ready for start up +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: using the "epoll" event method +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: nginx/1.25.3 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: built by gcc 12.2.0 (Debian 12.2.0-14) +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: OS: Linux 6.4.16-linuxkit +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1048576:1048576 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker processes +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 29 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 30 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 31 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 32 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 33 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 34 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 35 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 36 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 37 +2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 38 +2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:43 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" 
"-" +2023-11-10 07:17:44 2023/11/10 13:17:44 [error] 29#29: *1 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:8080", referrer: "http://localhost:8080/" +2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:44 +0000] "GET /favicon.ico HTTP/1.1" 404 555 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-" +2023-11-10 07:17:50 2023/11/10 13:17:50 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080" +2023-11-10 07:17:50 192.168.65.1 - - [10/Nov/2023:13:17:50 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-" +2023-11-10 07:18:53 2023/11/10 13:18:53 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080" +2023-11-10 07:18:53 192.168.65.1 - - [10/Nov/2023:13:18:53 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-" diff --git a/examples/python-loganalysis/readme.md b/examples/python-loganalysis/readme.md new file mode 100644 index 00000000..fbfb89a1 --- /dev/null +++ b/examples/python-loganalysis/readme.md @@ -0,0 +1,48 @@ +# Log Analysis example + +![loganalyzer 2023-11-10 08_53_29](https://github.com/jmorganca/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921) + +This example shows one possible way to create a log file analyzer. 
To use it, run: + +`python loganalysis.py ` + +You can try this with the `logtest.logfile` file included in this directory. + +## Review the code + +The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt: + +```plaintext +SYSTEM """ +You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. +""" +``` + +This model is available at https://ollama.ai/mattw/loganalyzer. You can customize it and add to your own namespace using the command `ollama create -f ` then `ollama push `. + +Then loganalysis.py scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after are set as the prompt for a call to the Generate API. + +```python +data = { + "prompt": "\n".join(error_logs), + "model": "mattw/loganalyzer" +} +``` + +Finally, the streamed output is parsed and the response field in the output is printed to the line. + +```python +response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) +for line in response.iter_lines(): + if line: + json_data = json.loads(line) + if json_data['done'] == False: + print(json_data['response'], end='') + +``` + +## Next Steps + +There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines. + +Also try different models and different prompts to analyze the data. 
You could consider adding retrieval augmented generation (RAG) to this to help understand newer log formats. diff --git a/examples/python-loganalysis/requirements.txt b/examples/python-loganalysis/requirements.txt new file mode 100644 index 00000000..9688b8ec --- /dev/null +++ b/examples/python-loganalysis/requirements.txt @@ -0,0 +1 @@ +Requests==2.31.0 diff --git a/format/bytes.go b/format/bytes.go index ca5ac640..4e5d43d1 100644 --- a/format/bytes.go +++ b/format/bytes.go @@ -1,23 +1,45 @@ package format -import "fmt" +import ( + "fmt" + "math" +) const ( Byte = 1 KiloByte = Byte * 1000 MegaByte = KiloByte * 1000 GigaByte = MegaByte * 1000 + TeraByte = GigaByte * 1000 ) func HumanBytes(b int64) string { + var value float64 + var unit string + switch { - case b > GigaByte: - return fmt.Sprintf("%d GB", b/GigaByte) - case b > MegaByte: - return fmt.Sprintf("%d MB", b/MegaByte) - case b > KiloByte: - return fmt.Sprintf("%d KB", b/KiloByte) + case b >= TeraByte: + value = float64(b) / TeraByte + unit = "TB" + case b >= GigaByte: + value = float64(b) / GigaByte + unit = "GB" + case b >= MegaByte: + value = float64(b) / MegaByte + unit = "MB" + case b >= KiloByte: + value = float64(b) / KiloByte + unit = "KB" default: return fmt.Sprintf("%d B", b) } + + switch { + case value >= 100: + return fmt.Sprintf("%d %s", int(value), unit) + case value != math.Trunc(value): + return fmt.Sprintf("%.1f %s", value, unit) + default: + return fmt.Sprintf("%d %s", int(value), unit) + } } diff --git a/go.mod b/go.mod index b0f117bd..6d4c629a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,6 @@ module github.com/jmorganca/ollama go 1.20 require ( - github.com/dustin/go-humanize v1.0.1 github.com/emirpasic/gods v1.18.1 github.com/gin-gonic/gin v1.9.1 github.com/mattn/go-runewidth v0.0.14 diff --git a/go.sum b/go.sum index 0b94adbc..dab05d3d 100644 --- a/go.sum +++ b/go.sum @@ -9,8 +9,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= -github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= diff --git a/llm/llama.cpp/generate_darwin_amd64.go b/llm/llama.cpp/generate_darwin_amd64.go index 7cbc36f5..779d1239 100644 --- a/llm/llama.cpp/generate_darwin_amd64.go +++ b/llm/llama.cpp/generate_darwin_amd64.go @@ -7,13 +7,13 @@ package llm //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch -//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 +//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build ggml/build/cpu --target server --config Release //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch //go:generate git -C gguf apply 
../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch -//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 +//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on //go:generate cmake --build gguf/build/cpu --target server --config Release //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner diff --git a/llm/llama.go b/llm/llama.go index d8859393..7172f91e 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -71,9 +71,10 @@ func chooseRunners(workDir, runnerType string) []ModelRunner { // IMPORTANT: the order of the runners in the array is the priority order switch runtime.GOOS { case "darwin": - runners = []ModelRunner{ - {Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}, - {Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}, + if runtime.GOARCH == "arm64" { + runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}} + } else { + runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}} } case "linux": runners = []ModelRunner{ @@ -225,7 +226,7 @@ type llama struct { } var ( - errNvidiaSMI = errors.New("nvidia-smi command failed") + errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed") errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only") ) @@ -342,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers "--embedding", } + if opts.MainGPU > 0 { + params = append(params, "--main-gpu", fmt.Sprintf("%d", 
opts.MainGPU)) + } + if opts.RopeFrequencyBase > 0 { params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase)) } @@ -543,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, "stream": true, "n_predict": llm.NumPredict, "n_keep": llm.NumKeep, + "main_gpu": llm.MainGPU, "temperature": llm.Temperature, "top_k": llm.TopK, "top_p": llm.TopP, diff --git a/llm/llm.go b/llm/llm.go index 22706da5..4901d9fe 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error if runtime.GOOS == "darwin" { switch ggml.FileType() { - case "Q8_0": + case "F32", "Q5_0", "Q5_1", "Q8_0": if ggml.Name() != "gguf" && opts.NumGPU != 0 { // GGML Q8_0 do not support Metal API and will // cause the runner to segmentation fault so disable GPU log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") opts.NumGPU = 0 } - case "F32", "Q5_0", "Q5_1": - if opts.NumGPU != 0 { - // F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will - // cause the runner to segmentation fault so disable GPU - log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") - opts.NumGPU = 0 - } } var requiredMemory int64 diff --git a/progress/bar.go b/progress/bar.go new file mode 100644 index 00000000..31fb8aea --- /dev/null +++ b/progress/bar.go @@ -0,0 +1,155 @@ +package progress + +import ( + "fmt" + "math" + "os" + "strings" + "time" + + "github.com/jmorganca/ollama/format" + "golang.org/x/term" +) + +type Stats struct { + rate int64 + value int64 + remaining time.Duration +} + +type Bar struct { + message string + messageWidth int + + maxValue int64 + initialValue int64 + currentValue int64 + + started time.Time + + stats Stats + statted time.Time +} + +func NewBar(message string, maxValue, initialValue int64) *Bar { + return &Bar{ + message: message, + messageWidth: -1, + maxValue: maxValue, + initialValue: initialValue, + currentValue: 
initialValue, + started: time.Now(), + } +} + +func (b *Bar) String() string { + termWidth, _, err := term.GetSize(int(os.Stderr.Fd())) + if err != nil { + termWidth = 80 + } + + var pre, mid, suf strings.Builder + + if b.message != "" { + message := strings.TrimSpace(b.message) + if b.messageWidth > 0 && len(message) > b.messageWidth { + message = message[:b.messageWidth] + } + + fmt.Fprintf(&pre, "%s", message) + if b.messageWidth-pre.Len() >= 0 { + pre.WriteString(strings.Repeat(" ", b.messageWidth-pre.Len())) + } + + pre.WriteString(" ") + } + + fmt.Fprintf(&pre, "%3.0f%% ", math.Floor(b.percent())) + fmt.Fprintf(&suf, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue)) + + stats := b.Stats() + rate := int64(stats.rate) + if rate > 0 { + fmt.Fprintf(&suf, ", %s/s", format.HumanBytes(rate)) + } + + fmt.Fprintf(&suf, ")") + + elapsed := time.Since(b.started) + if b.percent() < 100 && rate > 0 { + fmt.Fprintf(&suf, " [%s:%s]", elapsed.Round(time.Second), stats.remaining) + } else { + fmt.Fprintf(&suf, " ") + } + + mid.WriteString("▕") + + // add 3 extra spaces: 2 boundary characters and 1 space at the end + f := termWidth - pre.Len() - suf.Len() - 3 + n := int(float64(f) * b.percent() / 100) + + if n > 0 { + mid.WriteString(strings.Repeat("█", n)) + } + + if f-n > 0 { + mid.WriteString(strings.Repeat(" ", f-n)) + } + + mid.WriteString("▏") + + return pre.String() + mid.String() + suf.String() +} + +func (b *Bar) Set(value int64) { + if value >= b.maxValue { + value = b.maxValue + } + + b.currentValue = value +} + +func (b *Bar) percent() float64 { + if b.maxValue > 0 { + return float64(b.currentValue) / float64(b.maxValue) * 100 + } + + return 0 +} + +func (b *Bar) Stats() Stats { + if time.Since(b.statted) < time.Second { + return b.stats + } + + switch { + case b.statted.IsZero(): + b.stats = Stats{ + value: b.initialValue, + rate: 0, + remaining: 0, + } + case b.currentValue >= b.maxValue: + b.stats = Stats{ + value: b.maxValue, + rate: 
0, + remaining: 0, + } + default: + rate := b.currentValue - b.stats.value + var remaining time.Duration + if rate > 0 { + remaining = time.Second * time.Duration((float64(b.maxValue-b.currentValue))/(float64(rate))) + } + + b.stats = Stats{ + value: b.currentValue, + rate: rate, + remaining: remaining, + } + } + + b.statted = time.Now() + + return b.stats +} diff --git a/progress/progress.go b/progress/progress.go new file mode 100644 index 00000000..78917e9c --- /dev/null +++ b/progress/progress.go @@ -0,0 +1,113 @@ +package progress + +import ( + "fmt" + "io" + "sync" + "time" +) + +type State interface { + String() string +} + +type Progress struct { + mu sync.Mutex + w io.Writer + + pos int + + ticker *time.Ticker + states []State +} + +func NewProgress(w io.Writer) *Progress { + p := &Progress{w: w} + go p.start() + return p +} + +func (p *Progress) stop() bool { + for _, state := range p.states { + if spinner, ok := state.(*Spinner); ok { + spinner.Stop() + } + } + + if p.ticker != nil { + p.ticker.Stop() + p.ticker = nil + p.render() + return true + } + + return false +} + +func (p *Progress) Stop() bool { + stopped := p.stop() + if stopped { + fmt.Fprint(p.w, "\n") + } + return stopped +} + +func (p *Progress) StopAndClear() bool { + fmt.Fprint(p.w, "\033[?25l") + defer fmt.Fprint(p.w, "\033[?25h") + + stopped := p.stop() + if stopped { + // clear all progress lines + for i := 0; i < p.pos; i++ { + if i > 0 { + fmt.Fprint(p.w, "\033[A") + } + fmt.Fprint(p.w, "\033[2K\033[1G") + } + } + + return stopped +} + +func (p *Progress) Add(key string, state State) { + p.mu.Lock() + defer p.mu.Unlock() + + p.states = append(p.states, state) +} + +func (p *Progress) render() error { + p.mu.Lock() + defer p.mu.Unlock() + + fmt.Fprint(p.w, "\033[?25l") + defer fmt.Fprint(p.w, "\033[?25h") + + // clear already rendered progress lines + for i := 0; i < p.pos; i++ { + if i > 0 { + fmt.Fprint(p.w, "\033[A") + } + fmt.Fprint(p.w, "\033[2K\033[1G") + } + + // render progress 
lines + for i, state := range p.states { + fmt.Fprint(p.w, state.String()) + if i < len(p.states)-1 { + fmt.Fprint(p.w, "\n") + } + } + + p.pos = len(p.states) + + return nil +} + +func (p *Progress) start() { + p.ticker = time.NewTicker(100 * time.Millisecond) + for range p.ticker.C { + p.render() + } +} diff --git a/progress/spinner.go b/progress/spinner.go new file mode 100644 index 00000000..62ed4f09 --- /dev/null +++ b/progress/spinner.go @@ -0,0 +1,73 @@ +package progress + +import ( + "fmt" + "strings" + "time" +) + +type Spinner struct { + message string + messageWidth int + + parts []string + + value int + + ticker *time.Ticker + started time.Time + stopped time.Time +} + +func NewSpinner(message string) *Spinner { + s := &Spinner{ + message: message, + parts: []string{ + "⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏", + }, + started: time.Now(), + } + go s.start() + return s +} + +func (s *Spinner) String() string { + var sb strings.Builder + if len(s.message) > 0 { + message := strings.TrimSpace(s.message) + if s.messageWidth > 0 && len(message) > s.messageWidth { + message = message[:s.messageWidth] + } + + fmt.Fprintf(&sb, "%s", message) + if s.messageWidth-sb.Len() >= 0 { + sb.WriteString(strings.Repeat(" ", s.messageWidth-sb.Len())) + } + + sb.WriteString(" ") + } + + if s.stopped.IsZero() { + spinner := s.parts[s.value] + sb.WriteString(spinner) + sb.WriteString(" ") + } + + return sb.String() +} + +func (s *Spinner) start() { + s.ticker = time.NewTicker(100 * time.Millisecond) + for range s.ticker.C { + s.value = (s.value + 1) % len(s.parts) + if !s.stopped.IsZero() { + return + } + } +} + +func (s *Spinner) Stop() { + if s.stopped.IsZero() { + s.stopped = time.Now() + } +} diff --git a/progressbar/LICENSE b/progressbar/LICENSE deleted file mode 100644 index 0ca97652..00000000 --- a/progressbar/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2017 Zack - -Permission is hereby granted, free of charge, to any person obtaining a 
copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/progressbar/README.md b/progressbar/README.md deleted file mode 100644 index b9096dbd..00000000 --- a/progressbar/README.md +++ /dev/null @@ -1,121 +0,0 @@ -# progressbar - -[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml) -[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar) -[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar) -[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3) - -A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. 
In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6). - - -## Install - -``` -go get -u github.com/schollz/progressbar/v3 -``` - -## Usage - -### Basic usage - -```golang -bar := progressbar.Default(100) -for i := 0; i < 100; i++ { - bar.Add(1) - time.Sleep(40 * time.Millisecond) -} -``` - -which looks like: - -![Example of basic bar](examples/basic/basic.gif) - - -### I/O operations - -The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`. - -```golang -req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil) -resp, _ := http.DefaultClient.Do(req) -defer resp.Body.Close() - -f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644) -defer f.Close() - -bar := progressbar.DefaultBytes( - resp.ContentLength, - "downloading", -) -io.Copy(io.MultiWriter(f, bar), resp.Body) -``` - -which looks like: - -![Example of download bar](examples/download/download.gif) - - -### Progress bar with unknown length - -A progressbar with unknown length is a spinner. Any bar with -1 length will automatically convert it to a spinner with a customizable spinner type. For example, the above code can be run and set the `resp.ContentLength` to `-1`. - -which looks like: - -![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif) - - -### Customization - -There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option). 
- -```golang -bar := progressbar.NewOptions(1000, - progressbar.OptionSetWriter(ansi.NewAnsiStdout()), - progressbar.OptionEnableColorCodes(true), - progressbar.OptionShowBytes(true), - progressbar.OptionSetWidth(15), - progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."), - progressbar.OptionSetTheme(progressbar.Theme{ - Saucer: "[green]=[reset]", - SaucerHead: "[green]>[reset]", - SaucerPadding: " ", - BarStart: "[", - BarEnd: "]", - })) -for i := 0; i < 1000; i++ { - bar.Add(1) - time.Sleep(5 * time.Millisecond) -} -``` - -which looks like: - -![Example of customized bar](examples/customization/customization.gif) - - -## Contributing - -Pull requests are welcome. Feel free to... - -- Revise documentation -- Add new features -- Fix bugs -- Suggest improvements - -## Thanks - -Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0! - -Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support! - -Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features! - -Thanks [@tehstun](https://github.com/tehstun) for some great PRs! - -Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3! - -Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners. 
- -## License - -MIT diff --git a/progressbar/progressbar.go b/progressbar/progressbar.go deleted file mode 100644 index c1a36c43..00000000 --- a/progressbar/progressbar.go +++ /dev/null @@ -1,1098 +0,0 @@ -package progressbar - -import ( - "errors" - "fmt" - "io" - "math" - "os" - "regexp" - "strings" - "sync" - "time" - - "github.com/mattn/go-runewidth" - "github.com/mitchellh/colorstring" - "golang.org/x/term" -) - -// ProgressBar is a thread-safe, simple -// progress bar -type ProgressBar struct { - state state - config config - lock sync.Mutex -} - -// State is the basic properties of the bar -type State struct { - CurrentPercent float64 - CurrentBytes float64 - SecondsSince float64 - SecondsLeft float64 - KBsPerSecond float64 -} - -type state struct { - currentNum int64 - currentPercent int - lastPercent int - currentSaucerSize int - isAltSaucerHead bool - - lastShown time.Time - startTime time.Time - - counterTime time.Time - counterNumSinceLast int64 - counterLastTenRates []float64 - - maxLineWidth int - currentBytes float64 - finished bool - exit bool // Progress bar exit halfway - - rendered string -} - -type config struct { - max int64 // max number of the counter - maxHumanized string - maxHumanizedSuffix string - width int - writer io.Writer - theme Theme - renderWithBlankState bool - description string - iterationString string - ignoreLength bool // ignoreLength if max bytes not known - - // whether the output is expected to contain color codes - colorCodes bool - - // show rate of change in kB/sec or MB/sec - showBytes bool - // show the iterations per second - showIterationsPerSecond bool - showIterationsCount bool - - // whether the progress bar should show elapsed time. - // always enabled if predictTime is true. - elapsedTime bool - - showElapsedTimeOnFinish bool - - // whether the progress bar should attempt to predict the finishing - // time of the progress based on the start time and the average - // number of seconds between increments. 
- predictTime bool - - // minimum time to wait in between updates - throttleDuration time.Duration - - // clear bar once finished - clearOnFinish bool - - // spinnerType should be a number between 0-75 - spinnerType int - - // spinnerTypeOptionUsed remembers if the spinnerType was changed manually - spinnerTypeOptionUsed bool - - // spinner represents the spinner as a slice of string - spinner []string - - // fullWidth specifies whether to measure and set the bar to a specific width - fullWidth bool - - // invisible doesn't render the bar at all, useful for debugging - invisible bool - - onCompletion func() - - // whether the render function should make use of ANSI codes to reduce console I/O - useANSICodes bool - - // showDescriptionAtLineEnd specifies whether description should be written at line end instead of line start - showDescriptionAtLineEnd bool -} - -// Theme defines the elements of the bar -type Theme struct { - Saucer string - AltSaucerHead string - SaucerHead string - SaucerPadding string - BarStart string - BarEnd string -} - -// Option is the type all options need to adhere to -type Option func(p *ProgressBar) - -// OptionSetWidth sets the width of the bar -func OptionSetWidth(s int) Option { - return func(p *ProgressBar) { - p.config.width = s - } -} - -// OptionSpinnerType sets the type of spinner used for indeterminate bars -func OptionSpinnerType(spinnerType int) Option { - return func(p *ProgressBar) { - p.config.spinnerTypeOptionUsed = true - p.config.spinnerType = spinnerType - } -} - -// OptionSpinnerCustom sets the spinner used for indeterminate bars to the passed -// slice of string -func OptionSpinnerCustom(spinner []string) Option { - return func(p *ProgressBar) { - p.config.spinner = spinner - } -} - -// OptionSetTheme sets the elements the bar is constructed of -func OptionSetTheme(t Theme) Option { - return func(p *ProgressBar) { - p.config.theme = t - } -} - -// OptionSetVisibility sets the visibility -func 
OptionSetVisibility(visibility bool) Option { - return func(p *ProgressBar) { - p.config.invisible = !visibility - } -} - -// OptionFullWidth sets the bar to be full width -func OptionFullWidth() Option { - return func(p *ProgressBar) { - p.config.fullWidth = true - } -} - -// OptionSetWriter sets the output writer (defaults to os.StdOut) -func OptionSetWriter(w io.Writer) Option { - return func(p *ProgressBar) { - p.config.writer = w - } -} - -// OptionSetRenderBlankState sets whether or not to render a 0% bar on construction -func OptionSetRenderBlankState(r bool) Option { - return func(p *ProgressBar) { - p.config.renderWithBlankState = r - } -} - -// OptionSetDescription sets the description of the bar to render in front of it -func OptionSetDescription(description string) Option { - return func(p *ProgressBar) { - p.config.description = description - } -} - -// OptionEnableColorCodes enables or disables support for color codes -// using mitchellh/colorstring -func OptionEnableColorCodes(colorCodes bool) Option { - return func(p *ProgressBar) { - p.config.colorCodes = colorCodes - } -} - -// OptionSetElapsedTime will enable elapsed time. Always enabled if OptionSetPredictTime is true. -func OptionSetElapsedTime(elapsedTime bool) Option { - return func(p *ProgressBar) { - p.config.elapsedTime = elapsedTime - } -} - -// OptionSetPredictTime will also attempt to predict the time remaining. 
-func OptionSetPredictTime(predictTime bool) Option { - return func(p *ProgressBar) { - p.config.predictTime = predictTime - } -} - -// OptionShowCount will also print current count out of total -func OptionShowCount() Option { - return func(p *ProgressBar) { - p.config.showIterationsCount = true - } -} - -// OptionShowIts will also print the iterations/second -func OptionShowIts() Option { - return func(p *ProgressBar) { - p.config.showIterationsPerSecond = true - } -} - -// OptionShowElapsedOnFinish will keep the display of elapsed time on finish -func OptionShowElapsedTimeOnFinish() Option { - return func(p *ProgressBar) { - p.config.showElapsedTimeOnFinish = true - } -} - -// OptionSetItsString sets what's displayed for iterations a second. The default is "it" which would display: "it/s" -func OptionSetItsString(iterationString string) Option { - return func(p *ProgressBar) { - p.config.iterationString = iterationString - } -} - -// OptionThrottle will wait the specified duration before updating again. The default -// duration is 0 seconds. -func OptionThrottle(duration time.Duration) Option { - return func(p *ProgressBar) { - p.config.throttleDuration = duration - } -} - -// OptionClearOnFinish will clear the bar once its finished -func OptionClearOnFinish() Option { - return func(p *ProgressBar) { - p.config.clearOnFinish = true - } -} - -// OptionOnCompletion will invoke cmpl function once its finished -func OptionOnCompletion(cmpl func()) Option { - return func(p *ProgressBar) { - p.config.onCompletion = cmpl - } -} - -// OptionShowBytes will update the progress bar -// configuration settings to display/hide kBytes/Sec -func OptionShowBytes(val bool) Option { - return func(p *ProgressBar) { - p.config.showBytes = val - } -} - -// OptionUseANSICodes will use more optimized terminal i/o. -// -// Only useful in environments with support for ANSI escape sequences. 
-func OptionUseANSICodes(val bool) Option { - return func(p *ProgressBar) { - p.config.useANSICodes = val - } -} - -// OptionShowDescriptionAtLineEnd defines whether description should be written at line end instead of line start -func OptionShowDescriptionAtLineEnd() Option { - return func(p *ProgressBar) { - p.config.showDescriptionAtLineEnd = true - } -} - -var defaultTheme = Theme{Saucer: "█", SaucerPadding: " ", BarStart: "▕", BarEnd: "▏"} - -// NewOptions constructs a new instance of ProgressBar, with any options you specify -func NewOptions(max int, options ...Option) *ProgressBar { - return NewOptions64(int64(max), options...) -} - -// NewOptions64 constructs a new instance of ProgressBar, with any options you specify -func NewOptions64(max int64, options ...Option) *ProgressBar { - b := ProgressBar{ - state: getBasicState(), - config: config{ - writer: os.Stdout, - theme: defaultTheme, - iterationString: "it", - width: 40, - max: max, - throttleDuration: 0 * time.Nanosecond, - elapsedTime: true, - predictTime: true, - spinnerType: 9, - invisible: false, - }, - } - - for _, o := range options { - o(&b) - } - - if b.config.spinnerType < 0 || b.config.spinnerType > 75 { - panic("invalid spinner type, must be between 0 and 75") - } - - // ignoreLength if max bytes not known - if b.config.max == -1 { - b.config.ignoreLength = true - b.config.max = int64(b.config.width) - b.config.predictTime = false - } - - b.config.maxHumanized, b.config.maxHumanizedSuffix = humanizeBytes(float64(b.config.max)) - - if b.config.renderWithBlankState { - b.RenderBlank() - } - - return &b -} - -func getBasicState() state { - now := time.Now() - return state{ - startTime: now, - lastShown: now, - counterTime: now, - } -} - -// New returns a new ProgressBar -// with the specified maximum -func New(max int) *ProgressBar { - return NewOptions(max) -} - -// DefaultBytes provides a progressbar to measure byte -// throughput with recommended defaults. 
-// Set maxBytes to -1 to use as a spinner. -func DefaultBytes(maxBytes int64, description ...string) *ProgressBar { - desc := "" - if len(description) > 0 { - desc = description[0] - } - return NewOptions64( - maxBytes, - OptionSetDescription(desc), - OptionSetWriter(os.Stderr), - OptionShowBytes(true), - OptionSetWidth(10), - OptionThrottle(65*time.Millisecond), - OptionShowCount(), - OptionOnCompletion(func() { - fmt.Fprint(os.Stderr, "\n") - }), - OptionSpinnerType(14), - OptionFullWidth(), - OptionSetRenderBlankState(true), - ) -} - -// DefaultBytesSilent is the same as DefaultBytes, but does not output anywhere. -// String() can be used to get the output instead. -func DefaultBytesSilent(maxBytes int64, description ...string) *ProgressBar { - // Mostly the same bar as DefaultBytes - - desc := "" - if len(description) > 0 { - desc = description[0] - } - return NewOptions64( - maxBytes, - OptionSetDescription(desc), - OptionSetWriter(io.Discard), - OptionShowBytes(true), - OptionSetWidth(10), - OptionThrottle(65*time.Millisecond), - OptionShowCount(), - OptionSpinnerType(14), - OptionFullWidth(), - ) -} - -// Default provides a progressbar with recommended defaults. -// Set max to -1 to use as a spinner. -func Default(max int64, description ...string) *ProgressBar { - desc := "" - if len(description) > 0 { - desc = description[0] - } - return NewOptions64( - max, - OptionSetDescription(desc), - OptionSetWriter(os.Stderr), - OptionSetWidth(10), - OptionThrottle(65*time.Millisecond), - OptionShowCount(), - OptionShowIts(), - OptionOnCompletion(func() { - fmt.Fprint(os.Stderr, "\n") - }), - OptionSpinnerType(14), - OptionFullWidth(), - OptionSetRenderBlankState(true), - ) -} - -// DefaultSilent is the same as Default, but does not output anywhere. -// String() can be used to get the output instead. 
-func DefaultSilent(max int64, description ...string) *ProgressBar { - // Mostly the same bar as Default - - desc := "" - if len(description) > 0 { - desc = description[0] - } - return NewOptions64( - max, - OptionSetDescription(desc), - OptionSetWriter(io.Discard), - OptionSetWidth(10), - OptionThrottle(65*time.Millisecond), - OptionShowCount(), - OptionShowIts(), - OptionSpinnerType(14), - OptionFullWidth(), - ) -} - -// String returns the current rendered version of the progress bar. -// It will never return an empty string while the progress bar is running. -func (p *ProgressBar) String() string { - return p.state.rendered -} - -// RenderBlank renders the current bar state, you can use this to render a 0% state -func (p *ProgressBar) RenderBlank() error { - if p.config.invisible { - return nil - } - if p.state.currentNum == 0 { - p.state.lastShown = time.Time{} - } - return p.render() -} - -// Reset will reset the clock that is used -// to calculate current time and the time left. -func (p *ProgressBar) Reset() { - p.lock.Lock() - defer p.lock.Unlock() - - p.state = getBasicState() -} - -// Finish will fill the bar to full -func (p *ProgressBar) Finish() error { - p.lock.Lock() - p.state.currentNum = p.config.max - p.lock.Unlock() - return p.Add(0) -} - -// Exit will exit the bar to keep current state -func (p *ProgressBar) Exit() error { - p.lock.Lock() - defer p.lock.Unlock() - - p.state.exit = true - if p.config.onCompletion != nil { - p.config.onCompletion() - } - return nil -} - -// Add will add the specified amount to the progressbar -func (p *ProgressBar) Add(num int) error { - return p.Add64(int64(num)) -} - -// Set will set the bar to a current number -func (p *ProgressBar) Set(num int) error { - return p.Set64(int64(num)) -} - -// Set64 will set the bar to a current number -func (p *ProgressBar) Set64(num int64) error { - p.lock.Lock() - toAdd := num - int64(p.state.currentBytes) - p.lock.Unlock() - return p.Add64(toAdd) -} - -// Add64 will add the 
specified amount to the progressbar -func (p *ProgressBar) Add64(num int64) error { - if p.config.invisible { - return nil - } - p.lock.Lock() - defer p.lock.Unlock() - - if p.state.exit { - return nil - } - - // error out since OptionSpinnerCustom will always override a manually set spinnerType - if p.config.spinnerTypeOptionUsed && len(p.config.spinner) > 0 { - return errors.New("OptionSpinnerType and OptionSpinnerCustom cannot be used together") - } - - if p.config.max == 0 { - return errors.New("max must be greater than 0") - } - - if p.state.currentNum < p.config.max { - if p.config.ignoreLength { - p.state.currentNum = (p.state.currentNum + num) % p.config.max - } else { - p.state.currentNum += num - } - } - - p.state.currentBytes += float64(num) - - // reset the countdown timer every second to take rolling average - p.state.counterNumSinceLast += num - if time.Since(p.state.counterTime).Seconds() > 0.5 { - p.state.counterLastTenRates = append(p.state.counterLastTenRates, float64(p.state.counterNumSinceLast)/time.Since(p.state.counterTime).Seconds()) - if len(p.state.counterLastTenRates) > 10 { - p.state.counterLastTenRates = p.state.counterLastTenRates[1:] - } - p.state.counterTime = time.Now() - p.state.counterNumSinceLast = 0 - } - - percent := float64(p.state.currentNum) / float64(p.config.max) - p.state.currentSaucerSize = int(percent * float64(p.config.width)) - p.state.currentPercent = int(percent * 100) - updateBar := p.state.currentPercent != p.state.lastPercent && p.state.currentPercent > 0 - - p.state.lastPercent = p.state.currentPercent - if p.state.currentNum > p.config.max { - return errors.New("current number exceeds max") - } - - // always update if show bytes/second or its/second - if updateBar || p.config.showIterationsPerSecond || p.config.showIterationsCount { - return p.render() - } - - return nil -} - -// Clear erases the progress bar from the current line -func (p *ProgressBar) Clear() error { - return clearProgressBar(p.config, 
p.state) -} - -// Describe will change the description shown before the progress, which -// can be changed on the fly (as for a slow running process). -func (p *ProgressBar) Describe(description string) { - p.lock.Lock() - defer p.lock.Unlock() - p.config.description = description - if p.config.invisible { - return - } - p.render() -} - -// New64 returns a new ProgressBar -// with the specified maximum -func New64(max int64) *ProgressBar { - return NewOptions64(max) -} - -// GetMax returns the max of a bar -func (p *ProgressBar) GetMax() int { - return int(p.config.max) -} - -// GetMax64 returns the current max -func (p *ProgressBar) GetMax64() int64 { - return p.config.max -} - -// ChangeMax takes in a int -// and changes the max value -// of the progress bar -func (p *ProgressBar) ChangeMax(newMax int) { - p.ChangeMax64(int64(newMax)) -} - -// ChangeMax64 is basically -// the same as ChangeMax, -// but takes in a int64 -// to avoid casting -func (p *ProgressBar) ChangeMax64(newMax int64) { - p.config.max = newMax - - if p.config.showBytes { - p.config.maxHumanized, p.config.maxHumanizedSuffix = humanizeBytes(float64(p.config.max)) - } - - p.Add(0) // re-render -} - -// IsFinished returns true if progress bar is completed -func (p *ProgressBar) IsFinished() bool { - return p.state.finished -} - -// render renders the progress bar, updating the maximum -// rendered line width. this function is not thread-safe, -// so it must be called with an acquired lock. 
-func (p *ProgressBar) render() error { - // make sure that the rendering is not happening too quickly - // but always show if the currentNum reaches the max - if time.Since(p.state.lastShown).Nanoseconds() < p.config.throttleDuration.Nanoseconds() && - p.state.currentNum < p.config.max { - return nil - } - - if !p.config.useANSICodes { - // first, clear the existing progress bar - err := clearProgressBar(p.config, p.state) - if err != nil { - return err - } - } - - // check if the progress bar is finished - if !p.state.finished && p.state.currentNum >= p.config.max { - p.state.finished = true - if !p.config.clearOnFinish { - renderProgressBar(p.config, &p.state) - } - if p.config.onCompletion != nil { - p.config.onCompletion() - } - } - if p.state.finished { - // when using ANSI codes we don't pre-clean the current line - if p.config.useANSICodes && p.config.clearOnFinish { - err := clearProgressBar(p.config, p.state) - if err != nil { - return err - } - } - return nil - } - - // then, re-render the current progress bar - w, err := renderProgressBar(p.config, &p.state) - if err != nil { - return err - } - - if w > p.state.maxLineWidth { - p.state.maxLineWidth = w - } - - p.state.lastShown = time.Now() - - return nil -} - -// State returns the current state -func (p *ProgressBar) State() State { - p.lock.Lock() - defer p.lock.Unlock() - s := State{} - s.CurrentPercent = float64(p.state.currentNum) / float64(p.config.max) - s.CurrentBytes = p.state.currentBytes - s.SecondsSince = time.Since(p.state.startTime).Seconds() - if p.state.currentNum > 0 { - s.SecondsLeft = s.SecondsSince / float64(p.state.currentNum) * (float64(p.config.max) - float64(p.state.currentNum)) - } - s.KBsPerSecond = float64(p.state.currentBytes) / 1000.0 / s.SecondsSince - return s -} - -// regex matching ansi escape codes -var ansiRegex = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`) - -func getStringWidth(c config, str string, colorize bool) int { - if c.colorCodes { - // convert any color 
codes in the progress bar into the respective ANSI codes - str = colorstring.Color(str) - } - - // the width of the string, if printed to the console - // does not include the carriage return character - cleanString := strings.Replace(str, "\r", "", -1) - - if c.colorCodes { - // the ANSI codes for the colors do not take up space in the console output, - // so they do not count towards the output string width - cleanString = ansiRegex.ReplaceAllString(cleanString, "") - } - - // get the amount of runes in the string instead of the - // character count of the string, as some runes span multiple characters. - // see https://stackoverflow.com/a/12668840/2733724 - stringWidth := runewidth.StringWidth(cleanString) - return stringWidth -} - -func renderProgressBar(c config, s *state) (int, error) { - var sb strings.Builder - - averageRate := average(s.counterLastTenRates) - if len(s.counterLastTenRates) == 0 || s.finished { - // if no average samples, or if finished, - // then average rate should be the total rate - if t := time.Since(s.startTime).Seconds(); t > 0 { - averageRate = s.currentBytes / t - } else { - averageRate = 0 - } - } - - // show iteration count in "current/total" iterations format - if c.showIterationsCount { - if sb.Len() == 0 { - sb.WriteString("(") - } else { - sb.WriteString(", ") - } - if !c.ignoreLength { - if c.showBytes { - currentHumanize, currentSuffix := humanizeBytes(s.currentBytes) - if currentSuffix == c.maxHumanizedSuffix { - sb.WriteString(fmt.Sprintf("%s/%s%s", - currentHumanize, c.maxHumanized, c.maxHumanizedSuffix)) - } else { - sb.WriteString(fmt.Sprintf("%s%s/%s%s", - currentHumanize, currentSuffix, c.maxHumanized, c.maxHumanizedSuffix)) - } - } else { - sb.WriteString(fmt.Sprintf("%.0f/%d", s.currentBytes, c.max)) - } - } else { - if c.showBytes { - currentHumanize, currentSuffix := humanizeBytes(s.currentBytes) - sb.WriteString(fmt.Sprintf("%s%s", currentHumanize, currentSuffix)) - } else { - 
sb.WriteString(fmt.Sprintf("%.0f/%s", s.currentBytes, "-")) - } - } - } - - // show rolling average rate - if c.showBytes && averageRate > 0 && !math.IsInf(averageRate, 1) { - if sb.Len() == 0 { - sb.WriteString("(") - } else { - sb.WriteString(", ") - } - currentHumanize, currentSuffix := humanizeBytes(averageRate) - sb.WriteString(fmt.Sprintf("%s%s/s", currentHumanize, currentSuffix)) - } - - // show iterations rate - if c.showIterationsPerSecond { - if sb.Len() == 0 { - sb.WriteString("(") - } else { - sb.WriteString(", ") - } - if averageRate > 1 { - sb.WriteString(fmt.Sprintf("%0.0f %s/s", averageRate, c.iterationString)) - } else if averageRate*60 > 1 { - sb.WriteString(fmt.Sprintf("%0.0f %s/min", 60*averageRate, c.iterationString)) - } else { - sb.WriteString(fmt.Sprintf("%0.0f %s/hr", 3600*averageRate, c.iterationString)) - } - } - if sb.Len() > 0 { - sb.WriteString(")") - } - - leftBrac, rightBrac, saucer, saucerHead := "", "", "", "" - - // show time prediction in "current/total" seconds format - switch { - case c.predictTime: - rightBracNum := (time.Duration((1/averageRate)*(float64(c.max)-float64(s.currentNum))) * time.Second) - if rightBracNum.Seconds() < 0 { - rightBracNum = 0 * time.Second - } - rightBrac = rightBracNum.String() - fallthrough - case c.elapsedTime: - leftBrac = (time.Duration(time.Since(s.startTime).Seconds()) * time.Second).String() - } - - if c.fullWidth && !c.ignoreLength { - width, err := termWidth() - if err != nil { - width = 80 - } - - amend := 1 // an extra space at eol - switch { - case leftBrac != "" && rightBrac != "": - amend = 4 // space, square brackets and colon - case leftBrac != "" && rightBrac == "": - amend = 4 // space and square brackets and another space - case leftBrac == "" && rightBrac != "": - amend = 3 // space and square brackets - } - if c.showDescriptionAtLineEnd { - amend += 1 // another space - } - - c.width = width - getStringWidth(c, c.description, true) - 10 - amend - sb.Len() - len(leftBrac) - 
len(rightBrac) - s.currentSaucerSize = int(float64(s.currentPercent) / 100.0 * float64(c.width)) - } - if s.currentSaucerSize > 0 { - if c.ignoreLength { - saucer = strings.Repeat(c.theme.SaucerPadding, s.currentSaucerSize-1) - } else { - saucer = strings.Repeat(c.theme.Saucer, s.currentSaucerSize-1) - } - - // Check if an alternate saucer head is set for animation - if c.theme.AltSaucerHead != "" && s.isAltSaucerHead { - saucerHead = c.theme.AltSaucerHead - s.isAltSaucerHead = false - } else if c.theme.SaucerHead == "" || s.currentSaucerSize == c.width { - // use the saucer for the saucer head if it hasn't been set - // to preserve backwards compatibility - saucerHead = c.theme.Saucer - } else { - saucerHead = c.theme.SaucerHead - s.isAltSaucerHead = true - } - } - - /* - Progress Bar format - Description % |------ | (kb/s) (iteration count) (iteration rate) (predict time) - - or if showDescriptionAtLineEnd is enabled - % |------ | (kb/s) (iteration count) (iteration rate) (predict time) Description - */ - - repeatAmount := c.width - s.currentSaucerSize - if repeatAmount < 0 { - repeatAmount = 0 - } - - str := "" - - if c.ignoreLength { - selectedSpinner := spinners[c.spinnerType] - if len(c.spinner) > 0 { - selectedSpinner = c.spinner - } - spinner := selectedSpinner[int(math.Round(math.Mod(float64(time.Since(s.startTime).Milliseconds()/100), float64(len(selectedSpinner)))))] - if c.elapsedTime { - if c.showDescriptionAtLineEnd { - str = fmt.Sprintf("\r%s %s [%s] %s ", - spinner, - sb.String(), - leftBrac, - c.description) - } else { - str = fmt.Sprintf("\r%s %s %s [%s] ", - spinner, - c.description, - sb.String(), - leftBrac) - } - } else { - if c.showDescriptionAtLineEnd { - str = fmt.Sprintf("\r%s %s %s ", - spinner, - sb.String(), - c.description) - } else { - str = fmt.Sprintf("\r%s %s %s ", - spinner, - c.description, - sb.String()) - } - } - } else if rightBrac == "" { - str = fmt.Sprintf("%4d%% %s%s%s%s%s %s", - s.currentPercent, - c.theme.BarStart, - 
saucer, - saucerHead, - strings.Repeat(c.theme.SaucerPadding, repeatAmount), - c.theme.BarEnd, - sb.String()) - - if s.currentPercent == 100 && c.showElapsedTimeOnFinish { - str = fmt.Sprintf("%s [%s]", str, leftBrac) - } - - if c.showDescriptionAtLineEnd { - str = fmt.Sprintf("\r%s %s ", str, c.description) - } else { - str = fmt.Sprintf("\r%s%s ", c.description, str) - } - } else { - if s.currentPercent == 100 { - str = fmt.Sprintf("%4d%% %s%s%s%s%s %s", - s.currentPercent, - c.theme.BarStart, - saucer, - saucerHead, - strings.Repeat(c.theme.SaucerPadding, repeatAmount), - c.theme.BarEnd, - sb.String()) - - if c.showElapsedTimeOnFinish { - str = fmt.Sprintf("%s [%s]", str, leftBrac) - } - - if c.showDescriptionAtLineEnd { - str = fmt.Sprintf("\r%s %s", str, c.description) - } else { - str = fmt.Sprintf("\r%s%s", c.description, str) - } - } else { - str = fmt.Sprintf("%4d%% %s%s%s%s%s %s [%s:%s]", - s.currentPercent, - c.theme.BarStart, - saucer, - saucerHead, - strings.Repeat(c.theme.SaucerPadding, repeatAmount), - c.theme.BarEnd, - sb.String(), - leftBrac, - rightBrac) - - if c.showDescriptionAtLineEnd { - str = fmt.Sprintf("\r%s %s", str, c.description) - } else { - str = fmt.Sprintf("\r%s%s", c.description, str) - } - } - } - - if c.colorCodes { - // convert any color codes in the progress bar into the respective ANSI codes - str = colorstring.Color(str) - } - - s.rendered = str - - return getStringWidth(c, str, false), writeString(c, str) -} - -func clearProgressBar(c config, s state) error { - if s.maxLineWidth == 0 { - return nil - } - if c.useANSICodes { - // write the "clear current line" ANSI escape sequence - return writeString(c, "\033[2K\r") - } - // fill the empty content - // to overwrite the progress bar and jump - // back to the beginning of the line - str := fmt.Sprintf("\r%s\r", strings.Repeat(" ", s.maxLineWidth)) - return writeString(c, str) - // the following does not show correctly if the previous line is longer than subsequent line - // 
return writeString(c, "\r") -} - -func writeString(c config, str string) error { - if _, err := io.WriteString(c.writer, str); err != nil { - return err - } - - if f, ok := c.writer.(*os.File); ok { - // ignore any errors in Sync(), as stdout - // can't be synced on some operating systems - // like Debian 9 (Stretch) - f.Sync() - } - - return nil -} - -// Reader is the progressbar io.Reader struct -type Reader struct { - io.Reader - bar *ProgressBar -} - -// NewReader return a new Reader with a given progress bar. -func NewReader(r io.Reader, bar *ProgressBar) Reader { - return Reader{ - Reader: r, - bar: bar, - } -} - -// Read will read the data and add the number of bytes to the progressbar -func (r *Reader) Read(p []byte) (n int, err error) { - n, err = r.Reader.Read(p) - r.bar.Add(n) - return -} - -// Close the reader when it implements io.Closer -func (r *Reader) Close() (err error) { - if closer, ok := r.Reader.(io.Closer); ok { - return closer.Close() - } - r.bar.Finish() - return -} - -// Write implement io.Writer -func (p *ProgressBar) Write(b []byte) (n int, err error) { - n = len(b) - p.Add(n) - return -} - -// Read implement io.Reader -func (p *ProgressBar) Read(b []byte) (n int, err error) { - n = len(b) - p.Add(n) - return -} - -func (p *ProgressBar) Close() (err error) { - p.Finish() - return -} - -func average(xs []float64) float64 { - total := 0.0 - for _, v := range xs { - total += v - } - return total / float64(len(xs)) -} - -func humanizeBytes(s float64) (string, string) { - sizes := []string{" B", " kB", " MB", " GB", " TB", " PB", " EB"} - base := 1000.0 - if s < 10 { - return fmt.Sprintf("%2.0f", s), sizes[0] - } - e := math.Floor(logn(float64(s), base)) - suffix := sizes[int(e)] - val := math.Floor(float64(s)/math.Pow(base, e)*10+0.5) / 10 - f := "%.0f" - if val < 10 { - f = "%.1f" - } - - return fmt.Sprintf(f, val), suffix -} - -func logn(n, b float64) float64 { - return math.Log(n) / math.Log(b) -} - -// termWidth function returns the 
visible width of the current terminal -// and can be redefined for testing -var termWidth = func() (width int, err error) { - width, _, err = term.GetSize(int(os.Stdout.Fd())) - if err == nil { - return width, nil - } - - return 0, err -} diff --git a/progressbar/spinners.go b/progressbar/spinners.go deleted file mode 100644 index c3ccd01f..00000000 --- a/progressbar/spinners.go +++ /dev/null @@ -1,80 +0,0 @@ -package progressbar - -var spinners = map[int][]string{ - 0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"}, - 1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"}, - 2: {"▖", "▘", "▝", "▗"}, - 3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"}, - 4: {"◢", "◣", "◤", "◥"}, - 5: {"◰", "◳", "◲", "◱"}, - 6: {"◴", "◷", "◶", "◵"}, - 7: {"◐", "◓", "◑", "◒"}, - 8: {".", "o", "O", "@", "*"}, - 9: {"|", "/", "-", "\\"}, - 10: {"◡◡", "⊙⊙", "◠◠"}, - 11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"}, - 12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"}, - 13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"}, - 14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}, - 15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}, - 16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"}, - 17: {"■", "□", "▪", "▫"}, - 18: {"←", "↑", "→", "↓"}, - 19: {"╫", "╪"}, - 20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"}, - 21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"}, - 22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"}, - 23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"}, - 24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"}, - 25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", 
"ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"}, - 26: {".", "..", "..."}, - 27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"}, - 28: {".", "o", "O", "°", "O", "o", "."}, - 29: {"+", "x"}, - 30: {"v", "<", "^", ">"}, - 31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"}, - 32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"}, - 33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"}, - 34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"}, - 35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"}, - 36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"}, - 37: {"ဝ", "၀"}, - 38: {"▌", "▀", "▐▄"}, - 39: {"🌍", "🌎", "🌏"}, - 40: {"◜", "◝", "◞", "◟"}, - 41: {"⬒", "⬔", "⬓", "⬕"}, - 42: {"⬖", "⬘", "⬗", "⬙"}, - 43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"}, - 44: {"♠", "♣", "♥", "♦"}, - 45: {"➞", "➟", "➠", "➡", "➠", "➟"}, - 46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "}, - 47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . 
."}, - 48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "}, - 49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"}, - 50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"}, - 51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"}, - 52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"}, - 53: {"✶", "✸", "✹", "✺", "✹", "✷"}, - 54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"}, - 55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"}, - 56: {"¿", "?"}, - 57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"}, - 58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"}, - 59: {". ", ".. 
", "...", " ..", " .", " "}, - 60: {".", "o", "O", "°", "O", "o", "."}, - 61: {"▓", "▒", "░"}, - 62: {"▌", "▀", "▐", "▄"}, - 63: {"⊶", "⊷"}, - 64: {"▪", "▫"}, - 65: {"□", "■"}, - 66: {"▮", "▯"}, - 67: {"-", "=", "≡"}, - 68: {"d", "q", "p", "b"}, - 69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"}, - 70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "}, - 71: {"☗", "☖"}, - 72: {"⧇", "⧆"}, - 73: {"◉", "◎"}, - 74: {"㊂", "㊀", "㊁"}, - 75: {"⦾", "⦿"}, -} diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh index 54aef9a4..c35a3d8d 100755 --- a/scripts/build_darwin.sh +++ b/scripts/build_darwin.sh @@ -10,6 +10,7 @@ mkdir -p dist for TARGETARCH in arm64 amd64; do GOOS=darwin GOARCH=$TARGETARCH go generate ./... GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH + rm -rf llm/llama.cpp/*/build done lipo -create -output dist/ollama dist/ollama-darwin-* diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh index 1f612def..9cfb4113 100755 --- a/scripts/build_docker.sh +++ b/scripts/build_docker.sh @@ -10,6 +10,8 @@ docker buildx build \ --platform=linux/arm64,linux/amd64 \ --build-arg=VERSION \ --build-arg=GOFLAGS \ + --cache-from type=local,src=.cache \ + --cache-to type=local,dest=.cache \ -f Dockerfile \ -t ollama \ . diff --git a/scripts/install.sh b/scripts/install.sh index 0cb0f915..219f60fd 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -181,6 +181,9 @@ install_cuda_driver_apt() { debian) status 'Enabling contrib sources...' 
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null + if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then + $SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null + fi ;; esac diff --git a/scripts/push_docker.sh b/scripts/push_docker.sh index 3f3fb213..31865c2b 100755 --- a/scripts/push_docker.sh +++ b/scripts/push_docker.sh @@ -10,6 +10,7 @@ docker buildx build \ --platform=linux/arm64,linux/amd64 \ --build-arg=VERSION \ --build-arg=GOFLAGS \ + --cache-from type=local,src=.cache \ -f Dockerfile \ -t ollama/ollama -t ollama/ollama:$VERSION \ . diff --git a/server/download.go b/server/download.go index 13e8ee18..3c823f32 100644 --- a/server/download.go +++ b/server/download.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "log" + "math" "net/http" "net/url" "os" @@ -53,8 +54,8 @@ type blobDownloadPart struct { const ( numDownloadParts = 64 - minDownloadPartSize int64 = 32 * 1000 * 1000 - maxDownloadPartSize int64 = 256 * 1000 * 1000 + minDownloadPartSize int64 = 100 * format.MegaByte + maxDownloadPartSize int64 = 1000 * format.MegaByte ) func (p *blobDownloadPart) Name() string { @@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis continue } - i := i g.Go(func() error { var err error for try := 0; try < maxRetries; try++ { @@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis // return immediately if the context is canceled or the device is out of space return err case err != nil: - log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err) + sleep := time.Second * time.Duration(math.Pow(2, float64(try))) + log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep) + time.Sleep(sleep) continue default: - if try > 0 { - log.Printf("%s part %d completed after %d 
retries", b.Digest[7:19], i, try) - } return nil } } @@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse)) } fn(api.ProgressResponse{ - Status: fmt.Sprintf("downloading %s", b.Digest), + Status: fmt.Sprintf("pulling %s", b.Digest[7:19]), Digest: b.Digest, Total: b.Total, Completed: b.Completed.Load(), @@ -304,7 +303,7 @@ type downloadOpts struct { fn func(api.ProgressResponse) } -const maxRetries = 3 +const maxRetries = 6 var errMaxRetriesExceeded = errors.New("max retries exceeded") @@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error { return err default: opts.fn(api.ProgressResponse{ - Status: fmt.Sprintf("downloading %s", opts.digest), + Status: fmt.Sprintf("pulling %s", opts.digest[7:19]), Digest: opts.digest, Total: fi.Size(), Completed: fi.Size(), diff --git a/server/images.go b/server/images.go index 8d784fef..8d2af15b 100644 --- a/server/images.go +++ b/server/images.go @@ -228,220 +228,181 @@ func GetModel(name string) (*Model, error) { return model, nil } -func filenameWithPath(path, f string) (string, error) { - // if filePath starts with ~/, replace it with the user's home directory. 
- if strings.HasPrefix(f, fmt.Sprintf("~%s", string(os.PathSeparator))) { - parts := strings.Split(f, string(os.PathSeparator)) - home, err := os.UserHomeDir() - if err != nil { - return "", fmt.Errorf("failed to open file: %v", err) - } - - f = filepath.Join(home, filepath.Join(parts[1:]...)) +func realpath(p string) string { + abspath, err := filepath.Abs(p) + if err != nil { + return p } - // if filePath is not an absolute path, make it relative to the modelfile path - if !filepath.IsAbs(f) { - f = filepath.Join(filepath.Dir(path), f) + home, err := os.UserHomeDir() + if err != nil { + return abspath } - return f, nil + if p == "~" { + return home + } else if strings.HasPrefix(p, "~/") { + return filepath.Join(home, p[2:]) + } + + return abspath } -func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error { - mp := ParseModelPath(name) - - var manifest *ManifestV2 - var err error - var noprune string - - // build deleteMap to prune unused layers - deleteMap := make(map[string]bool) - - if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { - manifest, _, err = GetManifest(mp) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - if manifest != nil { - for _, l := range manifest.Layers { - deleteMap[l.Digest] = true - } - deleteMap[manifest.Config.Digest] = true - } - } - - mf, err := os.Open(path) - if err != nil { - fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't open modelfile '%s'", path)}) - return fmt.Errorf("failed to open file: %w", err) - } - defer mf.Close() - - fn(api.ProgressResponse{Status: "parsing modelfile"}) - commands, err := parser.Parse(mf) - if err != nil { - return err - } - +func CreateModel(ctx context.Context, name string, commands []parser.Command, fn func(resp api.ProgressResponse)) error { config := ConfigV2{ - Architecture: "amd64", OS: "linux", + Architecture: "amd64", } + deleteMap := make(map[string]struct{}) + var layers []*LayerReader + params := 
make(map[string][]string) - var sourceParams map[string]any + fromParams := make(map[string]any) + for _, c := range commands { - log.Printf("[%s] - %s\n", c.Name, c.Args) + log.Printf("[%s] - %s", c.Name, c.Args) + mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) + switch c.Name { case "model": - fn(api.ProgressResponse{Status: "looking for model"}) + if strings.HasPrefix(c.Args, "@") { + blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) + if err != nil { + return err + } - mp := ParseModelPath(c.Args) - mf, _, err := GetManifest(mp) + c.Args = blobPath + } + + bin, err := os.Open(realpath(c.Args)) if err != nil { - modelFile, err := filenameWithPath(path, c.Args) - if err != nil { + // not a file on disk so must be a model reference + modelpath := ParseModelPath(c.Args) + manifest, _, err := GetManifest(modelpath) + switch { + case errors.Is(err, os.ErrNotExist): + fn(api.ProgressResponse{Status: "pulling model"}) + if err := PullModel(ctx, c.Args, &RegistryOptions{}, fn); err != nil { + return err + } + + manifest, _, err = GetManifest(modelpath) + if err != nil { + return err + } + case err != nil: return err } - if _, err := os.Stat(modelFile); err != nil { - // the model file does not exist, try pulling it - if errors.Is(err, os.ErrNotExist) { - fn(api.ProgressResponse{Status: "pulling model file"}) - if err := PullModel(ctx, c.Args, &RegistryOptions{}, fn); err != nil { - return err - } - mf, _, err = GetManifest(mp) - if err != nil { - return fmt.Errorf("failed to open file after pull: %v", err) - } - } else { - return err - } - } else { - // create a model from this specified file - fn(api.ProgressResponse{Status: "creating model layer"}) - file, err := os.Open(modelFile) - if err != nil { - return fmt.Errorf("failed to open file: %v", err) - } - defer file.Close() - ggml, err := llm.DecodeGGML(file) - if err != nil { - return err - } - - config.ModelFormat = ggml.Name() - config.ModelFamily = ggml.ModelFamily() - 
config.ModelType = ggml.ModelType() - config.FileType = ggml.FileType() - - // reset the file - file.Seek(0, io.SeekStart) - - l, err := CreateLayer(file) - if err != nil { - return fmt.Errorf("failed to create layer: %v", err) - } - l.MediaType = "application/vnd.ollama.image.model" - layers = append(layers, l) - } - } - - if mf != nil { fn(api.ProgressResponse{Status: "reading model metadata"}) - sourceBlobPath, err := GetBlobsPath(mf.Config.Digest) + fromConfigPath, err := GetBlobsPath(manifest.Config.Digest) if err != nil { return err } - sourceBlob, err := os.Open(sourceBlobPath) + fromConfigFile, err := os.Open(fromConfigPath) if err != nil { return err } - defer sourceBlob.Close() + defer fromConfigFile.Close() - var source ConfigV2 - if err := json.NewDecoder(sourceBlob).Decode(&source); err != nil { + var fromConfig ConfigV2 + if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil { return err } - // copy the model metadata - config.ModelFamily = source.ModelFamily - config.ModelType = source.ModelType - config.ModelFormat = source.ModelFormat - config.FileType = source.FileType + config.ModelFormat = fromConfig.ModelFormat + config.ModelFamily = fromConfig.ModelFamily + config.ModelType = fromConfig.ModelType + config.FileType = fromConfig.FileType - for _, l := range mf.Layers { - if l.MediaType == "application/vnd.ollama.image.params" { - sourceParamsBlobPath, err := GetBlobsPath(l.Digest) + for _, layer := range manifest.Layers { + deleteMap[layer.Digest] = struct{}{} + if layer.MediaType == "application/vnd.ollama.image.params" { + fromParamsPath, err := GetBlobsPath(layer.Digest) if err != nil { return err } - sourceParamsBlob, err := os.Open(sourceParamsBlobPath) + fromParamsFile, err := os.Open(fromParamsPath) if err != nil { return err } - defer sourceParamsBlob.Close() + defer fromParamsFile.Close() - if err := json.NewDecoder(sourceParamsBlob).Decode(&sourceParams); err != nil { + if err := 
json.NewDecoder(fromParamsFile).Decode(&fromParams); err != nil { return err } } - newLayer, err := GetLayerWithBufferFromLayer(l) + layer, err := GetLayerWithBufferFromLayer(layer) if err != nil { return err } - newLayer.From = mp.GetShortTagname() - layers = append(layers, newLayer) - } - } - case "adapter": - fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)}) - fp, err := filenameWithPath(path, c.Args) + layer.From = modelpath.GetShortTagname() + layers = append(layers, layer) + } + + deleteMap[manifest.Config.Digest] = struct{}{} + continue + } + defer bin.Close() + + fn(api.ProgressResponse{Status: "creating model layer"}) + ggml, err := llm.DecodeGGML(bin) if err != nil { return err } - // create a model from this specified file - fn(api.ProgressResponse{Status: "creating model layer"}) + config.ModelFormat = ggml.Name() + config.ModelFamily = ggml.ModelFamily() + config.ModelType = ggml.ModelType() + config.FileType = ggml.FileType() - file, err := os.Open(fp) + bin.Seek(0, io.SeekStart) + layer, err := CreateLayer(bin) if err != nil { - return fmt.Errorf("failed to open file: %v", err) + return err } - defer file.Close() - l, err := CreateLayer(file) + layer.MediaType = mediatype + layers = append(layers, layer) + case "adapter": + fn(api.ProgressResponse{Status: "creating adapter layer"}) + bin, err := os.Open(realpath(c.Args)) if err != nil { - return fmt.Errorf("failed to create layer: %v", err) + return err } - l.MediaType = "application/vnd.ollama.image.adapter" - layers = append(layers, l) - case "license": - fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)}) - mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) + defer bin.Close() - layer, err := CreateLayer(strings.NewReader(c.Args)) + layer, err := CreateLayer(bin) if err != nil { return err } if layer.Size > 0 { - layer.MediaType = mediaType + layer.MediaType = mediatype layers = append(layers, layer) } - case "template", 
"system", "prompt": - fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)}) - // remove the layer if one exists - mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) - layers = removeLayerFromLayers(layers, mediaType) + case "license": + fn(api.ProgressResponse{Status: "creating license layer"}) + layer, err := CreateLayer(strings.NewReader(c.Args)) + if err != nil { + return err + } + + if layer.Size > 0 { + layer.MediaType = mediatype + layers = append(layers, layer) + } + case "template", "system": + fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)}) + + // remove duplicate layers + layers = removeLayerFromLayers(layers, mediatype) layer, err := CreateLayer(strings.NewReader(c.Args)) if err != nil { @@ -449,48 +410,47 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api } if layer.Size > 0 { - layer.MediaType = mediaType + layer.MediaType = mediatype layers = append(layers, layer) } default: - // runtime parameters, build a list of args for each parameter to allow multiple values to be specified (ex: multiple stop sequences) params[c.Name] = append(params[c.Name], c.Args) } } - // Create a single layer for the parameters if len(params) > 0 { - fn(api.ProgressResponse{Status: "creating parameter layer"}) + fn(api.ProgressResponse{Status: "creating parameters layer"}) - layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params") formattedParams, err := formatParams(params) if err != nil { - return fmt.Errorf("couldn't create params json: %v", err) + return err } - for k, v := range sourceParams { + for k, v := range fromParams { if _, ok := formattedParams[k]; !ok { formattedParams[k] = v } } if config.ModelType == "65B" { - if numGQA, ok := formattedParams["num_gqa"].(int); ok && numGQA == 8 { + if gqa, ok := formattedParams["gqa"].(int); ok && gqa == 8 { config.ModelType = "70B" } } - bts, err := json.Marshal(formattedParams) + var b bytes.Buffer + 
if err := json.NewEncoder(&b).Encode(formattedParams); err != nil { + return err + } + + fn(api.ProgressResponse{Status: "creating config layer"}) + layer, err := CreateLayer(bytes.NewReader(b.Bytes())) if err != nil { return err } - l, err := CreateLayer(bytes.NewReader(bts)) - if err != nil { - return fmt.Errorf("failed to create layer: %v", err) - } - l.MediaType = "application/vnd.ollama.image.params" - layers = append(layers, l) + layer.MediaType = "application/vnd.ollama.image.params" + layers = append(layers, layer) } digests, err := getLayerDigests(layers) @@ -498,36 +458,31 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api return err } - var manifestLayers []*Layer - for _, l := range layers { - manifestLayers = append(manifestLayers, &l.Layer) - delete(deleteMap, l.Layer.Digest) - } - - // Create a layer for the config object - fn(api.ProgressResponse{Status: "creating config layer"}) - cfg, err := createConfigLayer(config, digests) + configLayer, err := createConfigLayer(config, digests) if err != nil { return err } - layers = append(layers, cfg) - delete(deleteMap, cfg.Layer.Digest) + + layers = append(layers, configLayer) + delete(deleteMap, configLayer.Digest) if err := SaveLayers(layers, fn, false); err != nil { return err } - // Create the manifest + var contentLayers []*Layer + for _, layer := range layers { + contentLayers = append(contentLayers, &layer.Layer) + delete(deleteMap, layer.Digest) + } + fn(api.ProgressResponse{Status: "writing manifest"}) - err = CreateManifest(name, cfg, manifestLayers) - if err != nil { + if err := CreateManifest(name, configLayer, contentLayers); err != nil { return err } - if noprune == "" { - fn(api.ProgressResponse{Status: "removing any unused layers"}) - err = deleteUnusedLayers(nil, deleteMap, false) - if err != nil { + if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { + if err := deleteUnusedLayers(nil, deleteMap, false); err != nil { return err } } @@ -739,7 +694,7 
@@ func CopyModel(src, dest string) error { return nil } -func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dryRun bool) error { +func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error { fp, err := GetManifestPath() if err != nil { return err @@ -779,21 +734,19 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dry } // only delete the files which are still in the deleteMap - for k, v := range deleteMap { - if v { - fp, err := GetBlobsPath(k) - if err != nil { - log.Printf("couldn't get file path for '%s': %v", k, err) + for k := range deleteMap { + fp, err := GetBlobsPath(k) + if err != nil { + log.Printf("couldn't get file path for '%s': %v", k, err) + continue + } + if !dryRun { + if err := os.Remove(fp); err != nil { + log.Printf("couldn't remove file '%s': %v", fp, err) continue } - if !dryRun { - if err := os.Remove(fp); err != nil { - log.Printf("couldn't remove file '%s': %v", fp, err) - continue - } - } else { - log.Printf("wanted to remove: %s", fp) - } + } else { + log.Printf("wanted to remove: %s", fp) } } @@ -801,7 +754,7 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dry } func PruneLayers() error { - deleteMap := make(map[string]bool) + deleteMap := make(map[string]struct{}) p, err := GetBlobsPath("") if err != nil { return err @@ -818,7 +771,9 @@ func PruneLayers() error { if runtime.GOOS == "windows" { name = strings.ReplaceAll(name, "-", ":") } - deleteMap[name] = true + if strings.HasPrefix(name, "sha256:") { + deleteMap[name] = struct{}{} + } } log.Printf("total blobs: %d", len(deleteMap)) @@ -873,11 +828,11 @@ func DeleteModel(name string) error { return err } - deleteMap := make(map[string]bool) + deleteMap := make(map[string]struct{}) for _, layer := range manifest.Layers { - deleteMap[layer.Digest] = true + deleteMap[layer.Digest] = struct{}{} } - deleteMap[manifest.Config.Digest] = true + 
deleteMap[manifest.Config.Digest] = struct{}{} err = deleteUnusedLayers(&mp, deleteMap, false) if err != nil { @@ -979,6 +934,9 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu for _, layer := range layers { if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil { log.Printf("error uploading blob: %v", err) + if errors.Is(err, errUnauthorized) { + return fmt.Errorf("unable to push %s, make sure this namespace exists and you are authorized to push to it", ParseModelPath(name).GetNamespaceRepository()) + } return err } } @@ -1013,7 +971,7 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu var noprune string // build deleteMap to prune unused layers - deleteMap := make(map[string]bool) + deleteMap := make(map[string]struct{}) if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { manifest, _, err = GetManifest(mp) @@ -1023,9 +981,9 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu if manifest != nil { for _, l := range manifest.Layers { - deleteMap[l.Digest] = true + deleteMap[l.Digest] = struct{}{} } - deleteMap[manifest.Config.Digest] = true + deleteMap[manifest.Config.Digest] = struct{}{} } } @@ -1165,44 +1123,52 @@ func GetSHA256Digest(r io.Reader) (string, int64) { return fmt.Sprintf("sha256:%x", h.Sum(nil)), n } +var errUnauthorized = fmt.Errorf("unauthorized") + func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) { - for try := 0; try < maxRetries; try++ { - resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) - if err != nil { - log.Printf("couldn't start upload: %v", err) - return nil, err + resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) + if err != nil { + if !errors.Is(err, context.Canceled) { + log.Printf("request failed: %v", err) } - switch { - case resp.StatusCode == 
http.StatusUnauthorized: - auth := resp.Header.Get("www-authenticate") - authRedir := ParseAuthRedirectString(auth) - token, err := getAuthToken(ctx, authRedir) + return nil, err + } + + switch { + case resp.StatusCode == http.StatusUnauthorized: + // Handle authentication error with one retry + auth := resp.Header.Get("www-authenticate") + authRedir := ParseAuthRedirectString(auth) + token, err := getAuthToken(ctx, authRedir) + if err != nil { + return nil, err + } + regOpts.Token = token + if body != nil { + _, err = body.Seek(0, io.SeekStart) if err != nil { return nil, err } - - regOpts.Token = token - if body != nil { - body.Seek(0, io.SeekStart) - } - - continue - case resp.StatusCode == http.StatusNotFound: - return nil, os.ErrNotExist - case resp.StatusCode >= http.StatusBadRequest: - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("%d: %s", resp.StatusCode, err) - } - - return nil, fmt.Errorf("%d: %s", resp.StatusCode, body) - default: - return resp, nil } + + resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) + if resp.StatusCode == http.StatusUnauthorized { + return nil, errUnauthorized + } + + return resp, err + case resp.StatusCode == http.StatusNotFound: + return nil, os.ErrNotExist + case resp.StatusCode >= http.StatusBadRequest: + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("%d: %s", resp.StatusCode, err) + } + return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody) } - return nil, errMaxRetriesExceeded + return resp, nil } func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) { diff --git a/server/routes.go b/server/routes.go index a543b10e..8a5a5a24 100644 --- a/server/routes.go +++ b/server/routes.go @@ -2,6 +2,7 @@ package server import ( "context" + "crypto/sha256" "encoding/json" "errors" "fmt" @@ -26,6 +27,7 @@ import ( 
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/llm" + "github.com/jmorganca/ollama/parser" "github.com/jmorganca/ollama/version" ) @@ -409,8 +411,31 @@ func CreateModelHandler(c *gin.Context) { return } - if req.Name == "" || req.Path == "" { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name and path are required"}) + if req.Name == "" { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"}) + return + } + + if req.Path == "" && req.Modelfile == "" { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"}) + return + } + + var modelfile io.Reader = strings.NewReader(req.Modelfile) + if req.Path != "" && req.Modelfile == "" { + bin, err := os.Open(req.Path) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)}) + return + } + defer bin.Close() + + modelfile = bin + } + + commands, err := parser.Parse(modelfile) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } @@ -424,7 +449,7 @@ func CreateModelHandler(c *gin.Context) { ctx, cancel := context.WithCancel(c.Request.Context()) defer cancel() - if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil { + if err := CreateModel(ctx, req.Name, commands, fn); err != nil { ch <- gin.H{"error": err.Error()} } }() @@ -625,6 +650,60 @@ func CopyModelHandler(c *gin.Context) { } } +func HeadBlobHandler(c *gin.Context) { + path, err := GetBlobsPath(c.Param("digest")) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if _, err := os.Stat(path); err != nil { + c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))}) + return + } + + c.Status(http.StatusOK) +} + +func CreateBlobHandler(c *gin.Context) { + targetPath, err := GetBlobsPath(c.Param("digest")) + if err != nil { + 
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + hash := sha256.New() + temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-") + if err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + defer temp.Close() + defer os.Remove(temp.Name()) + + if _, err := io.Copy(temp, io.TeeReader(c.Request.Body, hash)); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + if fmt.Sprintf("sha256:%x", hash.Sum(nil)) != c.Param("digest") { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "digest does not match body"}) + return + } + + if err := temp.Close(); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + if err := os.Rename(temp.Name(), targetPath); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + + c.Status(http.StatusCreated) +} + var defaultAllowOrigins = []string{ "localhost", "127.0.0.1", @@ -684,6 +763,8 @@ func Serve(ln net.Listener, allowOrigins []string) error { r.POST("/api/copy", CopyModelHandler) r.DELETE("/api/delete", DeleteModelHandler) r.POST("/api/show", ShowModelHandler) + r.POST("/api/blobs/:digest", CreateBlobHandler) + r.HEAD("/api/blobs/:digest", HeadBlobHandler) for _, method := range []string{http.MethodGet, http.MethodHead} { r.Handle(method, "/", func(c *gin.Context) { @@ -713,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error { if runtime.GOOS == "linux" { // check compatibility to log warnings if _, err := llm.CheckVRAM(); err != nil { - log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err) + log.Printf(err.Error()) } } diff --git a/server/upload.go b/server/upload.go index 04575560..04cd5ac0 100644 --- a/server/upload.go +++ b/server/upload.go @@ -5,9 +5,9 @@ 
import ( "crypto/md5" "errors" "fmt" - "hash" "io" "log" + "math" "net/http" "net/url" "os" @@ -35,6 +35,8 @@ type blobUpload struct { context.CancelFunc + file *os.File + done bool err error references atomic.Int32 @@ -42,8 +44,8 @@ type blobUpload struct { const ( numUploadParts = 64 - minUploadPartSize int64 = 95 * 1000 * 1000 - maxUploadPartSize int64 = 1000 * 1000 * 1000 + minUploadPartSize int64 = 100 * format.MegaByte + maxUploadPartSize int64 = 1000 * format.MegaByte ) func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error { @@ -55,7 +57,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg if b.From != "" { values := requestURL.Query() values.Add("mount", b.Digest) - values.Add("from", b.From) + values.Add("from", ParseModelPath(b.From).GetNamespaceRepository()) requestURL.RawQuery = values.Encode() } @@ -77,6 +79,14 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg b.Total = fi.Size() + // http.StatusCreated indicates a blob has been mounted + // ref: https://distribution.github.io/distribution/spec/api/#cross-repository-blob-mount + if resp.StatusCode == http.StatusCreated { + b.Completed.Store(b.Total) + b.done = true + return nil + } + var size = b.Total / numUploadParts switch { case size < minUploadPartSize: @@ -120,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) { return } - f, err := os.Open(p) + b.file, err = os.Open(p) if err != nil { b.err = err return } - defer f.Close() + defer b.file.Close() g, inner := errgroup.WithContext(ctx) g.SetLimit(numUploadParts) @@ -137,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) { g.Go(func() error { var err error for try := 0; try < maxRetries; try++ { - part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size) err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts) switch { case errors.Is(err, context.Canceled): 
@@ -145,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) { case errors.Is(err, errMaxRetriesExceeded): return err case err != nil: - log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err) + part.Reset() + sleep := time.Second * time.Duration(math.Pow(2, float64(try))) + log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep) + time.Sleep(sleep) continue } @@ -165,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) { requestURL := <-b.nextURL var sb strings.Builder + + // calculate md5 checksum and add it to the commit request for _, part := range b.Parts { - sb.Write(part.Sum(nil)) + hash := md5.New() + if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil { + b.err = err + return + } + + sb.Write(hash.Sum(nil)) } md5sum := md5.Sum([]byte(sb.String())) @@ -180,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) { headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Length", "0") - resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts) - if err != nil { - b.err = err + for try := 0; try < maxRetries; try++ { + resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts) + if err != nil { + b.err = err + if errors.Is(err, context.Canceled) { + return + } + + sleep := time.Second * time.Duration(math.Pow(2, float64(try))) + log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep) + time.Sleep(sleep) + continue + } + defer resp.Body.Close() + + b.err = nil + b.done = true return } - defer resp.Body.Close() - - b.done = true } func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error { - part.Reset() - headers := make(http.Header) 
headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Length", fmt.Sprintf("%d", part.Size)) - headers.Set("X-Redirect-Uploads", "1") if method == http.MethodPatch { + headers.Set("X-Redirect-Uploads", "1") headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1)) } - resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts) + sr := io.NewSectionReader(b.file, part.Offset, part.Size) + resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts) if err != nil { return err } @@ -227,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL return err } + // retry uploading to the redirect URL for try := 0; try < maxRetries; try++ { err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil) switch { @@ -235,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL case errors.Is(err, errMaxRetriesExceeded): return err case err != nil: - log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err) + part.Reset() + sleep := time.Second * time.Duration(math.Pow(2, float64(try))) + log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep) + time.Sleep(sleep) continue } @@ -260,7 +294,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL return err } - return fmt.Errorf("http status %d %s: %s", resp.StatusCode, resp.Status, body) + return fmt.Errorf("http status %s: %s", resp.Status, body) } if method == http.MethodPatch { @@ -293,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er } fn(api.ProgressResponse{ - Status: fmt.Sprintf("uploading %s", b.Digest), + Status: fmt.Sprintf("pushing %s", b.Digest[7:19]), Digest: b.Digest, Total: b.Total, Completed: b.Completed.Load(), @@ -307,14 +341,10 @@ func (b 
*blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er type blobUploadPart struct { // N is the part number - N int - Offset int64 - Size int64 - hash.Hash - + N int + Offset int64 + Size int64 written int64 - - io.ReadSeeker *blobUpload } @@ -326,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) { } func (p *blobUploadPart) Reset() { - p.Seek(0, io.SeekStart) p.Completed.Add(-int64(p.written)) p.written = 0 - p.Hash = md5.New() } func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error { @@ -344,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO default: defer resp.Body.Close() fn(api.ProgressResponse{ - Status: fmt.Sprintf("uploading %s", layer.Digest), + Status: fmt.Sprintf("pushing %s", layer.Digest[7:19]), Digest: layer.Digest, Total: layer.Size, Completed: layer.Size,