Merge branch 'jmorganca:main' into main

This commit is contained in:
Dane Madsen 2023-11-21 20:01:13 +10:00 committed by GitHub
commit 6ebab38b89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
44 changed files with 1470 additions and 1779 deletions

View file

@ -6,3 +6,4 @@ scripts
llm/llama.cpp/ggml llm/llama.cpp/ggml
llm/llama.cpp/gguf llm/llama.cpp/gguf
.env .env
.cache

1
.gitignore vendored
View file

@ -6,3 +6,4 @@
dist dist
ollama ollama
ggml-metal.metal ggml-metal.metal
.cache

View file

@ -206,7 +206,7 @@ Ollama has a REST API for running and managing models.
For example, to generate text from a model: For example, to generate text from a model:
``` ```
curl -X POST http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama2", "model": "llama2",
"prompt":"Why is the sky blue?" "prompt":"Why is the sky blue?"
}' }'
@ -229,6 +229,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Web UI](https://github.com/ollama-webui/ollama-webui) - [Web UI](https://github.com/ollama-webui/ollama-webui)
- [Ollamac](https://github.com/kevinhermawan/Ollamac) - [Ollamac](https://github.com/kevinhermawan/Ollamac)
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md) - [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
### Terminal ### Terminal
@ -237,11 +238,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Emacs client](https://github.com/zweifisch/ollama) - [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim) - [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim) - [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel) - [gptel Emacs client](https://github.com/karthink/gptel)
### Libraries ### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html) - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm) - [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@ -250,6 +253,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama) - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit) - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama) - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
### Mobile
- [Maid](https://github.com/danemadsen/Maid) (Mobile Artificial Intelligence Distribution)
### Extensions & Plugins ### Extensions & Plugins
@ -261,3 +269,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot) - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation) - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)

View file

@ -5,6 +5,7 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"net" "net"
@ -95,11 +96,19 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
var reqBody io.Reader var reqBody io.Reader
var data []byte var data []byte
var err error var err error
if reqData != nil {
switch reqData := reqData.(type) {
case io.Reader:
// reqData is already an io.Reader
reqBody = reqData
case nil:
// noop
default:
data, err = json.Marshal(reqData) data, err = json.Marshal(reqData)
if err != nil { if err != nil {
return err return err
} }
reqBody = bytes.NewReader(data) reqBody = bytes.NewReader(data)
} }
@ -287,3 +296,18 @@ func (c *Client) Heartbeat(ctx context.Context) error {
} }
return nil return nil
} }
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
var statusError StatusError
if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
return err
}
if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
return err
}
}
return nil
}

View file

@ -101,6 +101,7 @@ type EmbeddingResponse struct {
type CreateRequest struct { type CreateRequest struct {
Name string `json:"name"` Name string `json:"name"`
Path string `json:"path"` Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"` Stream *bool `json:"stream,omitempty"`
} }

View file

@ -1,9 +1,11 @@
package cmd package cmd
import ( import (
"bytes"
"context" "context"
"crypto/ed25519" "crypto/ed25519"
"crypto/rand" "crypto/rand"
"crypto/sha256"
"encoding/pem" "encoding/pem"
"errors" "errors"
"fmt" "fmt"
@ -20,7 +22,6 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/dustin/go-humanize"
"github.com/olekukonko/tablewriter" "github.com/olekukonko/tablewriter"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
@ -28,7 +29,8 @@ import (
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format" "github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/progressbar" "github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/progress"
"github.com/jmorganca/ollama/readline" "github.com/jmorganca/ollama/readline"
"github.com/jmorganca/ollama/server" "github.com/jmorganca/ollama/server"
"github.com/jmorganca/ollama/version" "github.com/jmorganca/ollama/version"
@ -46,49 +48,95 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
var spinner *Spinner p := progress.NewProgress(os.Stderr)
defer p.Stop()
var currentDigest string bars := make(map[string]*progress.Bar)
var bar *progressbar.ProgressBar
modelfile, err := os.ReadFile(filename)
if err != nil {
return err
}
commands, err := parser.Parse(bytes.NewReader(modelfile))
if err != nil {
return err
}
home, err := os.UserHomeDir()
if err != nil {
return err
}
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
for _, c := range commands {
switch c.Name {
case "model", "adapter":
path := c.Args
if path == "~" {
path = home
} else if strings.HasPrefix(path, "~/") {
path = filepath.Join(home, path[2:])
}
if !filepath.IsAbs(path) {
path = filepath.Join(filepath.Dir(filename), path)
}
bin, err := os.Open(path)
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
continue
} else if err != nil {
return err
}
defer bin.Close()
hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil {
return err
}
bin.Seek(0, io.SeekStart)
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return err
}
modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
}
}
request := api.CreateRequest{Name: args[0], Path: filename}
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" { if resp.Digest != "" {
if spinner != nil {
spinner.Stop() spinner.Stop()
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
} }
currentDigest = resp.Digest
// pulling bar.Set(resp.Completed)
bar = progressbar.DefaultBytes( } else if status != resp.Status {
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
spinner.Stop() spinner.Stop()
}
spinner = NewSpinner(resp.Status) status = resp.Status
go spinner.Spin(100 * time.Millisecond) spinner = progress.NewSpinner(status)
p.Add(status, spinner)
} }
return nil return nil
} }
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
if err := client.Create(context.Background(), &request, fn); err != nil { if err := client.Create(context.Background(), &request, fn); err != nil {
return err return err
} }
if spinner != nil {
spinner.Stop()
if spinner.description != "success" {
return errors.New("unexpected end to create model")
}
}
return nil return nil
} }
@ -125,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
var currentDigest string p := progress.NewProgress(os.Stderr)
var bar *progressbar.ProgressBar defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PushRequest{Name: args[0], Insecure: insecure}
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" { if resp.Digest != "" {
currentDigest = resp.Digest if spinner != nil {
bar = progressbar.DefaultBytes( spinner.Stop()
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
} }
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil return nil
} }
request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(context.Background(), &request, fn); err != nil { if err := client.Push(context.Background(), &request, fn); err != nil {
return err return err
} }
if bar != nil && !bar.IsFinished() { spinner.Stop()
return errors.New("unexpected end to push model")
}
return nil return nil
} }
@ -173,7 +231,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
for _, m := range models.Models { for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) { if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
data = append(data, []string{m.Name, m.Digest[:12], humanize.Bytes(uint64(m.Size)), format.HumanTime(m.ModifiedAt, "Never")}) data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), format.HumanTime(m.ModifiedAt, "Never")})
} }
} }
@ -305,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
return pull(args[0], insecure)
}
func pull(model string, insecure bool) error {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
return err return err
} }
var currentDigest string p := progress.NewProgress(os.Stderr)
var bar *progressbar.ProgressBar defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PullRequest{Name: model, Insecure: insecure}
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" { if resp.Digest != "" {
currentDigest = resp.Digest if spinner != nil {
bar = progressbar.DefaultBytes( spinner.Stop()
resp.Total, }
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
bar.Set64(resp.Completed) bar, ok := bars[resp.Digest]
} else if resp.Digest == currentDigest && resp.Digest != "" { if !ok {
bar.Set64(resp.Completed) bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
} else { bars[resp.Digest] = bar
currentDigest = "" p.Add(resp.Digest, bar)
fmt.Println(resp.Status) }
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
} }
return nil return nil
} }
request := api.PullRequest{Name: args[0], Insecure: insecure}
if err := client.Pull(context.Background(), &request, fn); err != nil { if err := client.Pull(context.Background(), &request, fn); err != nil {
return err return err
} }
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to pull model")
}
return nil return nil
} }
@ -397,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
return err return err
} }
spinner := NewSpinner("") p := progress.NewProgress(os.Stderr)
go spinner.Spin(60 * time.Millisecond) defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
var latest api.GenerateResponse var latest api.GenerateResponse
@ -430,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format} request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
fn := func(response api.GenerateResponse) error { fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() { p.StopAndClear()
spinner.Finish()
}
latest = response latest = response
@ -466,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
if err := client.Generate(cancelCtx, &request, fn); err != nil { if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort { if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil return nil
} }
return err return err

View file

@ -1,44 +0,0 @@
package cmd
import (
"fmt"
"os"
"time"
"github.com/jmorganca/ollama/progressbar"
)
type Spinner struct {
description string
*progressbar.ProgressBar
}
func NewSpinner(description string) *Spinner {
return &Spinner{
description: description,
ProgressBar: progressbar.NewOptions(-1,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionThrottle(60*time.Millisecond),
progressbar.OptionSpinnerType(14),
progressbar.OptionSetRenderBlankState(true),
progressbar.OptionSetElapsedTime(false),
progressbar.OptionClearOnFinish(),
progressbar.OptionSetDescription(description),
),
}
}
func (s *Spinner) Spin(tick time.Duration) {
for range time.Tick(tick) {
if s.IsFinished() {
break
}
s.Add(1)
}
}
func (s *Spinner) Stop() {
s.Finish()
fmt.Println(s.description)
}

View file

@ -51,14 +51,16 @@ Advanced parameters (optional):
### JSON mode ### JSON mode
Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below. Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
### Examples ### Examples
#### Request #### Request
```shell ```shell
curl -X POST http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama2", "model": "llama2",
"prompt": "Why is the sky blue?" "prompt": "Why is the sky blue?"
}' }'
@ -113,8 +115,8 @@ To calculate how fast the response is generated in tokens per second (token/s),
#### Request (No streaming) #### Request (No streaming)
```shell ```shell
curl -X POST http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama2:7b", "model": "llama2",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"stream": false "stream": false
}' }'
@ -126,7 +128,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
```json ```json
{ {
"model": "llama2:7b", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.", "response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3], "context": [1, 2, 3],
@ -147,7 +149,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context. In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
```shell ```shell
curl -X POST http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "mistral", "model": "mistral",
"prompt": "[INST] why is the sky blue? [/INST]", "prompt": "[INST] why is the sky blue? [/INST]",
"raw": true, "raw": true,
@ -175,7 +177,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
#### Request (JSON mode) #### Request (JSON mode)
```shell ```shell
curl -X POST http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama2", "model": "llama2",
"prompt": "What color is the sky at different times of the day? Respond using JSON", "prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json", "format": "json",
@ -224,8 +226,8 @@ The value of `response` will be a string containing JSON similar to:
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override. If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
```shell ```shell
curl -X POST http://localhost:11434/api/generate -d '{ curl http://localhost:11434/api/generate -d '{
"model": "llama2:7b", "model": "llama2",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"stream": false, "stream": false,
"options": { "options": {
@ -270,7 +272,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
```json ```json
{ {
"model": "llama2:7b", "model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z", "created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.", "response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3], "context": [1, 2, 3],
@ -292,22 +294,23 @@ curl -X POST http://localhost:11434/api/generate -d '{
POST /api/create POST /api/create
``` ```
Create a model from a [`Modelfile`](./modelfile.md) Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using [Create a Blob](#create-a-blob) and the value to the path indicated in the response.
### Parameters ### Parameters
- `name`: name of the model to create - `name`: name of the model to create
- `path`: path to the Modelfile - `modelfile`: contents of the Modelfile
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
- `path` (deprecated): path to the Modelfile
### Examples ### Examples
#### Request #### Request
```shell ```shell
curl -X POST http://localhost:11434/api/create -d '{ curl http://localhost:11434/api/create -d '{
"name": "mario", "name": "mario",
"path": "~/Modelfile" "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
}' }'
``` ```
@ -321,6 +324,54 @@ A stream of JSON objects. When finished, `status` is `success`.
} }
``` ```
### Check if a Blob Exists
```shell
HEAD /api/blobs/:digest
```
Check if a blob is known to the server.
#### Query Parameters
- `digest`: the SHA256 digest of the blob
#### Examples
##### Request
```shell
curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```
##### Response
Return 200 OK if the blob exists, 404 Not Found if it does not.
### Create a Blob
```shell
POST /api/blobs/:digest
```
Create a blob from a file. Returns the server file path.
#### Query Parameters
- `digest`: the expected SHA256 digest of the file
#### Examples
##### Request
```shell
curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```
##### Response
Return 201 Created if the blob was successfully created.
## List Local Models ## List Local Models
```shell ```shell
@ -345,7 +396,7 @@ A single JSON object will be returned.
{ {
"models": [ "models": [
{ {
"name": "llama2:7b", "name": "llama2",
"modified_at": "2023-08-02T17:02:23.713454393-07:00", "modified_at": "2023-08-02T17:02:23.713454393-07:00",
"size": 3791730596 "size": 3791730596
}, },
@ -376,7 +427,7 @@ Show details about a model including modelfile, template, parameters, license, a
```shell ```shell
curl http://localhost:11434/api/show -d '{ curl http://localhost:11434/api/show -d '{
"name": "llama2:7b" "name": "llama2"
}' }'
``` ```
@ -405,7 +456,7 @@ Copy a model. Creates a model with another name from an existing model.
```shell ```shell
curl http://localhost:11434/api/copy -d '{ curl http://localhost:11434/api/copy -d '{
"source": "llama2:7b", "source": "llama2",
"destination": "llama2-backup" "destination": "llama2-backup"
}' }'
``` ```
@ -459,8 +510,8 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
#### Request #### Request
```shell ```shell
curl -X POST http://localhost:11434/api/pull -d '{ curl http://localhost:11434/api/pull -d '{
"name": "llama2:7b" "name": "llama2"
}' }'
``` ```
@ -531,7 +582,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
#### Request #### Request
```shell ```shell
curl -X POST http://localhost:11434/api/push -d '{ curl http://localhost:11434/api/push -d '{
"name": "mattw/pygmalion:latest" "name": "mattw/pygmalion:latest"
}' }'
``` ```
@ -599,8 +650,8 @@ Advanced parameters:
#### Request #### Request
```shell ```shell
curl -X POST http://localhost:11434/api/embeddings -d '{ curl http://localhost:11434/api/embeddings -d '{
"model": "llama2:7b", "model": "llama2",
"prompt": "Here is an article about llamas..." "prompt": "Here is an article about llamas..."
}' }'
``` ```

View file

@ -32,11 +32,11 @@ Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST`
```bash ```bash
mkdir -p /etc/systemd/system/ollama.service.d mkdir -p /etc/systemd/system/ollama.service.d
echo "[Service]" >>/etc/systemd/system/ollama.service.d/environment.conf echo '[Service]' >>/etc/systemd/system/ollama.service.d/environment.conf
``` ```
```bash ```bash
echo "Environment=OLLAMA_HOST=0.0.0.0:11434" >>/etc/systemd/system/ollama.service.d/environment.conf echo 'Environment="OLLAMA_HOST=0.0.0.0:11434"' >>/etc/systemd/system/ollama.service.d/environment.conf
``` ```
Reload `systemd` and restart Ollama: Reload `systemd` and restart Ollama:
@ -59,7 +59,7 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
On Linux: On Linux:
```bash ```bash
echo "Environment=OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com" >>/etc/systemd/system/ollama.service.d/environment.conf echo 'Environment="OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
``` ```
Reload `systemd` and restart Ollama: Reload `systemd` and restart Ollama:
@ -74,8 +74,6 @@ systemctl restart ollama
- macOS: Raw model data is stored under `~/.ollama/models`. - macOS: Raw model data is stored under `~/.ollama/models`.
- Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models` - Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
Below the models directory you will find a structure similar to the following: Below the models directory you will find a structure similar to the following:
```shell ```shell
@ -96,3 +94,63 @@ The manifest lists all the layers used in this model. You will see a `media type
### How can I change where Ollama stores models? ### How can I change where Ollama stores models?
To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service. To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service.
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
No. Anything you do with Ollama, such as generate a response from the model, stays with you. We don't collect any data about how you use the model. You are always in control of your own data.
## How can I use Ollama in Visual Studio Code?
There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. You can see the list of [extensions & plugins](https://github.com/jmorganca/ollama#extensions--plugins) at the bottom of the main repository readme.
## How do I use Ollama behind a proxy?
Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values.
When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate.
On macOS:
```bash
HTTPS_PROXY=http://proxy.example.com ollama serve
```
On Linux:
```bash
echo 'Environment="HTTPS_PROXY=https://proxy.example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
### How do I use Ollama behind a proxy in Docker?
The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container.
Alternatively, Docker daemon can be configured to use a proxy. Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy).
Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate.
```dockerfile
FROM ollama/ollama
COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt
RUN update-ca-certificate
```
Build and run this image:
```shell
docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.

View file

@ -41,6 +41,8 @@ INSTRUCTION arguments
## Examples ## Examples
### Basic `Modelfile`
An example of a `Modelfile` creating a mario blueprint: An example of a `Modelfile` creating a mario blueprint:
```modelfile ```modelfile
@ -63,6 +65,35 @@ To use this:
More examples are available in the [examples directory](../examples). More examples are available in the [examples directory](../examples).
### `Modelfile`s in [ollama.ai/library][1]
There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
- Option 1: view a details page from a model's tags page:
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
3. Scroll down to "Layers"
- Note: if the [`FROM` instruction](#from-required) is not present,
it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` like so:
```bash
> ollama show --modelfile llama2:13b
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
# FROM llama2:13b
FROM /root/.ollama/models/blobs/sha256:123abc
TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
{{ end }}{{ .Prompt }} [/INST] """
SYSTEM """"""
PARAMETER stop [INST]
PARAMETER stop [/INST]
PARAMETER stop <<SYS>>
PARAMETER stop <</SYS>>
```
## Instructions ## Instructions
### FROM (Required) ### FROM (Required)
@ -177,3 +208,5 @@ LICENSE """
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments. - the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable. - Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
[1]: https://ollama.ai/library

View file

@ -4,5 +4,6 @@ Here is a list of ways you can use Ollama with other tools to build interesting
- [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md) - [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md)
- [Using LangChain with Ollama in Python](./tutorials/langchainpy.md) - [Using LangChain with Ollama in Python](./tutorials/langchainpy.md)
- [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md)
Also be sure to check out the [examples](../examples) directory for more ways to use Ollama. Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.

View file

@ -0,0 +1,38 @@
# Running Ollama on NVIDIA Jetson Devices
With some minor configuration, Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/). The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack).
NVIDIA Jetson devices are Linux-based embedded AI computers that are purpose-built for AI applications.
Jetsons have an integrated GPU that is wired directly to the memory controller of the machine. For this reason, the `nvidia-smi` command is unrecognized, and Ollama proceeds to operate in "CPU only"
mode. This can be verified by using a monitoring tool like jtop.
In order to address this, we simply pass the path to the Jetson's pre-installed CUDA libraries into `ollama serve` (while in a tmux session). We then hardcode the num_gpu parameters into a cloned
version of our target model.
Prerequisites:
- curl
- tmux
Here are the steps:
- Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.ai/install.sh | sh`
- Stop the Ollama service: `sudo systemctl stop ollama`
- Start Ollama serve in a tmux session called ollama_jetson and reference the CUDA libraries path: `tmux has-session -t ollama_jetson 2>/dev/null || tmux new-session -d -s ollama_jetson
'LD_LIBRARY_PATH=/usr/local/cuda/lib64 ollama serve'`
- Pull the model you want to use (e.g. mistral): `ollama pull mistral`
- Create a new Modelfile specifically for enabling GPU support on the Jetson: `touch ModelfileMistralJetson`
- In the ModelfileMistralJetson file, specify the FROM model and the num_gpu PARAMETER as shown below:
```
FROM mistral
PARAMETER num_gpu 999
```
- Create a new model from your Modelfile: `ollama create mistral-jetson -f ./ModelfileMistralJetson`
- Run the new model: `ollama run mistral-jetson`
If you run a monitoring tool like jtop you should now see that Ollama is using the Jetson's integrated GPU.
And that's it!

View file

@ -0,0 +1,5 @@
# Ollama Jupyter Notebook
This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
For best results, use an instance with GPU accelerator.

View file

@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "93f59dcb-c588-41b8-a792-55d88ade739c",
"metadata": {},
"outputs": [],
"source": [
"# Download and run the Ollama Linux install script\n",
"!curl https://ollama.ai/install.sh | sh\n",
"!command -v systemctl >/dev/null && sudo systemctl stop ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658c147e-c7f8-490e-910e-62b80f577dda",
"metadata": {},
"outputs": [],
"source": [
"!pip install aiohttp pyngrok\n",
"\n",
"import os\n",
"import asyncio\n",
"from aiohttp import ClientSession\n",
"\n",
"# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
"# over the built-in library. This is particularly important for \n",
"# Google Colab which installs older drivers\n",
"os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
"\n",
"async def run(cmd):\n",
" '''\n",
" run is a helper function to run subcommands asynchronously.\n",
" '''\n",
" print('>>> starting', *cmd)\n",
" p = await asyncio.subprocess.create_subprocess_exec(\n",
" *cmd,\n",
" stdout=asyncio.subprocess.PIPE,\n",
" stderr=asyncio.subprocess.PIPE,\n",
" )\n",
"\n",
" async def pipe(lines):\n",
" async for line in lines:\n",
" print(line.strip().decode('utf-8'))\n",
"\n",
" await asyncio.gather(\n",
" pipe(p.stdout),\n",
" pipe(p.stderr),\n",
" )\n",
"\n",
"\n",
"await asyncio.gather(\n",
" run(['ollama', 'serve']),\n",
" run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
"metadata": {},
"source": [
"The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
"\n",
"```\n",
"t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
"```\n",
"\n",
"The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
"\n",
"```bash\n",
"export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
"ollama list\n",
"ollama run mistral\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,31 @@
import requests
import json
import random
model = "llama2"
template = {
"firstName": "",
"lastName": "",
"address": {
"street": "",
"city": "",
"state": "",
"zipCode": ""
},
"phoneNumber": ""
}
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
print(f"Generating a sample user")
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))

View file

@ -0,0 +1,31 @@
import requests
import json
import random
countries = [
"United States",
"United Kingdom",
"the Netherlands",
"Germany",
"Mexico",
"Canada",
"France",
]
country = random.choice(countries)
model = "llama2"
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
print(f"Generating a sample user in {country}")
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))

View file

@ -0,0 +1,34 @@
# JSON Output Example
![llmjson 2023-11-10 15_31_31](https://github.com/jmorganca/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25)
There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in.
## Review the Code
Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body.
```python
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
```
When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country.
In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with.
Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read.
```python
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))
```

View file

@ -0,0 +1 @@
Requests==2.31.0

View file

@ -0,0 +1,8 @@
FROM codebooga:latest
SYSTEM """
You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer.
"""
PARAMETER TEMPERATURE 0.3

View file

@ -0,0 +1,42 @@
import sys
import re
import requests
import json
# prelines and postlines represent the number of lines of context to include in the output around the error
prelines = 10
postlines = 10
def find_errors_in_log_file():
if len(sys.argv) < 2:
print("Usage: python loganalysis.py <filename>")
return
log_file_path = sys.argv[1]
with open(log_file_path, 'r') as log_file:
log_lines = log_file.readlines()
error_logs = []
for i, line in enumerate(log_lines):
if "error" in line.lower():
start_index = max(0, i - prelines)
end_index = min(len(log_lines), i + postlines + 1)
error_logs.extend(log_lines[start_index:end_index])
return error_logs
error_logs = find_errors_in_log_file()
data = {
"prompt": "\n".join(error_logs),
"model": "mattw/loganalyzer"
}
response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
for line in response.iter_lines():
if line:
json_data = json.loads(line)
if json_data['done'] == False:
print(json_data['response'], end='', flush=True)

View file

@ -0,0 +1,32 @@
2023-11-10 07:17:40 /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration
2023-11-10 07:17:40 /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh
2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf
2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf
2023-11-10 07:17:40 /docker-entrypoint.sh: Sourcing /docker-entrypoint.d/15-local-resolvers.envsh
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh
2023-11-10 07:17:40 /docker-entrypoint.sh: Configuration complete; ready for start up
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: using the "epoll" event method
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: nginx/1.25.3
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: built by gcc 12.2.0 (Debian 12.2.0-14)
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: OS: Linux 6.4.16-linuxkit
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1048576:1048576
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker processes
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 29
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 30
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 31
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 32
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 33
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 34
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 35
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 36
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 37
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 38
2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:43 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:17:44 2023/11/10 13:17:44 [error] 29#29: *1 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:8080", referrer: "http://localhost:8080/"
2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:44 +0000] "GET /favicon.ico HTTP/1.1" 404 555 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:17:50 2023/11/10 13:17:50 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080"
2023-11-10 07:17:50 192.168.65.1 - - [10/Nov/2023:13:17:50 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:18:53 2023/11/10 13:18:53 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080"
2023-11-10 07:18:53 192.168.65.1 - - [10/Nov/2023:13:18:53 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"

View file

@ -0,0 +1,48 @@
# Log Analysis example
![loganalyzer 2023-11-10 08_53_29](https://github.com/jmorganca/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921)
This example shows one possible way to create a log file analyzer. To use it, run:
`python loganalysis.py <logfile>`
You can try this with the `logtest.logfile` file included in this directory.
## Review the code
The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt:
```plaintext
SYSTEM """
You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer.
"""
```
This model is available at https://ollama.ai/mattw/loganalyzer. You can customize it and add to your own namespace using the command `ollama create <namespace/modelname> -f <path-to-modelfile>` then `ollama push <namespace/modelname>`.
Then loganalysis.py scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after are set as the prompt for a call to the Generate API.
```python
data = {
"prompt": "\n".join(error_logs),
"model": "mattw/loganalyzer"
}
```
Finally, the streamed output is parsed and the response field in the output is printed to the line.
```python
response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
for line in response.iter_lines():
if line:
json_data = json.loads(line)
if json_data['done'] == False:
print(json_data['response'], end='')
```
## Next Steps
There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines.
Also try different models and different prompts to analyze the data. You could consider adding retrieval augmented generation (RAG) to this to help understand newer log formats.

View file

@ -0,0 +1 @@
Requests==2.31.0

View file

@ -1,23 +1,45 @@
package format package format
import "fmt" import (
"fmt"
"math"
)
const ( const (
Byte = 1 Byte = 1
KiloByte = Byte * 1000 KiloByte = Byte * 1000
MegaByte = KiloByte * 1000 MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000 GigaByte = MegaByte * 1000
TeraByte = GigaByte * 1000
) )
func HumanBytes(b int64) string { func HumanBytes(b int64) string {
var value float64
var unit string
switch { switch {
case b > GigaByte: case b >= TeraByte:
return fmt.Sprintf("%d GB", b/GigaByte) value = float64(b) / TeraByte
case b > MegaByte: unit = "TB"
return fmt.Sprintf("%d MB", b/MegaByte) case b >= GigaByte:
case b > KiloByte: value = float64(b) / GigaByte
return fmt.Sprintf("%d KB", b/KiloByte) unit = "GB"
case b >= MegaByte:
value = float64(b) / MegaByte
unit = "MB"
case b >= KiloByte:
value = float64(b) / KiloByte
unit = "KB"
default: default:
return fmt.Sprintf("%d B", b) return fmt.Sprintf("%d B", b)
} }
switch {
case value >= 100:
return fmt.Sprintf("%d %s", int(value), unit)
case value != math.Trunc(value):
return fmt.Sprintf("%.1f %s", value, unit)
default:
return fmt.Sprintf("%d %s", int(value), unit)
}
} }

1
go.mod
View file

@ -3,7 +3,6 @@ module github.com/jmorganca/ollama
go 1.20 go 1.20
require ( require (
github.com/dustin/go-humanize v1.0.1
github.com/emirpasic/gods v1.18.1 github.com/emirpasic/gods v1.18.1
github.com/gin-gonic/gin v1.9.1 github.com/gin-gonic/gin v1.9.1
github.com/mattn/go-runewidth v0.0.14 github.com/mattn/go-runewidth v0.0.14

2
go.sum
View file

@ -9,8 +9,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=

View file

@ -7,13 +7,13 @@ package llm
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release //go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf //go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch //go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
//go:generate cmake --build gguf/build/cpu --target server --config Release //go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View file

@ -71,9 +71,10 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
// IMPORTANT: the order of the runners in the array is the priority order // IMPORTANT: the order of the runners in the array is the priority order
switch runtime.GOOS { switch runtime.GOOS {
case "darwin": case "darwin":
runners = []ModelRunner{ if runtime.GOARCH == "arm64" {
{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}, runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}, } else {
runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
} }
case "linux": case "linux":
runners = []ModelRunner{ runners = []ModelRunner{
@ -225,7 +226,7 @@ type llama struct {
} }
var ( var (
errNvidiaSMI = errors.New("nvidia-smi command failed") errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only") errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
) )
@ -342,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
"--embedding", "--embedding",
} }
if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
}
if opts.RopeFrequencyBase > 0 { if opts.RopeFrequencyBase > 0 {
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase)) params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
} }
@ -543,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
"stream": true, "stream": true,
"n_predict": llm.NumPredict, "n_predict": llm.NumPredict,
"n_keep": llm.NumKeep, "n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature, "temperature": llm.Temperature,
"top_k": llm.TopK, "top_k": llm.TopK,
"top_p": llm.TopP, "top_p": llm.TopP,

View file

@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
switch ggml.FileType() { switch ggml.FileType() {
case "Q8_0": case "F32", "Q5_0", "Q5_1", "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 { if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// GGML Q8_0 do not support Metal API and will // GGML Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU // cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0 opts.NumGPU = 0
} }
case "F32", "Q5_0", "Q5_1":
if opts.NumGPU != 0 {
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
} }
var requiredMemory int64 var requiredMemory int64

155
progress/bar.go Normal file
View file

@ -0,0 +1,155 @@
package progress
import (
"fmt"
"math"
"os"
"strings"
"time"
"github.com/jmorganca/ollama/format"
"golang.org/x/term"
)
type Stats struct {
rate int64
value int64
remaining time.Duration
}
type Bar struct {
message string
messageWidth int
maxValue int64
initialValue int64
currentValue int64
started time.Time
stats Stats
statted time.Time
}
func NewBar(message string, maxValue, initialValue int64) *Bar {
return &Bar{
message: message,
messageWidth: -1,
maxValue: maxValue,
initialValue: initialValue,
currentValue: initialValue,
started: time.Now(),
}
}
func (b *Bar) String() string {
termWidth, _, err := term.GetSize(int(os.Stderr.Fd()))
if err != nil {
termWidth = 80
}
var pre, mid, suf strings.Builder
if b.message != "" {
message := strings.TrimSpace(b.message)
if b.messageWidth > 0 && len(message) > b.messageWidth {
message = message[:b.messageWidth]
}
fmt.Fprintf(&pre, "%s", message)
if b.messageWidth-pre.Len() >= 0 {
pre.WriteString(strings.Repeat(" ", b.messageWidth-pre.Len()))
}
pre.WriteString(" ")
}
fmt.Fprintf(&pre, "%3.0f%% ", math.Floor(b.percent()))
fmt.Fprintf(&suf, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue))
stats := b.Stats()
rate := int64(stats.rate)
if rate > 0 {
fmt.Fprintf(&suf, ", %s/s", format.HumanBytes(rate))
}
fmt.Fprintf(&suf, ")")
elapsed := time.Since(b.started)
if b.percent() < 100 && rate > 0 {
fmt.Fprintf(&suf, " [%s:%s]", elapsed.Round(time.Second), stats.remaining)
} else {
fmt.Fprintf(&suf, " ")
}
mid.WriteString("▕")
// add 3 extra spaces: 2 boundary characters and 1 space at the end
f := termWidth - pre.Len() - suf.Len() - 3
n := int(float64(f) * b.percent() / 100)
if n > 0 {
mid.WriteString(strings.Repeat("█", n))
}
if f-n > 0 {
mid.WriteString(strings.Repeat(" ", f-n))
}
mid.WriteString("▏")
return pre.String() + mid.String() + suf.String()
}
func (b *Bar) Set(value int64) {
if value >= b.maxValue {
value = b.maxValue
}
b.currentValue = value
}
func (b *Bar) percent() float64 {
if b.maxValue > 0 {
return float64(b.currentValue) / float64(b.maxValue) * 100
}
return 0
}
func (b *Bar) Stats() Stats {
if time.Since(b.statted) < time.Second {
return b.stats
}
switch {
case b.statted.IsZero():
b.stats = Stats{
value: b.initialValue,
rate: 0,
remaining: 0,
}
case b.currentValue >= b.maxValue:
b.stats = Stats{
value: b.maxValue,
rate: 0,
remaining: 0,
}
default:
rate := b.currentValue - b.stats.value
var remaining time.Duration
if rate > 0 {
remaining = time.Second * time.Duration((float64(b.maxValue-b.currentValue))/(float64(rate)))
}
b.stats = Stats{
value: b.currentValue,
rate: rate,
remaining: remaining,
}
}
b.statted = time.Now()
return b.stats
}

113
progress/progress.go Normal file
View file

@ -0,0 +1,113 @@
package progress
import (
"fmt"
"io"
"sync"
"time"
)
type State interface {
String() string
}
type Progress struct {
mu sync.Mutex
w io.Writer
pos int
ticker *time.Ticker
states []State
}
func NewProgress(w io.Writer) *Progress {
p := &Progress{w: w}
go p.start()
return p
}
func (p *Progress) stop() bool {
for _, state := range p.states {
if spinner, ok := state.(*Spinner); ok {
spinner.Stop()
}
}
if p.ticker != nil {
p.ticker.Stop()
p.ticker = nil
p.render()
return true
}
return false
}
func (p *Progress) Stop() bool {
stopped := p.stop()
if stopped {
fmt.Fprint(p.w, "\n")
}
return stopped
}
func (p *Progress) StopAndClear() bool {
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
stopped := p.stop()
if stopped {
// clear all progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
}
return stopped
}
func (p *Progress) Add(key string, state State) {
p.mu.Lock()
defer p.mu.Unlock()
p.states = append(p.states, state)
}
func (p *Progress) render() error {
p.mu.Lock()
defer p.mu.Unlock()
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
// clear already rendered progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
// render progress lines
for i, state := range p.states {
fmt.Fprint(p.w, state.String())
if i < len(p.states)-1 {
fmt.Fprint(p.w, "\n")
}
}
p.pos = len(p.states)
return nil
}
func (p *Progress) start() {
p.ticker = time.NewTicker(100 * time.Millisecond)
for range p.ticker.C {
p.render()
}
}

73
progress/spinner.go Normal file
View file

@ -0,0 +1,73 @@
package progress
import (
"fmt"
"strings"
"time"
)
type Spinner struct {
message string
messageWidth int
parts []string
value int
ticker *time.Ticker
started time.Time
stopped time.Time
}
func NewSpinner(message string) *Spinner {
s := &Spinner{
message: message,
parts: []string{
"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
},
started: time.Now(),
}
go s.start()
return s
}
func (s *Spinner) String() string {
var sb strings.Builder
if len(s.message) > 0 {
message := strings.TrimSpace(s.message)
if s.messageWidth > 0 && len(message) > s.messageWidth {
message = message[:s.messageWidth]
}
fmt.Fprintf(&sb, "%s", message)
if s.messageWidth-sb.Len() >= 0 {
sb.WriteString(strings.Repeat(" ", s.messageWidth-sb.Len()))
}
sb.WriteString(" ")
}
if s.stopped.IsZero() {
spinner := s.parts[s.value]
sb.WriteString(spinner)
sb.WriteString(" ")
}
return sb.String()
}
func (s *Spinner) start() {
s.ticker = time.NewTicker(100 * time.Millisecond)
for range s.ticker.C {
s.value = (s.value + 1) % len(s.parts)
if !s.stopped.IsZero() {
return
}
}
}
func (s *Spinner) Stop() {
if s.stopped.IsZero() {
s.stopped = time.Now()
}
}

View file

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2017 Zack
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,121 +0,0 @@
# progressbar
[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar)
[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3)
A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
## Install
```
go get -u github.com/schollz/progressbar/v3
```
## Usage
### Basic usage
```golang
bar := progressbar.Default(100)
for i := 0; i < 100; i++ {
bar.Add(1)
time.Sleep(40 * time.Millisecond)
}
```
which looks like:
![Example of basic bar](examples/basic/basic.gif)
### I/O operations
The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`.
```golang
req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
defer f.Close()
bar := progressbar.DefaultBytes(
resp.ContentLength,
"downloading",
)
io.Copy(io.MultiWriter(f, bar), resp.Body)
```
which looks like:
![Example of download bar](examples/download/download.gif)
### Progress bar with unknown length
A progressbar with unknown length is a spinner. Any bar with -1 length will automatically convert it to a spinner with a customizable spinner type. For example, the above code can be run and set the `resp.ContentLength` to `-1`.
which looks like:
![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
### Customization
There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
```golang
bar := progressbar.NewOptions(1000,
progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
progressbar.OptionEnableColorCodes(true),
progressbar.OptionShowBytes(true),
progressbar.OptionSetWidth(15),
progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "[green]=[reset]",
SaucerHead: "[green]>[reset]",
SaucerPadding: " ",
BarStart: "[",
BarEnd: "]",
}))
for i := 0; i < 1000; i++ {
bar.Add(1)
time.Sleep(5 * time.Millisecond)
}
```
which looks like:
![Example of customized bar](examples/customization/customization.gif)
## Contributing
Pull requests are welcome. Feel free to...
- Revise documentation
- Add new features
- Fix bugs
- Suggest improvements
## Thanks
Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
## License
MIT

File diff suppressed because it is too large Load diff

View file

@ -1,80 +0,0 @@
package progressbar
var spinners = map[int][]string{
0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
2: {"▖", "▘", "▝", "▗"},
3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
4: {"◢", "◣", "◤", "◥"},
5: {"◰", "◳", "◲", "◱"},
6: {"◴", "◷", "◶", "◵"},
7: {"◐", "◓", "◑", "◒"},
8: {".", "o", "O", "@", "*"},
9: {"|", "/", "-", "\\"},
10: {"◡◡", "⊙⊙", "◠◠"},
11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"},
13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
17: {"■", "□", "▪", "▫"},
18: {"←", "↑", "→", "↓"},
19: {"╫", "╪"},
20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"},
26: {".", "..", "..."},
27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
28: {".", "o", "O", "°", "O", "o", "."},
29: {"+", "x"},
30: {"v", "<", "^", ">"},
31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"},
32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"},
34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"},
37: {"", ""},
38: {"▌", "▀", "▐▄"},
39: {"🌍", "🌎", "🌏"},
40: {"◜", "◝", "◞", "◟"},
41: {"⬒", "⬔", "⬓", "⬕"},
42: {"⬖", "⬘", "⬗", "⬙"},
43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"},
44: {"♠", "♣", "♥", "♦"},
45: {"➞", "➟", "➠", "➡", "➠", "➟"},
46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "},
47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."},
48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "},
49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"},
52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"},
53: {"✶", "✸", "✹", "✺", "✹", "✷"},
54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"},
56: {"¿", "?"},
57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
59: {". ", ".. ", "...", " ..", " .", " "},
60: {".", "o", "O", "°", "O", "o", "."},
61: {"▓", "▒", "░"},
62: {"▌", "▀", "▐", "▄"},
63: {"⊶", "⊷"},
64: {"▪", "▫"},
65: {"□", "■"},
66: {"▮", "▯"},
67: {"-", "=", "≡"},
68: {"d", "q", "p", "b"},
69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
71: {"☗", "☖"},
72: {"⧇", "⧆"},
73: {"◉", "◎"},
74: {"㊂", "㊀", "㊁"},
75: {"⦾", "⦿"},
}

View file

@ -10,6 +10,7 @@ mkdir -p dist
for TARGETARCH in arm64 amd64; do for TARGETARCH in arm64 amd64; do
GOOS=darwin GOARCH=$TARGETARCH go generate ./... GOOS=darwin GOARCH=$TARGETARCH go generate ./...
GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
rm -rf llm/llama.cpp/*/build
done done
lipo -create -output dist/ollama dist/ollama-darwin-* lipo -create -output dist/ollama dist/ollama-darwin-*

View file

@ -10,6 +10,8 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \ --platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \ --build-arg=VERSION \
--build-arg=GOFLAGS \ --build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
--cache-to type=local,dest=.cache \
-f Dockerfile \ -f Dockerfile \
-t ollama \ -t ollama \
. .

View file

@ -181,6 +181,9 @@ install_cuda_driver_apt() {
debian) debian)
status 'Enabling contrib sources...' status 'Enabling contrib sources...'
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null $SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null
fi
;; ;;
esac esac

View file

@ -10,6 +10,7 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \ --platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \ --build-arg=VERSION \
--build-arg=GOFLAGS \ --build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
-f Dockerfile \ -f Dockerfile \
-t ollama/ollama -t ollama/ollama:$VERSION \ -t ollama/ollama -t ollama/ollama:$VERSION \
. .

View file

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"log" "log"
"math"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
@ -53,8 +54,8 @@ type blobDownloadPart struct {
const ( const (
numDownloadParts = 64 numDownloadParts = 64
minDownloadPartSize int64 = 32 * 1000 * 1000 minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 256 * 1000 * 1000 maxDownloadPartSize int64 = 1000 * format.MegaByte
) )
func (p *blobDownloadPart) Name() string { func (p *blobDownloadPart) Name() string {
@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
continue continue
} }
i := i
g.Go(func() error { g.Go(func() error {
var err error var err error
for try := 0; try < maxRetries; try++ { for try := 0; try < maxRetries; try++ {
@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
// return immediately if the context is canceled or the device is out of space // return immediately if the context is canceled or the device is out of space
return err return err
case err != nil: case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err) sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue continue
default: default:
if try > 0 {
log.Printf("%s part %d completed after %d retries", b.Digest[7:19], i, try)
}
return nil return nil
} }
} }
@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
} }
fn(api.ProgressResponse{ fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", b.Digest), Status: fmt.Sprintf("pulling %s", b.Digest[7:19]),
Digest: b.Digest, Digest: b.Digest,
Total: b.Total, Total: b.Total,
Completed: b.Completed.Load(), Completed: b.Completed.Load(),
@ -304,7 +303,7 @@ type downloadOpts struct {
fn func(api.ProgressResponse) fn func(api.ProgressResponse)
} }
const maxRetries = 3 const maxRetries = 6
var errMaxRetriesExceeded = errors.New("max retries exceeded") var errMaxRetriesExceeded = errors.New("max retries exceeded")
@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
return err return err
default: default:
opts.fn(api.ProgressResponse{ opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", opts.digest), Status: fmt.Sprintf("pulling %s", opts.digest[7:19]),
Digest: opts.digest, Digest: opts.digest,
Total: fi.Size(), Total: fi.Size(),
Completed: fi.Size(), Completed: fi.Size(),

View file

@ -228,108 +228,131 @@ func GetModel(name string) (*Model, error) {
return model, nil return model, nil
} }
func filenameWithPath(path, f string) (string, error) { func realpath(p string) string {
// if filePath starts with ~/, replace it with the user's home directory. abspath, err := filepath.Abs(p)
if strings.HasPrefix(f, fmt.Sprintf("~%s", string(os.PathSeparator))) { if err != nil {
parts := strings.Split(f, string(os.PathSeparator)) return p
}
home, err := os.UserHomeDir() home, err := os.UserHomeDir()
if err != nil { if err != nil {
return "", fmt.Errorf("failed to open file: %v", err) return abspath
} }
f = filepath.Join(home, filepath.Join(parts[1:]...)) if p == "~" {
return home
} else if strings.HasPrefix(p, "~/") {
return filepath.Join(home, p[2:])
} }
// if filePath is not an absolute path, make it relative to the modelfile path return abspath
if !filepath.IsAbs(f) {
f = filepath.Join(filepath.Dir(path), f)
}
return f, nil
}
func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error {
mp := ParseModelPath(name)
var manifest *ManifestV2
var err error
var noprune string
// build deleteMap to prune unused layers
deleteMap := make(map[string]bool)
if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
manifest, _, err = GetManifest(mp)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
if manifest != nil {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = true
}
deleteMap[manifest.Config.Digest] = true
}
}
mf, err := os.Open(path)
if err != nil {
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't open modelfile '%s'", path)})
return fmt.Errorf("failed to open file: %w", err)
}
defer mf.Close()
fn(api.ProgressResponse{Status: "parsing modelfile"})
commands, err := parser.Parse(mf)
if err != nil {
return err
} }
func CreateModel(ctx context.Context, name string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
config := ConfigV2{ config := ConfigV2{
Architecture: "amd64",
OS: "linux", OS: "linux",
Architecture: "amd64",
} }
deleteMap := make(map[string]struct{})
var layers []*LayerReader var layers []*LayerReader
params := make(map[string][]string) params := make(map[string][]string)
var sourceParams map[string]any fromParams := make(map[string]any)
for _, c := range commands { for _, c := range commands {
log.Printf("[%s] - %s\n", c.Name, c.Args) log.Printf("[%s] - %s", c.Name, c.Args)
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
switch c.Name { switch c.Name {
case "model": case "model":
fn(api.ProgressResponse{Status: "looking for model"}) if strings.HasPrefix(c.Args, "@") {
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
mp := ParseModelPath(c.Args)
mf, _, err := GetManifest(mp)
if err != nil {
modelFile, err := filenameWithPath(path, c.Args)
if err != nil { if err != nil {
return err return err
} }
if _, err := os.Stat(modelFile); err != nil {
// the model file does not exist, try pulling it c.Args = blobPath
if errors.Is(err, os.ErrNotExist) { }
fn(api.ProgressResponse{Status: "pulling model file"})
bin, err := os.Open(realpath(c.Args))
if err != nil {
// not a file on disk so must be a model reference
modelpath := ParseModelPath(c.Args)
manifest, _, err := GetManifest(modelpath)
switch {
case errors.Is(err, os.ErrNotExist):
fn(api.ProgressResponse{Status: "pulling model"})
if err := PullModel(ctx, c.Args, &RegistryOptions{}, fn); err != nil { if err := PullModel(ctx, c.Args, &RegistryOptions{}, fn); err != nil {
return err return err
} }
mf, _, err = GetManifest(mp)
manifest, _, err = GetManifest(modelpath)
if err != nil { if err != nil {
return fmt.Errorf("failed to open file after pull: %v", err)
}
} else {
return err return err
} }
} else { case err != nil:
// create a model from this specified file return err
fn(api.ProgressResponse{Status: "creating model layer"})
file, err := os.Open(modelFile)
if err != nil {
return fmt.Errorf("failed to open file: %v", err)
} }
defer file.Close()
ggml, err := llm.DecodeGGML(file) fn(api.ProgressResponse{Status: "reading model metadata"})
fromConfigPath, err := GetBlobsPath(manifest.Config.Digest)
if err != nil {
return err
}
fromConfigFile, err := os.Open(fromConfigPath)
if err != nil {
return err
}
defer fromConfigFile.Close()
var fromConfig ConfigV2
if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil {
return err
}
config.ModelFormat = fromConfig.ModelFormat
config.ModelFamily = fromConfig.ModelFamily
config.ModelType = fromConfig.ModelType
config.FileType = fromConfig.FileType
for _, layer := range manifest.Layers {
deleteMap[layer.Digest] = struct{}{}
if layer.MediaType == "application/vnd.ollama.image.params" {
fromParamsPath, err := GetBlobsPath(layer.Digest)
if err != nil {
return err
}
fromParamsFile, err := os.Open(fromParamsPath)
if err != nil {
return err
}
defer fromParamsFile.Close()
if err := json.NewDecoder(fromParamsFile).Decode(&fromParams); err != nil {
return err
}
}
layer, err := GetLayerWithBufferFromLayer(layer)
if err != nil {
return err
}
layer.From = modelpath.GetShortTagname()
layers = append(layers, layer)
}
deleteMap[manifest.Config.Digest] = struct{}{}
continue
}
defer bin.Close()
fn(api.ProgressResponse{Status: "creating model layer"})
ggml, err := llm.DecodeGGML(bin)
if err != nil { if err != nil {
return err return err
} }
@ -339,109 +362,47 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
config.ModelType = ggml.ModelType() config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType() config.FileType = ggml.FileType()
// reset the file bin.Seek(0, io.SeekStart)
file.Seek(0, io.SeekStart) layer, err := CreateLayer(bin)
l, err := CreateLayer(file)
if err != nil {
return fmt.Errorf("failed to create layer: %v", err)
}
l.MediaType = "application/vnd.ollama.image.model"
layers = append(layers, l)
}
}
if mf != nil {
fn(api.ProgressResponse{Status: "reading model metadata"})
sourceBlobPath, err := GetBlobsPath(mf.Config.Digest)
if err != nil { if err != nil {
return err return err
} }
sourceBlob, err := os.Open(sourceBlobPath) layer.MediaType = mediatype
if err != nil { layers = append(layers, layer)
return err
}
defer sourceBlob.Close()
var source ConfigV2
if err := json.NewDecoder(sourceBlob).Decode(&source); err != nil {
return err
}
// copy the model metadata
config.ModelFamily = source.ModelFamily
config.ModelType = source.ModelType
config.ModelFormat = source.ModelFormat
config.FileType = source.FileType
for _, l := range mf.Layers {
if l.MediaType == "application/vnd.ollama.image.params" {
sourceParamsBlobPath, err := GetBlobsPath(l.Digest)
if err != nil {
return err
}
sourceParamsBlob, err := os.Open(sourceParamsBlobPath)
if err != nil {
return err
}
defer sourceParamsBlob.Close()
if err := json.NewDecoder(sourceParamsBlob).Decode(&sourceParams); err != nil {
return err
}
}
newLayer, err := GetLayerWithBufferFromLayer(l)
if err != nil {
return err
}
newLayer.From = mp.GetShortTagname()
layers = append(layers, newLayer)
}
}
case "adapter": case "adapter":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)}) fn(api.ProgressResponse{Status: "creating adapter layer"})
bin, err := os.Open(realpath(c.Args))
fp, err := filenameWithPath(path, c.Args)
if err != nil { if err != nil {
return err return err
} }
defer bin.Close()
// create a model from this specified file layer, err := CreateLayer(bin)
fn(api.ProgressResponse{Status: "creating model layer"})
file, err := os.Open(fp)
if err != nil {
return fmt.Errorf("failed to open file: %v", err)
}
defer file.Close()
l, err := CreateLayer(file)
if err != nil {
return fmt.Errorf("failed to create layer: %v", err)
}
l.MediaType = "application/vnd.ollama.image.adapter"
layers = append(layers, l)
case "license":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
layer, err := CreateLayer(strings.NewReader(c.Args))
if err != nil { if err != nil {
return err return err
} }
if layer.Size > 0 { if layer.Size > 0 {
layer.MediaType = mediaType layer.MediaType = mediatype
layers = append(layers, layer) layers = append(layers, layer)
} }
case "template", "system", "prompt": case "license":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)}) fn(api.ProgressResponse{Status: "creating license layer"})
// remove the layer if one exists layer, err := CreateLayer(strings.NewReader(c.Args))
mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) if err != nil {
layers = removeLayerFromLayers(layers, mediaType) return err
}
if layer.Size > 0 {
layer.MediaType = mediatype
layers = append(layers, layer)
}
case "template", "system":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)})
// remove duplicate layers
layers = removeLayerFromLayers(layers, mediatype)
layer, err := CreateLayer(strings.NewReader(c.Args)) layer, err := CreateLayer(strings.NewReader(c.Args))
if err != nil { if err != nil {
@ -449,48 +410,47 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
} }
if layer.Size > 0 { if layer.Size > 0 {
layer.MediaType = mediaType layer.MediaType = mediatype
layers = append(layers, layer) layers = append(layers, layer)
} }
default: default:
// runtime parameters, build a list of args for each parameter to allow multiple values to be specified (ex: multiple stop sequences)
params[c.Name] = append(params[c.Name], c.Args) params[c.Name] = append(params[c.Name], c.Args)
} }
} }
// Create a single layer for the parameters
if len(params) > 0 { if len(params) > 0 {
fn(api.ProgressResponse{Status: "creating parameter layer"}) fn(api.ProgressResponse{Status: "creating parameters layer"})
layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params")
formattedParams, err := formatParams(params) formattedParams, err := formatParams(params)
if err != nil { if err != nil {
return fmt.Errorf("couldn't create params json: %v", err) return err
} }
for k, v := range sourceParams { for k, v := range fromParams {
if _, ok := formattedParams[k]; !ok { if _, ok := formattedParams[k]; !ok {
formattedParams[k] = v formattedParams[k] = v
} }
} }
if config.ModelType == "65B" { if config.ModelType == "65B" {
if numGQA, ok := formattedParams["num_gqa"].(int); ok && numGQA == 8 { if gqa, ok := formattedParams["gqa"].(int); ok && gqa == 8 {
config.ModelType = "70B" config.ModelType = "70B"
} }
} }
bts, err := json.Marshal(formattedParams) var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(formattedParams); err != nil {
return err
}
fn(api.ProgressResponse{Status: "creating config layer"})
layer, err := CreateLayer(bytes.NewReader(b.Bytes()))
if err != nil { if err != nil {
return err return err
} }
l, err := CreateLayer(bytes.NewReader(bts)) layer.MediaType = "application/vnd.ollama.image.params"
if err != nil { layers = append(layers, layer)
return fmt.Errorf("failed to create layer: %v", err)
}
l.MediaType = "application/vnd.ollama.image.params"
layers = append(layers, l)
} }
digests, err := getLayerDigests(layers) digests, err := getLayerDigests(layers)
@ -498,36 +458,31 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
return err return err
} }
var manifestLayers []*Layer configLayer, err := createConfigLayer(config, digests)
for _, l := range layers {
manifestLayers = append(manifestLayers, &l.Layer)
delete(deleteMap, l.Layer.Digest)
}
// Create a layer for the config object
fn(api.ProgressResponse{Status: "creating config layer"})
cfg, err := createConfigLayer(config, digests)
if err != nil { if err != nil {
return err return err
} }
layers = append(layers, cfg)
delete(deleteMap, cfg.Layer.Digest) layers = append(layers, configLayer)
delete(deleteMap, configLayer.Digest)
if err := SaveLayers(layers, fn, false); err != nil { if err := SaveLayers(layers, fn, false); err != nil {
return err return err
} }
// Create the manifest var contentLayers []*Layer
for _, layer := range layers {
contentLayers = append(contentLayers, &layer.Layer)
delete(deleteMap, layer.Digest)
}
fn(api.ProgressResponse{Status: "writing manifest"}) fn(api.ProgressResponse{Status: "writing manifest"})
err = CreateManifest(name, cfg, manifestLayers) if err := CreateManifest(name, configLayer, contentLayers); err != nil {
if err != nil {
return err return err
} }
if noprune == "" { if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
fn(api.ProgressResponse{Status: "removing any unused layers"}) if err := deleteUnusedLayers(nil, deleteMap, false); err != nil {
err = deleteUnusedLayers(nil, deleteMap, false)
if err != nil {
return err return err
} }
} }
@ -739,7 +694,7 @@ func CopyModel(src, dest string) error {
return nil return nil
} }
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dryRun bool) error { func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {
fp, err := GetManifestPath() fp, err := GetManifestPath()
if err != nil { if err != nil {
return err return err
@ -779,8 +734,7 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dry
} }
// only delete the files which are still in the deleteMap // only delete the files which are still in the deleteMap
for k, v := range deleteMap { for k := range deleteMap {
if v {
fp, err := GetBlobsPath(k) fp, err := GetBlobsPath(k)
if err != nil { if err != nil {
log.Printf("couldn't get file path for '%s': %v", k, err) log.Printf("couldn't get file path for '%s': %v", k, err)
@ -795,13 +749,12 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dry
log.Printf("wanted to remove: %s", fp) log.Printf("wanted to remove: %s", fp)
} }
} }
}
return nil return nil
} }
func PruneLayers() error { func PruneLayers() error {
deleteMap := make(map[string]bool) deleteMap := make(map[string]struct{})
p, err := GetBlobsPath("") p, err := GetBlobsPath("")
if err != nil { if err != nil {
return err return err
@ -818,7 +771,9 @@ func PruneLayers() error {
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
name = strings.ReplaceAll(name, "-", ":") name = strings.ReplaceAll(name, "-", ":")
} }
deleteMap[name] = true if strings.HasPrefix(name, "sha256:") {
deleteMap[name] = struct{}{}
}
} }
log.Printf("total blobs: %d", len(deleteMap)) log.Printf("total blobs: %d", len(deleteMap))
@ -873,11 +828,11 @@ func DeleteModel(name string) error {
return err return err
} }
deleteMap := make(map[string]bool) deleteMap := make(map[string]struct{})
for _, layer := range manifest.Layers { for _, layer := range manifest.Layers {
deleteMap[layer.Digest] = true deleteMap[layer.Digest] = struct{}{}
} }
deleteMap[manifest.Config.Digest] = true deleteMap[manifest.Config.Digest] = struct{}{}
err = deleteUnusedLayers(&mp, deleteMap, false) err = deleteUnusedLayers(&mp, deleteMap, false)
if err != nil { if err != nil {
@ -979,6 +934,9 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
for _, layer := range layers { for _, layer := range layers {
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil { if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
log.Printf("error uploading blob: %v", err) log.Printf("error uploading blob: %v", err)
if errors.Is(err, errUnauthorized) {
return fmt.Errorf("unable to push %s, make sure this namespace exists and you are authorized to push to it", ParseModelPath(name).GetNamespaceRepository())
}
return err return err
} }
} }
@ -1013,7 +971,7 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
var noprune string var noprune string
// build deleteMap to prune unused layers // build deleteMap to prune unused layers
deleteMap := make(map[string]bool) deleteMap := make(map[string]struct{})
if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" { if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
manifest, _, err = GetManifest(mp) manifest, _, err = GetManifest(mp)
@ -1023,9 +981,9 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
if manifest != nil { if manifest != nil {
for _, l := range manifest.Layers { for _, l := range manifest.Layers {
deleteMap[l.Digest] = true deleteMap[l.Digest] = struct{}{}
} }
deleteMap[manifest.Config.Digest] = true deleteMap[manifest.Config.Digest] = struct{}{}
} }
} }
@ -1165,45 +1123,53 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
} }
var errUnauthorized = fmt.Errorf("unauthorized")
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) { func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
for try := 0; try < maxRetries; try++ {
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if err != nil { if err != nil {
log.Printf("couldn't start upload: %v", err) if !errors.Is(err, context.Canceled) {
log.Printf("request failed: %v", err)
}
return nil, err return nil, err
} }
switch { switch {
case resp.StatusCode == http.StatusUnauthorized: case resp.StatusCode == http.StatusUnauthorized:
// Handle authentication error with one retry
auth := resp.Header.Get("www-authenticate") auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth) authRedir := ParseAuthRedirectString(auth)
token, err := getAuthToken(ctx, authRedir) token, err := getAuthToken(ctx, authRedir)
if err != nil { if err != nil {
return nil, err return nil, err
} }
regOpts.Token = token regOpts.Token = token
if body != nil { if body != nil {
body.Seek(0, io.SeekStart) _, err = body.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
} }
continue resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if resp.StatusCode == http.StatusUnauthorized {
return nil, errUnauthorized
}
return resp, err
case resp.StatusCode == http.StatusNotFound: case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest: case resp.StatusCode >= http.StatusBadRequest:
body, err := io.ReadAll(resp.Body) responseBody, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err) return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
} }
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
default:
return resp, nil return resp, nil
} }
}
return nil, errMaxRetriesExceeded
}
func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) { func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
if requestURL.Scheme != "http" && regOpts != nil && regOpts.Insecure { if requestURL.Scheme != "http" && regOpts != nil && regOpts.Insecure {

View file

@ -2,6 +2,7 @@ package server
import ( import (
"context" "context"
"crypto/sha256"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
@ -26,6 +27,7 @@ import (
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/version" "github.com/jmorganca/ollama/version"
) )
@ -409,8 +411,31 @@ func CreateModelHandler(c *gin.Context) {
return return
} }
if req.Name == "" || req.Path == "" { if req.Name == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name and path are required"}) c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
return
}
if req.Path == "" && req.Modelfile == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
return
}
var modelfile io.Reader = strings.NewReader(req.Modelfile)
if req.Path != "" && req.Modelfile == "" {
bin, err := os.Open(req.Path)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
return
}
defer bin.Close()
modelfile = bin
}
commands, err := parser.Parse(modelfile)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return return
} }
@ -424,7 +449,7 @@ func CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context()) ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel() defer cancel()
if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil { if err := CreateModel(ctx, req.Name, commands, fn); err != nil {
ch <- gin.H{"error": err.Error()} ch <- gin.H{"error": err.Error()}
} }
}() }()
@ -625,6 +650,60 @@ func CopyModelHandler(c *gin.Context) {
} }
} }
func HeadBlobHandler(c *gin.Context) {
path, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if _, err := os.Stat(path); err != nil {
c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
return
}
c.Status(http.StatusOK)
}
func CreateBlobHandler(c *gin.Context) {
targetPath, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
hash := sha256.New()
temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
defer temp.Close()
defer os.Remove(temp.Name())
if _, err := io.Copy(temp, io.TeeReader(c.Request.Body, hash)); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if fmt.Sprintf("sha256:%x", hash.Sum(nil)) != c.Param("digest") {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "digest does not match body"})
return
}
if err := temp.Close(); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if err := os.Rename(temp.Name(), targetPath); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.Status(http.StatusCreated)
}
var defaultAllowOrigins = []string{ var defaultAllowOrigins = []string{
"localhost", "localhost",
"127.0.0.1", "127.0.0.1",
@ -684,6 +763,8 @@ func Serve(ln net.Listener, allowOrigins []string) error {
r.POST("/api/copy", CopyModelHandler) r.POST("/api/copy", CopyModelHandler)
r.DELETE("/api/delete", DeleteModelHandler) r.DELETE("/api/delete", DeleteModelHandler)
r.POST("/api/show", ShowModelHandler) r.POST("/api/show", ShowModelHandler)
r.POST("/api/blobs/:digest", CreateBlobHandler)
r.HEAD("/api/blobs/:digest", HeadBlobHandler)
for _, method := range []string{http.MethodGet, http.MethodHead} { for _, method := range []string{http.MethodGet, http.MethodHead} {
r.Handle(method, "/", func(c *gin.Context) { r.Handle(method, "/", func(c *gin.Context) {
@ -713,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
if runtime.GOOS == "linux" { if runtime.GOOS == "linux" {
// check compatibility to log warnings // check compatibility to log warnings
if _, err := llm.CheckVRAM(); err != nil { if _, err := llm.CheckVRAM(); err != nil {
log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err) log.Printf(err.Error())
} }
} }

View file

@ -5,9 +5,9 @@ import (
"crypto/md5" "crypto/md5"
"errors" "errors"
"fmt" "fmt"
"hash"
"io" "io"
"log" "log"
"math"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
@ -35,6 +35,8 @@ type blobUpload struct {
context.CancelFunc context.CancelFunc
file *os.File
done bool done bool
err error err error
references atomic.Int32 references atomic.Int32
@ -42,8 +44,8 @@ type blobUpload struct {
const ( const (
numUploadParts = 64 numUploadParts = 64
minUploadPartSize int64 = 95 * 1000 * 1000 minUploadPartSize int64 = 100 * format.MegaByte
maxUploadPartSize int64 = 1000 * 1000 * 1000 maxUploadPartSize int64 = 1000 * format.MegaByte
) )
func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error { func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
@ -55,7 +57,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
if b.From != "" { if b.From != "" {
values := requestURL.Query() values := requestURL.Query()
values.Add("mount", b.Digest) values.Add("mount", b.Digest)
values.Add("from", b.From) values.Add("from", ParseModelPath(b.From).GetNamespaceRepository())
requestURL.RawQuery = values.Encode() requestURL.RawQuery = values.Encode()
} }
@ -77,6 +79,14 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
b.Total = fi.Size() b.Total = fi.Size()
// http.StatusCreated indicates a blob has been mounted
// ref: https://distribution.github.io/distribution/spec/api/#cross-repository-blob-mount
if resp.StatusCode == http.StatusCreated {
b.Completed.Store(b.Total)
b.done = true
return nil
}
var size = b.Total / numUploadParts var size = b.Total / numUploadParts
switch { switch {
case size < minUploadPartSize: case size < minUploadPartSize:
@ -120,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
return return
} }
f, err := os.Open(p) b.file, err = os.Open(p)
if err != nil { if err != nil {
b.err = err b.err = err
return return
} }
defer f.Close() defer b.file.Close()
g, inner := errgroup.WithContext(ctx) g, inner := errgroup.WithContext(ctx)
g.SetLimit(numUploadParts) g.SetLimit(numUploadParts)
@ -137,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
g.Go(func() error { g.Go(func() error {
var err error var err error
for try := 0; try < maxRetries; try++ { for try := 0; try < maxRetries; try++ {
part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size)
err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts) err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
switch { switch {
case errors.Is(err, context.Canceled): case errors.Is(err, context.Canceled):
@ -145,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
case errors.Is(err, errMaxRetriesExceeded): case errors.Is(err, errMaxRetriesExceeded):
return err return err
case err != nil: case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err) part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue continue
} }
@ -165,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
requestURL := <-b.nextURL requestURL := <-b.nextURL
var sb strings.Builder var sb strings.Builder
// calculate md5 checksum and add it to the commit request
for _, part := range b.Parts { for _, part := range b.Parts {
sb.Write(part.Sum(nil)) hash := md5.New()
if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
b.err = err
return
}
sb.Write(hash.Sum(nil))
} }
md5sum := md5.Sum([]byte(sb.String())) md5sum := md5.Sum([]byte(sb.String()))
@ -180,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", "0") headers.Set("Content-Length", "0")
for try := 0; try < maxRetries; try++ {
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts) resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
if err != nil { if err != nil {
b.err = err b.err = err
if errors.Is(err, context.Canceled) {
return return
} }
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
time.Sleep(sleep)
continue
}
defer resp.Body.Close() defer resp.Body.Close()
b.err = nil
b.done = true b.done = true
return
}
} }
func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error { func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
part.Reset()
headers := make(http.Header) headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size)) headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
headers.Set("X-Redirect-Uploads", "1")
if method == http.MethodPatch { if method == http.MethodPatch {
headers.Set("X-Redirect-Uploads", "1")
headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1)) headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
} }
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts) sr := io.NewSectionReader(b.file, part.Offset, part.Size)
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
if err != nil { if err != nil {
return err return err
} }
@ -227,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
return err return err
} }
// retry uploading to the redirect URL
for try := 0; try < maxRetries; try++ { for try := 0; try < maxRetries; try++ {
err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil) err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
switch { switch {
@ -235,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
case errors.Is(err, errMaxRetriesExceeded): case errors.Is(err, errMaxRetriesExceeded):
return err return err
case err != nil: case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err) part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue continue
} }
@ -260,7 +294,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
return err return err
} }
return fmt.Errorf("http status %d %s: %s", resp.StatusCode, resp.Status, body) return fmt.Errorf("http status %s: %s", resp.Status, body)
} }
if method == http.MethodPatch { if method == http.MethodPatch {
@ -293,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
} }
fn(api.ProgressResponse{ fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", b.Digest), Status: fmt.Sprintf("pushing %s", b.Digest[7:19]),
Digest: b.Digest, Digest: b.Digest,
Total: b.Total, Total: b.Total,
Completed: b.Completed.Load(), Completed: b.Completed.Load(),
@ -310,11 +344,7 @@ type blobUploadPart struct {
N int N int
Offset int64 Offset int64
Size int64 Size int64
hash.Hash
written int64 written int64
io.ReadSeeker
*blobUpload *blobUpload
} }
@ -326,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) {
} }
func (p *blobUploadPart) Reset() { func (p *blobUploadPart) Reset() {
p.Seek(0, io.SeekStart)
p.Completed.Add(-int64(p.written)) p.Completed.Add(-int64(p.written))
p.written = 0 p.written = 0
p.Hash = md5.New()
} }
func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error { func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
@ -344,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO
default: default:
defer resp.Body.Close() defer resp.Body.Close()
fn(api.ProgressResponse{ fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", layer.Digest), Status: fmt.Sprintf("pushing %s", layer.Digest[7:19]),
Digest: layer.Digest, Digest: layer.Digest,
Total: layer.Size, Total: layer.Size,
Completed: layer.Size, Completed: layer.Size,