Merge branch 'main' into install-instructions-archlinux

This commit is contained in:
Matt Williams 2023-11-21 06:32:50 -08:00 committed by GitHub
commit 5ebcde1541
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 733 additions and 1557 deletions

View file

@ -6,3 +6,4 @@ scripts
llm/llama.cpp/ggml llm/llama.cpp/ggml
llm/llama.cpp/gguf llm/llama.cpp/gguf
.env .env
.cache

1
.gitignore vendored
View file

@ -6,3 +6,4 @@
dist dist
ollama ollama
ggml-metal.metal ggml-metal.metal
.cache

View file

@ -216,6 +216,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
## Community Integrations ## Community Integrations
### Mobile
- [Mobile Artificial Intelligence Distribution](https://github.com/MaidFoundation/Maid) (Maid)
### Web & Desktop ### Web & Desktop
- [HTML UI](https://github.com/rtcfirefly/ollama-ui) - [HTML UI](https://github.com/rtcfirefly/ollama-ui)
@ -234,12 +238,15 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Emacs client](https://github.com/zweifisch/ollama) - [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim) - [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim) - [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel) - [gptel Emacs client](https://github.com/karthink/gptel)
- [ollama package for archlinux](https://archlinux.org/packages/extra/x86_64/ollama/) - [ollama package for archlinux](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Oatmeal](https://github.com/dustinblackman/oatmeal)
### Libraries ### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html) - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm) - [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp) - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@ -248,6 +255,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama) - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit) - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama) - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
### Mobile ### Mobile
@ -263,3 +271,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot) - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation) - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)

View file

@ -30,7 +30,7 @@ import (
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format" "github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/parser" "github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/progressbar" "github.com/jmorganca/ollama/progress"
"github.com/jmorganca/ollama/readline" "github.com/jmorganca/ollama/readline"
"github.com/jmorganca/ollama/server" "github.com/jmorganca/ollama/server"
"github.com/jmorganca/ollama/version" "github.com/jmorganca/ollama/version"
@ -48,14 +48,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
modelfile, err := os.ReadFile(filename) modelfile, err := os.ReadFile(filename)
if err != nil { if err != nil {
return err return err
} }
spinner := NewSpinner("transferring context")
go spinner.Spin(100 * time.Millisecond)
commands, err := parser.Parse(bytes.NewReader(modelfile)) commands, err := parser.Parse(bytes.NewReader(modelfile))
if err != nil { if err != nil {
return err return err
@ -66,6 +68,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
for _, c := range commands { for _, c := range commands {
switch c.Name { switch c.Name {
case "model", "adapter": case "model", "adapter":
@ -76,6 +82,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
path = filepath.Join(home, path[2:]) path = filepath.Join(home, path[2:])
} }
if !filepath.IsAbs(path) {
path = filepath.Join(filepath.Dir(filename), path)
}
bin, err := os.Open(path) bin, err := os.Open(path)
if errors.Is(err, os.ErrNotExist) && c.Name == "model" { if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
continue continue
@ -99,41 +109,34 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
} }
} }
var currentDigest string
var bar *progressbar.ProgressBar
request := api.CreateRequest{Name: args[0], Path: filename, Modelfile: string(modelfile)}
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" { if resp.Digest != "" {
spinner.Stop() spinner.Stop()
currentDigest = resp.Digest
// pulling bar, ok := bars[resp.Digest]
bar = progressbar.DefaultBytes( if !ok {
resp.Total, bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
resp.Status, bars[resp.Digest] = bar
) p.Add(resp.Digest, bar)
bar.Set64(resp.Completed) }
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed) bar.Set(resp.Completed)
} else { } else if status != resp.Status {
currentDigest = ""
spinner.Stop() spinner.Stop()
spinner = NewSpinner(resp.Status)
go spinner.Spin(100 * time.Millisecond) status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
} }
return nil return nil
} }
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
if err := client.Create(context.Background(), &request, fn); err != nil { if err := client.Create(context.Background(), &request, fn); err != nil {
return err return err
} }
spinner.Stop()
if spinner.description != "success" {
return errors.New("unexpected end to create model")
}
return nil return nil
} }
@ -170,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
var currentDigest string p := progress.NewProgress(os.Stderr)
var bar *progressbar.ProgressBar defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PushRequest{Name: args[0], Insecure: insecure}
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" { if resp.Digest != "" {
currentDigest = resp.Digest if spinner != nil {
bar = progressbar.DefaultBytes( spinner.Stop()
resp.Total, }
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
bar.Set64(resp.Completed) bar, ok := bars[resp.Digest]
} else if resp.Digest == currentDigest && resp.Digest != "" { if !ok {
bar.Set64(resp.Completed) bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
} else { bars[resp.Digest] = bar
currentDigest = "" p.Add(resp.Digest, bar)
fmt.Println(resp.Status) }
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
} }
return nil return nil
} }
request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(context.Background(), &request, fn); err != nil { if err := client.Push(context.Background(), &request, fn); err != nil {
return err return err
} }
if bar != nil && !bar.IsFinished() { spinner.Stop()
return errors.New("unexpected end to push model")
}
return nil return nil
} }
@ -350,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
return err return err
} }
return pull(args[0], insecure)
}
func pull(model string, insecure bool) error {
client, err := api.ClientFromEnvironment() client, err := api.ClientFromEnvironment()
if err != nil { if err != nil {
return err return err
} }
var currentDigest string p := progress.NewProgress(os.Stderr)
var bar *progressbar.ProgressBar defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PullRequest{Name: model, Insecure: insecure}
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" { if resp.Digest != "" {
currentDigest = resp.Digest if spinner != nil {
bar = progressbar.DefaultBytes( spinner.Stop()
resp.Total, }
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
bar.Set64(resp.Completed) bar, ok := bars[resp.Digest]
} else if resp.Digest == currentDigest && resp.Digest != "" { if !ok {
bar.Set64(resp.Completed) bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
} else { bars[resp.Digest] = bar
currentDigest = "" p.Add(resp.Digest, bar)
fmt.Println(resp.Status) }
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
} }
return nil return nil
} }
request := api.PullRequest{Name: args[0], Insecure: insecure}
if err := client.Pull(context.Background(), &request, fn); err != nil { if err := client.Pull(context.Background(), &request, fn); err != nil {
return err return err
} }
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to pull model")
}
return nil return nil
} }
@ -442,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
return err return err
} }
spinner := NewSpinner("") p := progress.NewProgress(os.Stderr)
go spinner.Spin(60 * time.Millisecond) defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
var latest api.GenerateResponse var latest api.GenerateResponse
@ -475,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format} request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
fn := func(response api.GenerateResponse) error { fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() { p.StopAndClear()
spinner.Finish()
}
latest = response latest = response
@ -511,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
if err := client.Generate(cancelCtx, &request, fn); err != nil { if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort { if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil return nil
} }
return err return err

View file

@ -1,44 +0,0 @@
package cmd
import (
"fmt"
"os"
"time"
"github.com/jmorganca/ollama/progressbar"
)
// Spinner pairs a progress bar with the description that is echoed once the
// spinner is stopped. The embedded *progressbar.ProgressBar supplies the
// rendering and lifecycle methods (Add, Finish, IsFinished, ...).
type Spinner struct {
	description string
	*progressbar.ProgressBar
}

// NewSpinner builds a Spinner labelled with description.
//
// The underlying bar is created with a maximum of -1 (presumably "unknown
// total", i.e. spinner mode — confirm against the progressbar package), writes
// to standard error, and is configured to clear itself when finished.
func NewSpinner(description string) *Spinner {
	bar := progressbar.NewOptions(-1,
		progressbar.OptionSetWriter(os.Stderr),
		progressbar.OptionThrottle(60*time.Millisecond),
		progressbar.OptionSpinnerType(14),
		progressbar.OptionSetRenderBlankState(true),
		progressbar.OptionSetElapsedTime(false),
		progressbar.OptionClearOnFinish(),
		progressbar.OptionSetDescription(description),
	)

	s := &Spinner{description: description}
	s.ProgressBar = bar
	return s
}
// Spin advances the spinner by one step every tick and returns once the
// underlying progress bar reports that it is finished. It blocks, so callers
// normally run it in its own goroutine.
//
// A dedicated time.Ticker is used instead of time.Tick so that the timer is
// released as soon as the loop exits; time.Tick offers no way to stop it.
// A non-positive tick returns immediately (time.Tick would have handed back a
// nil channel and blocked this goroutine forever).
func (s *Spinner) Spin(tick time.Duration) {
	if tick <= 0 {
		return
	}

	ticker := time.NewTicker(tick)
	defer ticker.Stop()

	for range ticker.C {
		if s.IsFinished() {
			return
		}
		s.Add(1)
	}
}
// Stop calls Finish on the embedded progress bar and then prints the
// spinner's description, followed by a newline, to standard output.
func (s *Spinner) Stop() {
	s.Finish()
	fmt.Println(s.description)
}

View file

@ -51,7 +51,9 @@ Advanced parameters (optional):
### JSON mode ### JSON mode
Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below. Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
### Examples ### Examples

View file

@ -149,8 +149,8 @@ docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
``` ```
### How do I use Ollama with GPU acceleration in Docker? ## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation. GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.

View file

@ -41,6 +41,8 @@ INSTRUCTION arguments
## Examples ## Examples
### Basic `Modelfile`
An example of a `Modelfile` creating a mario blueprint: An example of a `Modelfile` creating a mario blueprint:
```modelfile ```modelfile
@ -63,6 +65,35 @@ To use this:
More examples are available in the [examples directory](../examples). More examples are available in the [examples directory](../examples).
### `Modelfile`s in [ollama.ai/library][1]
There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
- Option 1: view a details page from a model's tags page:
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
3. Scroll down to "Layers"
- Note: if the [`FROM` instruction](#from-required) is not present,
it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` like so:
```bash
> ollama show --modelfile llama2:13b
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
# FROM llama2:13b
FROM /root/.ollama/models/blobs/sha256:123abc
TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
{{ end }}{{ .Prompt }} [/INST] """
SYSTEM """"""
PARAMETER stop [INST]
PARAMETER stop [/INST]
PARAMETER stop <<SYS>>
PARAMETER stop <</SYS>>
```
## Instructions ## Instructions
### FROM (Required) ### FROM (Required)
@ -177,3 +208,5 @@ LICENSE """
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments. - the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable. - Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
[1]: https://ollama.ai/library

View file

@ -0,0 +1,5 @@
# Ollama Jupyter Notebook
This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
For best results, use an instance with a GPU accelerator.

View file

@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "93f59dcb-c588-41b8-a792-55d88ade739c",
"metadata": {},
"outputs": [],
"source": [
"# Download and run the Ollama Linux install script\n",
"!curl https://ollama.ai/install.sh | sh\n",
"!command -v systemctl >/dev/null && sudo systemctl stop ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658c147e-c7f8-490e-910e-62b80f577dda",
"metadata": {},
"outputs": [],
"source": [
"!pip install aiohttp pyngrok\n",
"\n",
"import os\n",
"import asyncio\n",
"from aiohttp import ClientSession\n",
"\n",
"# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
"# over the built-in library. This is particularly important for \n",
"# Google Colab which installs older drivers\n",
"os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
"\n",
"async def run(cmd):\n",
" '''\n",
" run is a helper function to run subcommands asynchronously.\n",
" '''\n",
" print('>>> starting', *cmd)\n",
" p = await asyncio.subprocess.create_subprocess_exec(\n",
" *cmd,\n",
" stdout=asyncio.subprocess.PIPE,\n",
" stderr=asyncio.subprocess.PIPE,\n",
" )\n",
"\n",
" async def pipe(lines):\n",
" async for line in lines:\n",
" print(line.strip().decode('utf-8'))\n",
"\n",
" await asyncio.gather(\n",
" pipe(p.stdout),\n",
" pipe(p.stderr),\n",
" )\n",
"\n",
"\n",
"await asyncio.gather(\n",
" run(['ollama', 'serve']),\n",
" run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
"metadata": {},
"source": [
"The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
"\n",
"```\n",
"t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
"```\n",
"\n",
"The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
"\n",
"```bash\n",
"export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
"ollama list\n",
"ollama run mistral\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,23 +1,45 @@
package format package format
import "fmt" import (
"fmt"
"math"
)
const ( const (
Byte = 1 Byte = 1
KiloByte = Byte * 1000 KiloByte = Byte * 1000
MegaByte = KiloByte * 1000 MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000 GigaByte = MegaByte * 1000
TeraByte = GigaByte * 1000
) )
func HumanBytes(b int64) string { func HumanBytes(b int64) string {
var value float64
var unit string
switch { switch {
case b > GigaByte: case b >= TeraByte:
return fmt.Sprintf("%.1f GB", float64(b)/GigaByte) value = float64(b) / TeraByte
case b > MegaByte: unit = "TB"
return fmt.Sprintf("%.1f MB", float64(b)/MegaByte) case b >= GigaByte:
case b > KiloByte: value = float64(b) / GigaByte
return fmt.Sprintf("%.1f KB", float64(b)/KiloByte) unit = "GB"
case b >= MegaByte:
value = float64(b) / MegaByte
unit = "MB"
case b >= KiloByte:
value = float64(b) / KiloByte
unit = "KB"
default: default:
return fmt.Sprintf("%d B", b) return fmt.Sprintf("%d B", b)
} }
switch {
case value >= 100:
return fmt.Sprintf("%d %s", int(value), unit)
case value != math.Trunc(value):
return fmt.Sprintf("%.1f %s", value, unit)
default:
return fmt.Sprintf("%d %s", int(value), unit)
}
} }

View file

@ -7,13 +7,13 @@ package llm
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release //go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf //go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch //go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
//go:generate cmake --build gguf/build/cpu --target server --config Release //go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View file

@ -226,7 +226,7 @@ type llama struct {
} }
var ( var (
errNvidiaSMI = errors.New("nvidia-smi command failed") errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only") errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
) )
@ -343,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
"--embedding", "--embedding",
} }
if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
}
if opts.RopeFrequencyBase > 0 { if opts.RopeFrequencyBase > 0 {
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase)) params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
} }
@ -544,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
"stream": true, "stream": true,
"n_predict": llm.NumPredict, "n_predict": llm.NumPredict,
"n_keep": llm.NumKeep, "n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature, "temperature": llm.Temperature,
"top_k": llm.TopK, "top_k": llm.TopK,
"top_p": llm.TopP, "top_p": llm.TopP,

View file

@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
switch ggml.FileType() { switch ggml.FileType() {
case "Q8_0": case "F32", "Q5_0", "Q5_1", "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 { if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// GGML Q8_0 do not support Metal API and will // GGML Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU // cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0 opts.NumGPU = 0
} }
case "F32", "Q5_0", "Q5_1":
if opts.NumGPU != 0 {
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
} }
var requiredMemory int64 var requiredMemory int64

155
progress/bar.go Normal file
View file

@ -0,0 +1,155 @@
package progress
import (
"fmt"
"math"
"os"
"strings"
"time"
"github.com/jmorganca/ollama/format"
"golang.org/x/term"
)
// Stats is a snapshot of a Bar's transfer statistics as computed by
// (*Bar).Stats.
type Stats struct {
	// rate is the change in value observed since the previous snapshot.
	rate int64
	// value is the bar's value at the time the snapshot was taken.
	value int64
	// remaining is the estimated time left, or zero when it is unknown.
	remaining time.Duration
}

// Bar is a textual progress bar tracking a value between an initial amount
// and maxValue.
type Bar struct {
	// message is the label rendered before the percentage.
	message string
	// messageWidth, when positive, is the width message is truncated/padded to.
	messageWidth int

	maxValue     int64
	initialValue int64
	currentValue int64

	// started is when the bar was created; used to display elapsed time.
	started time.Time

	// stats caches the most recent snapshot and statted records when it was taken.
	stats   Stats
	statted time.Time
}

// NewBar returns a Bar labelled message that counts up to maxValue, beginning
// at initialValue. The message width is left at -1, so the message is neither
// truncated nor padded when rendered.
func NewBar(message string, maxValue, initialValue int64) *Bar {
	bar := new(Bar)
	bar.message = message
	bar.messageWidth = -1
	bar.maxValue = maxValue
	bar.initialValue = initialValue
	bar.currentValue = initialValue
	bar.started = time.Now()
	return bar
}
// String renders the bar as a single line sized to the terminal attached to
// standard error, falling back to 80 columns when the size is unavailable.
//
// The line consists of the optional message, the percentage, the bar itself,
// "(current/max[, rate/s])" and, while the transfer is incomplete and a rate
// is known, "[elapsed:remaining]".
func (b *Bar) String() string {
	width, _, err := term.GetSize(int(os.Stderr.Fd()))
	if err != nil {
		width = 80
	}

	var head, gauge, tail strings.Builder

	if b.message != "" {
		text := strings.TrimSpace(b.message)
		if b.messageWidth > 0 && len(text) > b.messageWidth {
			text = text[:b.messageWidth]
		}
		fmt.Fprintf(&head, "%s", text)

		// Pad up to messageWidth; a negative width (the default) never pads.
		if pad := b.messageWidth - head.Len(); pad >= 0 {
			head.WriteString(strings.Repeat(" ", pad))
		}
		head.WriteString(" ")
	}

	pct := b.percent()
	fmt.Fprintf(&head, "%3.0f%% ", math.Floor(pct))

	fmt.Fprintf(&tail, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue))

	snapshot := b.Stats()
	speed := int64(snapshot.rate)
	if speed > 0 {
		fmt.Fprintf(&tail, ", %s/s", format.HumanBytes(speed))
	}
	tail.WriteString(")")

	elapsed := time.Since(b.started)
	if pct < 100 && speed > 0 {
		fmt.Fprintf(&tail, " [%s:%s]", elapsed.Round(time.Second), snapshot.remaining)
	} else {
		tail.WriteString(" ")
	}

	gauge.WriteString("▕")

	// Reserve 3 columns: the 2 boundary characters and 1 trailing space.
	room := width - head.Len() - tail.Len() - 3
	filled := int(float64(room) * pct / 100)
	if filled > 0 {
		gauge.WriteString(strings.Repeat("█", filled))
	}
	if blank := room - filled; blank > 0 {
		gauge.WriteString(strings.Repeat(" ", blank))
	}

	gauge.WriteString("▏")

	return head.String() + gauge.String() + tail.String()
}
// Set updates the bar's current value, capping it at maxValue.
func (b *Bar) Set(value int64) {
	if value < b.maxValue {
		b.currentValue = value
		return
	}
	b.currentValue = b.maxValue
}
// percent reports the current value as a percentage of maxValue. It returns 0
// when maxValue is not positive, which avoids dividing by zero.
func (b *Bar) percent() float64 {
	if b.maxValue <= 0 {
		return 0
	}
	return float64(b.currentValue) / float64(b.maxValue) * 100
}
// Stats returns the bar's transfer statistics. The snapshot is recomputed at
// most once per second; calls made sooner return the cached snapshot.
//
// The first call seeds the snapshot from the initial value with no rate. Once
// the bar has reached maxValue the rate and remaining time are reported as
// zero. Otherwise the rate is the progress made since the previous snapshot
// divided by the time that actually elapsed, so it stays a true per-second
// figure even when snapshots are taken more than one second apart.
func (b *Bar) Stats() Stats {
	elapsed := time.Since(b.statted)
	if elapsed < time.Second {
		return b.stats
	}

	switch {
	case b.statted.IsZero():
		// First snapshot: nothing to compare against yet.
		b.stats = Stats{value: b.initialValue}
	case b.currentValue >= b.maxValue:
		b.stats = Stats{value: b.maxValue}
	default:
		delta := b.currentValue - b.stats.value
		// elapsed is at least one second here, so the division is safe.
		rate := int64(float64(delta) / elapsed.Seconds())

		var remaining time.Duration
		if rate > 0 {
			// Truncated to whole seconds, which is all the display needs.
			remaining = time.Second * time.Duration(float64(b.maxValue-b.currentValue)/float64(rate))
		}

		b.stats = Stats{
			value:     b.currentValue,
			rate:      rate,
			remaining: remaining,
		}
	}

	b.statted = time.Now()

	return b.stats
}

113
progress/progress.go Normal file
View file

@ -0,0 +1,113 @@
package progress
import (
"fmt"
"io"
"sync"
"time"
)
// State is a single renderable progress line. Progress writes the text
// returned by String for every State that has been added to it.
type State interface {
	String() string
}
// Progress periodically redraws a list of States on a writer, one State per
// line, using ANSI escape sequences to overwrite the previous frame.
type Progress struct {
	mu sync.Mutex
	w  io.Writer

	// pos is the number of lines drawn by the most recent render.
	pos int

	// ticker drives the redraws; it is nil once the Progress has been stopped.
	ticker *time.Ticker
	// done is closed by stop so the render goroutine can exit. Stopping a
	// time.Ticker does not close its channel, so the ticker alone cannot
	// wake the goroutine.
	done chan struct{}

	states []State
}

// NewProgress returns a Progress that redraws its states on w every 100ms
// until Stop or StopAndClear is called.
func NewProgress(w io.Writer) *Progress {
	// The ticker is created here rather than in the goroutine so that a Stop
	// issued before the goroutine is scheduled still finds it and stops it.
	p := &Progress{
		w:      w,
		ticker: time.NewTicker(100 * time.Millisecond),
		done:   make(chan struct{}),
	}
	go p.start()
	return p
}

// stop halts every Spinner among the states and, if the Progress is still
// running, stops the ticker, releases the render goroutine and draws one
// final frame. It reports whether this call stopped the Progress; later calls
// return false.
func (p *Progress) stop() bool {
	p.mu.Lock()
	for _, state := range p.states {
		if spinner, ok := state.(*Spinner); ok {
			spinner.Stop()
		}
	}
	ticker := p.ticker
	p.ticker = nil
	p.mu.Unlock()

	if ticker == nil {
		return false
	}

	ticker.Stop()
	close(p.done)

	// Final frame, so stopped spinners are shown in their finished form.
	p.render()
	return true
}

// Stop stops rendering and, if it was running, ends the output with a
// newline. It reports whether this call stopped the Progress.
func (p *Progress) Stop() bool {
	stopped := p.stop()
	if stopped {
		fmt.Fprint(p.w, "\n")
	}
	return stopped
}

// StopAndClear stops rendering and, if it was running, erases every line that
// was drawn. It reports whether this call stopped the Progress.
func (p *Progress) StopAndClear() bool {
	// Hide the cursor while erasing and show it again afterwards.
	fmt.Fprint(p.w, "\033[?25l")
	defer fmt.Fprint(p.w, "\033[?25h")

	stopped := p.stop()
	if stopped {
		p.mu.Lock()
		defer p.mu.Unlock()
		p.clearLocked()
	}
	return stopped
}

// Add appends state to the lines that are rendered. The key is currently
// unused.
func (p *Progress) Add(key string, state State) {
	p.mu.Lock()
	defer p.mu.Unlock()

	p.states = append(p.states, state)
}

// render redraws all states. It always returns nil; write errors are ignored
// because the output is best-effort terminal decoration.
func (p *Progress) render() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	return p.renderLocked()
}

// renderLocked erases the previous frame and draws the current states.
// The caller must hold p.mu.
func (p *Progress) renderLocked() error {
	// Hide the cursor while drawing and show it again afterwards.
	fmt.Fprint(p.w, "\033[?25l")
	defer fmt.Fprint(p.w, "\033[?25h")

	p.clearLocked()

	last := len(p.states) - 1
	for i, state := range p.states {
		fmt.Fprint(p.w, state.String())
		// No newline after the final line so the cursor stays on it.
		if i < last {
			fmt.Fprint(p.w, "\n")
		}
	}

	p.pos = len(p.states)

	return nil
}

// clearLocked erases the p.pos lines drawn by the previous render, walking
// upwards from the line the cursor is on. The caller must hold p.mu.
func (p *Progress) clearLocked() {
	for i := 0; i < p.pos; i++ {
		if i > 0 {
			fmt.Fprint(p.w, "\033[A") // cursor up one line
		}
		fmt.Fprint(p.w, "\033[2K\033[1G") // erase line, return to column 1
	}
}

// start is the render loop run by NewProgress in its own goroutine. It exits
// once stop has closed p.done.
func (p *Progress) start() {
	p.mu.Lock()
	ticker := p.ticker
	p.mu.Unlock()

	if ticker == nil {
		// Stopped before the loop began.
		return
	}

	for {
		select {
		case <-p.done:
			return
		case <-ticker.C:
			// Check and draw under one lock so a tick that was already
			// buffered cannot draw after stop's final frame.
			p.mu.Lock()
			if p.ticker != nil {
				p.renderLocked()
			}
			p.mu.Unlock()
		}
	}
}

73
progress/spinner.go Normal file
View file

@ -0,0 +1,73 @@
package progress
import (
"fmt"
"strings"
"time"
)
// Spinner is an animated activity indicator with an optional message. It
// renders as "<message> <glyph> " while running and as "<message> " once
// stopped.
type Spinner struct {
	message string
	// messageWidth, when positive, is the width message is truncated/padded to.
	messageWidth int

	// parts are the animation frames and value is the index of the current one.
	parts []string
	value int

	ticker *time.Ticker

	started time.Time
	// stopped is the zero time while the spinner is running.
	stopped time.Time
}

// NewSpinner returns a running Spinner labelled message. The animation is
// advanced by a background goroutine until Stop is called.
func NewSpinner(message string) *Spinner {
	s := &Spinner{
		message: message,
		parts: []string{
			"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
		},
		started: time.Now(),
	}
	go s.start()
	return s
}

// String renders the spinner: the trimmed message (truncated and padded to
// messageWidth when that is positive) followed, while the spinner is still
// running, by the current animation frame.
func (s *Spinner) String() string {
	var sb strings.Builder

	if len(s.message) > 0 {
		message := strings.TrimSpace(s.message)
		if s.messageWidth > 0 && len(message) > s.messageWidth {
			message = message[:s.messageWidth]
		}

		fmt.Fprintf(&sb, "%s", message)
		if pad := s.messageWidth - sb.Len(); pad >= 0 {
			sb.WriteString(strings.Repeat(" ", pad))
		}

		sb.WriteString(" ")
	}

	if s.stopped.IsZero() {
		sb.WriteString(s.parts[s.value])
		sb.WriteString(" ")
	}

	return sb.String()
}

// start advances the animation every 100ms and returns on the first tick
// after the spinner has been stopped. The ticker is stopped on the way out so
// its timer is released instead of firing for the rest of the process.
//
// NOTE(review): value and stopped are shared with String and Stop without
// synchronization — confirm whether callers rely on this being race-free.
func (s *Spinner) start() {
	s.ticker = time.NewTicker(100 * time.Millisecond)
	defer s.ticker.Stop()

	for range s.ticker.C {
		s.value = (s.value + 1) % len(s.parts)
		if !s.stopped.IsZero() {
			return
		}
	}
}

// Stop marks the spinner as stopped. Only the first call records the stop
// time; later calls are no-ops.
func (s *Spinner) Stop() {
	if s.stopped.IsZero() {
		s.stopped = time.Now()
	}
}

View file

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2017 Zack
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,121 +0,0 @@
# progressbar
[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar)
[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3)
A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
## Install
```
go get -u github.com/schollz/progressbar/v3
```
## Usage
### Basic usage
```golang
bar := progressbar.Default(100)
for i := 0; i < 100; i++ {
bar.Add(1)
time.Sleep(40 * time.Millisecond)
}
```
which looks like:
![Example of basic bar](examples/basic/basic.gif)
### I/O operations
The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`.
```golang
req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
defer f.Close()
bar := progressbar.DefaultBytes(
resp.ContentLength,
"downloading",
)
io.Copy(io.MultiWriter(f, bar), resp.Body)
```
which looks like:
![Example of download bar](examples/download/download.gif)
### Progress bar with unknown length
A progress bar with unknown length is rendered as a spinner. Any bar with a length of -1 is automatically converted to a spinner with a customizable spinner type. For example, run the code above with `resp.ContentLength` set to `-1`.
which looks like:
![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
### Customization
There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
```golang
bar := progressbar.NewOptions(1000,
progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
progressbar.OptionEnableColorCodes(true),
progressbar.OptionShowBytes(true),
progressbar.OptionSetWidth(15),
progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "[green]=[reset]",
SaucerHead: "[green]>[reset]",
SaucerPadding: " ",
BarStart: "[",
BarEnd: "]",
}))
for i := 0; i < 1000; i++ {
bar.Add(1)
time.Sleep(5 * time.Millisecond)
}
```
which looks like:
![Example of customized bar](examples/customization/customization.gif)
## Contributing
Pull requests are welcome. Feel free to...
- Revise documentation
- Add new features
- Fix bugs
- Suggest improvements
## Thanks
Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
## License
MIT

File diff suppressed because it is too large Load diff

View file

@ -1,80 +0,0 @@
package progressbar
// spinners is a lookup table of spinner styles keyed by an integer index.
// Each value is the list of strings for that style, presumably the animation
// frames shown one after another — confirm against the code that indexes it.
// NOTE(review): some multi-character entries (e.g. 12 and 31) look as if runs
// of spaces were collapsed in this copy; verify against the upstream table.
var spinners = map[int][]string{
0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
2: {"▖", "▘", "▝", "▗"},
3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
4: {"◢", "◣", "◤", "◥"},
5: {"◰", "◳", "◲", "◱"},
6: {"◴", "◷", "◶", "◵"},
7: {"◐", "◓", "◑", "◒"},
8: {".", "o", "O", "@", "*"},
9: {"|", "/", "-", "\\"},
10: {"◡◡", "⊙⊙", "◠◠"},
11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"},
13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
17: {"■", "□", "▪", "▫"},
18: {"←", "↑", "→", "↓"},
19: {"╫", "╪"},
20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"},
26: {".", "..", "..."},
27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
28: {".", "o", "O", "°", "O", "o", "."},
29: {"+", "x"},
30: {"v", "<", "^", ">"},
31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"},
32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"},
34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"},
37: {"", ""},
38: {"▌", "▀", "▐▄"},
39: {"🌍", "🌎", "🌏"},
40: {"◜", "◝", "◞", "◟"},
41: {"⬒", "⬔", "⬓", "⬕"},
42: {"⬖", "⬘", "⬗", "⬙"},
43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"},
44: {"♠", "♣", "♥", "♦"},
45: {"➞", "➟", "➠", "➡", "➠", "➟"},
46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "},
47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."},
48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "},
49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"},
52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"},
53: {"✶", "✸", "✹", "✺", "✹", "✷"},
54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"},
56: {"¿", "?"},
57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
59: {". ", ".. ", "...", " ..", " .", " "},
60: {".", "o", "O", "°", "O", "o", "."},
61: {"▓", "▒", "░"},
62: {"▌", "▀", "▐", "▄"},
63: {"⊶", "⊷"},
64: {"▪", "▫"},
65: {"□", "■"},
66: {"▮", "▯"},
67: {"-", "=", "≡"},
68: {"d", "q", "p", "b"},
69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
71: {"☗", "☖"},
72: {"⧇", "⧆"},
73: {"◉", "◎"},
74: {"㊂", "㊀", "㊁"},
75: {"⦾", "⦿"},
}

View file

@ -10,6 +10,8 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \ --platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \ --build-arg=VERSION \
--build-arg=GOFLAGS \ --build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
--cache-to type=local,dest=.cache \
-f Dockerfile \ -f Dockerfile \
-t ollama \ -t ollama \
. .

View file

@ -10,6 +10,7 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \ --platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \ --build-arg=VERSION \
--build-arg=GOFLAGS \ --build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
-f Dockerfile \ -f Dockerfile \
-t ollama/ollama -t ollama/ollama:$VERSION \ -t ollama/ollama -t ollama/ollama:$VERSION \
. .

View file

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"log" "log"
"math"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
@ -53,8 +54,8 @@ type blobDownloadPart struct {
const ( const (
numDownloadParts = 64 numDownloadParts = 64
minDownloadPartSize int64 = 32 * 1000 * 1000 minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 256 * 1000 * 1000 maxDownloadPartSize int64 = 1000 * format.MegaByte
) )
func (p *blobDownloadPart) Name() string { func (p *blobDownloadPart) Name() string {
@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
continue continue
} }
i := i
g.Go(func() error { g.Go(func() error {
var err error var err error
for try := 0; try < maxRetries; try++ { for try := 0; try < maxRetries; try++ {
@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
// return immediately if the context is canceled or the device is out of space // return immediately if the context is canceled or the device is out of space
return err return err
case err != nil: case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err) sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue continue
default: default:
if try > 0 {
log.Printf("%s part %d completed after %d retries", b.Digest[7:19], i, try)
}
return nil return nil
} }
} }
@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
} }
fn(api.ProgressResponse{ fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", b.Digest), Status: fmt.Sprintf("pulling %s", b.Digest[7:19]),
Digest: b.Digest, Digest: b.Digest,
Total: b.Total, Total: b.Total,
Completed: b.Completed.Load(), Completed: b.Completed.Load(),
@ -304,7 +303,7 @@ type downloadOpts struct {
fn func(api.ProgressResponse) fn func(api.ProgressResponse)
} }
const maxRetries = 3 const maxRetries = 6
var errMaxRetriesExceeded = errors.New("max retries exceeded") var errMaxRetriesExceeded = errors.New("max retries exceeded")
@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
return err return err
default: default:
opts.fn(api.ProgressResponse{ opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", opts.digest), Status: fmt.Sprintf("pulling %s", opts.digest[7:19]),
Digest: opts.digest, Digest: opts.digest,
Total: fi.Size(), Total: fi.Size(),
Completed: fi.Size(), Completed: fi.Size(),

View file

@ -228,26 +228,6 @@ func GetModel(name string) (*Model, error) {
return model, nil return model, nil
} }
func filenameWithPath(path, f string) (string, error) {
// if filePath starts with ~/, replace it with the user's home directory.
if strings.HasPrefix(f, fmt.Sprintf("~%s", string(os.PathSeparator))) {
parts := strings.Split(f, string(os.PathSeparator))
home, err := os.UserHomeDir()
if err != nil {
return "", fmt.Errorf("failed to open file: %v", err)
}
f = filepath.Join(home, filepath.Join(parts[1:]...))
}
// if filePath is not an absolute path, make it relative to the modelfile path
if !filepath.IsAbs(f) {
f = filepath.Join(filepath.Dir(path), f)
}
return f, nil
}
func realpath(p string) string { func realpath(p string) string {
abspath, err := filepath.Abs(p) abspath, err := filepath.Abs(p)
if err != nil { if err != nil {
@ -1146,43 +1126,49 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
var errUnauthorized = fmt.Errorf("unauthorized") var errUnauthorized = fmt.Errorf("unauthorized")
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) { func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
lastErr := errMaxRetriesExceeded resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
for try := 0; try < maxRetries; try++ { if err != nil {
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) if !errors.Is(err, context.Canceled) {
if err != nil { log.Printf("request failed: %v", err)
log.Printf("couldn't start upload: %v", err)
return nil, err
} }
switch { return nil, err
case resp.StatusCode == http.StatusUnauthorized: }
auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth) switch {
token, err := getAuthToken(ctx, authRedir) case resp.StatusCode == http.StatusUnauthorized:
// Handle authentication error with one retry
auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth)
token, err := getAuthToken(ctx, authRedir)
if err != nil {
return nil, err
}
regOpts.Token = token
if body != nil {
_, err = body.Seek(0, io.SeekStart)
if err != nil { if err != nil {
return nil, err return nil, err
} }
regOpts.Token = token
if body != nil {
body.Seek(0, io.SeekStart)
}
lastErr = errUnauthorized
case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest:
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
default:
return resp, nil
} }
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if resp.StatusCode == http.StatusUnauthorized {
return nil, errUnauthorized
}
return resp, err
case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest:
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
} }
return nil, lastErr return resp, nil
} }
func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) { func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {

View file

@ -666,8 +666,14 @@ func HeadBlobHandler(c *gin.Context) {
} }
func CreateBlobHandler(c *gin.Context) { func CreateBlobHandler(c *gin.Context) {
targetPath, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
hash := sha256.New() hash := sha256.New()
temp, err := os.CreateTemp("", c.Param("digest")) temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
if err != nil { if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
@ -690,12 +696,6 @@ func CreateBlobHandler(c *gin.Context) {
return return
} }
targetPath, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if err := os.Rename(temp.Name(), targetPath); err != nil { if err := os.Rename(temp.Name(), targetPath); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
@ -794,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
if runtime.GOOS == "linux" { if runtime.GOOS == "linux" {
// check compatibility to log warnings // check compatibility to log warnings
if _, err := llm.CheckVRAM(); err != nil { if _, err := llm.CheckVRAM(); err != nil {
log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err) log.Printf(err.Error())
} }
} }

View file

@ -5,9 +5,9 @@ import (
"crypto/md5" "crypto/md5"
"errors" "errors"
"fmt" "fmt"
"hash"
"io" "io"
"log" "log"
"math"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
@ -35,6 +35,8 @@ type blobUpload struct {
context.CancelFunc context.CancelFunc
file *os.File
done bool done bool
err error err error
references atomic.Int32 references atomic.Int32
@ -42,8 +44,8 @@ type blobUpload struct {
const ( const (
numUploadParts = 64 numUploadParts = 64
minUploadPartSize int64 = 95 * 1000 * 1000 minUploadPartSize int64 = 100 * format.MegaByte
maxUploadPartSize int64 = 1000 * 1000 * 1000 maxUploadPartSize int64 = 1000 * format.MegaByte
) )
func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error { func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
@ -128,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
return return
} }
f, err := os.Open(p) b.file, err = os.Open(p)
if err != nil { if err != nil {
b.err = err b.err = err
return return
} }
defer f.Close() defer b.file.Close()
g, inner := errgroup.WithContext(ctx) g, inner := errgroup.WithContext(ctx)
g.SetLimit(numUploadParts) g.SetLimit(numUploadParts)
@ -145,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
g.Go(func() error { g.Go(func() error {
var err error var err error
for try := 0; try < maxRetries; try++ { for try := 0; try < maxRetries; try++ {
part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size)
err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts) err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
switch { switch {
case errors.Is(err, context.Canceled): case errors.Is(err, context.Canceled):
@ -153,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
case errors.Is(err, errMaxRetriesExceeded): case errors.Is(err, errMaxRetriesExceeded):
return err return err
case err != nil: case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err) part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue continue
} }
@ -173,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
requestURL := <-b.nextURL requestURL := <-b.nextURL
var sb strings.Builder var sb strings.Builder
// calculate md5 checksum and add it to the commit request
for _, part := range b.Parts { for _, part := range b.Parts {
sb.Write(part.Sum(nil)) hash := md5.New()
if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
b.err = err
return
}
sb.Write(hash.Sum(nil))
} }
md5sum := md5.Sum([]byte(sb.String())) md5sum := md5.Sum([]byte(sb.String()))
@ -188,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", "0") headers.Set("Content-Length", "0")
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts) for try := 0; try < maxRetries; try++ {
if err != nil { resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
b.err = err if err != nil {
b.err = err
if errors.Is(err, context.Canceled) {
return
}
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
time.Sleep(sleep)
continue
}
defer resp.Body.Close()
b.err = nil
b.done = true
return return
} }
defer resp.Body.Close()
b.done = true
} }
func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error { func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
part.Reset()
headers := make(http.Header) headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size)) headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
headers.Set("X-Redirect-Uploads", "1")
if method == http.MethodPatch { if method == http.MethodPatch {
headers.Set("X-Redirect-Uploads", "1")
headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1)) headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
} }
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts) sr := io.NewSectionReader(b.file, part.Offset, part.Size)
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
if err != nil { if err != nil {
return err return err
} }
@ -235,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
return err return err
} }
// retry uploading to the redirect URL
for try := 0; try < maxRetries; try++ { for try := 0; try < maxRetries; try++ {
err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil) err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
switch { switch {
@ -243,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
case errors.Is(err, errMaxRetriesExceeded): case errors.Is(err, errMaxRetriesExceeded):
return err return err
case err != nil: case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err) part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue continue
} }
@ -301,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
} }
fn(api.ProgressResponse{ fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", b.Digest), Status: fmt.Sprintf("pushing %s", b.Digest[7:19]),
Digest: b.Digest, Digest: b.Digest,
Total: b.Total, Total: b.Total,
Completed: b.Completed.Load(), Completed: b.Completed.Load(),
@ -315,14 +341,10 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
type blobUploadPart struct { type blobUploadPart struct {
// N is the part number // N is the part number
N int N int
Offset int64 Offset int64
Size int64 Size int64
hash.Hash
written int64 written int64
io.ReadSeeker
*blobUpload *blobUpload
} }
@ -334,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) {
} }
func (p *blobUploadPart) Reset() { func (p *blobUploadPart) Reset() {
p.Seek(0, io.SeekStart)
p.Completed.Add(-int64(p.written)) p.Completed.Add(-int64(p.written))
p.written = 0 p.written = 0
p.Hash = md5.New()
} }
func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error { func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
@ -352,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO
default: default:
defer resp.Body.Close() defer resp.Body.Close()
fn(api.ProgressResponse{ fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", layer.Digest), Status: fmt.Sprintf("pushing %s", layer.Digest[7:19]),
Digest: layer.Digest, Digest: layer.Digest,
Total: layer.Size, Total: layer.Size,
Completed: layer.Size, Completed: layer.Size,