Merge branch 'main' into install-instructions-archlinux

2023-11-21 06:32:50 -08:00 · 2023-11-21 06:32:50 -08:00 · 5ebcde1541
commit 5ebcde1541
parent e1cd3152c9 45206cb7cc
27 changed files with 733 additions and 1557 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -6,3 +6,4 @@ scripts
 llm/llama.cpp/ggml
 llm/llama.cpp/gguf
 .env
 .cache
--- a/.gitignore
+++ b/.gitignore
@ -6,3 +6,4 @@
 dist
 ollama
 ggml-metal.metal
 .cache
--- a/README.md
+++ b/README.md
@ -216,6 +216,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ## Community Integrations
 ### Mobile
 - [Mobile Artificial Intelligence Distribution](https://github.com/MaidFoundation/Maid) (Maid)
 ### Web & Desktop
 - [HTML UI](https://github.com/rtcfirefly/ollama-ui)
@ -234,12 +238,15 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Emacs client](https://github.com/zweifisch/ollama)
 - [gen.nvim](https://github.com/David-Kunz/gen.nvim)
 - [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
 - [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
 - [gptel Emacs client](https://github.com/karthink/gptel)
 - [ollama package for archlinux](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Oatmeal](https://github.com/dustinblackman/oatmeal)
 ### Libraries
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@ -248,6 +255,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
 - [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
 ### Mobile
@ -263,3 +271,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@ -30,7 +30,7 @@ import (
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/format"
 	"github.com/jmorganca/ollama/parser"
-	"github.com/jmorganca/ollama/progressbar"
+	"github.com/jmorganca/ollama/progress"
 	"github.com/jmorganca/ollama/readline"
 	"github.com/jmorganca/ollama/server"
 	"github.com/jmorganca/ollama/version"
@ -48,14 +48,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()
 	bars := make(map[string]*progress.Bar)
 	modelfile, err := os.ReadFile(filename)
 	if err != nil {
 		return err
 	}
 	spinner := NewSpinner("transferring context")
 	go spinner.Spin(100 * time.Millisecond)
 	commands, err := parser.Parse(bytes.NewReader(modelfile))
 	if err != nil {
 		return err
@ -66,6 +68,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 	status := "transferring model data"
 	spinner := progress.NewSpinner(status)
 	p.Add(status, spinner)
 	for _, c := range commands {
 		switch c.Name {
 		case "model", "adapter":
@ -76,6 +82,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = filepath.Join(home, path[2:])
 			}
 			if !filepath.IsAbs(path) {
 				path = filepath.Join(filepath.Dir(filename), path)
 			}
 			bin, err := os.Open(path)
 			if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
 				continue
@ -99,41 +109,34 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		}
 	}
 	var currentDigest string
 	var bar *progressbar.ProgressBar
 	request := api.CreateRequest{Name: args[0], Path: filename, Modelfile: string(modelfile)}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
+		if resp.Digest != "" {
 			spinner.Stop()
-			currentDigest = resp.Digest
+
-			// pulling
+			bar, ok := bars[resp.Digest]
-			bar = progressbar.DefaultBytes(
+			if !ok {
-				resp.Total,
+				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
-				resp.Status,
+				bars[resp.Digest] = bar
-			)
+				p.Add(resp.Digest, bar)
-			bar.Set64(resp.Completed)
+			}
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
+
-			bar.Set64(resp.Completed)
+			bar.Set(resp.Completed)
-		} else {
+		} else if status != resp.Status {
 			currentDigest = ""
 			spinner.Stop()
-			spinner = NewSpinner(resp.Status)
+
-			go spinner.Spin(100 * time.Millisecond)
+			status = resp.Status
 			spinner = progress.NewSpinner(status)
 			p.Add(status, spinner)
 		}
 		return nil
 	}
 	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
 	if err := client.Create(context.Background(), &request, fn); err != nil {
 		return err
 	}
 	spinner.Stop()
 	if spinner.description != "success" {
 		return errors.New("unexpected end to create model")
 	}
 	return nil
 }
@ -170,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
-	var currentDigest string
+	p := progress.NewProgress(os.Stderr)
-	var bar *progressbar.ProgressBar
+	defer p.Stop()
 	bars := make(map[string]*progress.Bar)
 	var status string
 	var spinner *progress.Spinner
 	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
+		if resp.Digest != "" {
-			currentDigest = resp.Digest
+			if spinner != nil {
-			bar = progressbar.DefaultBytes(
+				spinner.Stop()
-				resp.Total,
+			}
 				fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
 			)
-			bar.Set64(resp.Completed)
+			bar, ok := bars[resp.Digest]
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
+			if !ok {
-			bar.Set64(resp.Completed)
+				bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
-		} else {
+				bars[resp.Digest] = bar
-			currentDigest = ""
+				p.Add(resp.Digest, bar)
-			fmt.Println(resp.Status)
+			}
 			bar.Set(resp.Completed)
 		} else if status != resp.Status {
 			if spinner != nil {
 				spinner.Stop()
 			}
 			status = resp.Status
 			spinner = progress.NewSpinner(status)
 			p.Add(status, spinner)
 		}
 		return nil
 	}
 	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	if err := client.Push(context.Background(), &request, fn); err != nil {
 		return err
 	}
-	if bar != nil && !bar.IsFinished() {
+	spinner.Stop()
 		return errors.New("unexpected end to push model")
 	}
 	return nil
 }
@ -350,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 	return pull(args[0], insecure)
 }
 func pull(model string, insecure bool) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
-	var currentDigest string
+	p := progress.NewProgress(os.Stderr)
-	var bar *progressbar.ProgressBar
+	defer p.Stop()
 	bars := make(map[string]*progress.Bar)
 	var status string
 	var spinner *progress.Spinner
 	request := api.PullRequest{Name: model, Insecure: insecure}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
+		if resp.Digest != "" {
-			currentDigest = resp.Digest
+			if spinner != nil {
-			bar = progressbar.DefaultBytes(
+				spinner.Stop()
-				resp.Total,
+			}
 				fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
 			)
-			bar.Set64(resp.Completed)
+			bar, ok := bars[resp.Digest]
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
+			if !ok {
-			bar.Set64(resp.Completed)
+				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
-		} else {
+				bars[resp.Digest] = bar
-			currentDigest = ""
+				p.Add(resp.Digest, bar)
-			fmt.Println(resp.Status)
+			}
 			bar.Set(resp.Completed)
 		} else if status != resp.Status {
 			if spinner != nil {
 				spinner.Stop()
 			}
 			status = resp.Status
 			spinner = progress.NewSpinner(status)
 			p.Add(status, spinner)
 		}
 		return nil
 	}
 	request := api.PullRequest{Name: args[0], Insecure: insecure}
 	if err := client.Pull(context.Background(), &request, fn); err != nil {
 		return err
 	}
 	if bar != nil && !bar.IsFinished() {
 		return errors.New("unexpected end to pull model")
 	}
 	return nil
 }
@ -442,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 		return err
 	}
-	spinner := NewSpinner("")
+	p := progress.NewProgress(os.Stderr)
-	go spinner.Spin(60 * time.Millisecond)
+	defer p.StopAndClear()
 	spinner := progress.NewSpinner("")
 	p.Add("", spinner)
 	var latest api.GenerateResponse
@ -475,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 	request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
 	fn := func(response api.GenerateResponse) error {
-		if !spinner.IsFinished() {
+		p.StopAndClear()
 			spinner.Finish()
 		}
 		latest = response
@ -511,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 	if err := client.Generate(cancelCtx, &request, fn); err != nil {
 		if strings.Contains(err.Error(), "context canceled") && abort {
 			spinner.Finish()
 			return nil
 		}
 		return err
--- a/cmd/spinner.go
+++ b/cmd/spinner.go
@ -1,44 +0,0 @@
 package cmd
 import (
 	"fmt"
 	"os"
 	"time"
 	"github.com/jmorganca/ollama/progressbar"
 )
 // Spinner is an indeterminate progress indicator: a progressbar.ProgressBar
 // created with length -1 plus the description that Stop prints once the
 // animation is finished.
 type Spinner struct {
 	// description is shown next to the spinner and printed by Stop.
 	description string
 	*progressbar.ProgressBar
 }
 // NewSpinner builds a spinner that renders to os.Stderr with the given
 // description. The caller animates it by running Spin, usually in its own
 // goroutine.
 func NewSpinner(description string) *Spinner {
 	return &Spinner{
 		description: description,
 		ProgressBar: progressbar.NewOptions(-1,
 			progressbar.OptionSetWriter(os.Stderr),
 			progressbar.OptionThrottle(60*time.Millisecond),
 			progressbar.OptionSpinnerType(14),
 			progressbar.OptionSetRenderBlankState(true),
 			progressbar.OptionSetElapsedTime(false),
 			progressbar.OptionClearOnFinish(),
 			progressbar.OptionSetDescription(description),
 		),
 	}
 }
 // Spin advances the spinner by one step every tick and returns on the first
 // tick after the spinner has been finished.
 //
 // The ticker is owned by this call and stopped on return; time.Tick was
 // avoided because its ticker cannot be stopped. A non-positive tick returns
 // immediately: time.NewTicker would panic on it, and time.Tick would have
 // yielded a nil channel that blocks the calling goroutine forever.
 func (s *Spinner) Spin(tick time.Duration) {
 	if tick <= 0 {
 		return
 	}
 	ticker := time.NewTicker(tick)
 	defer ticker.Stop()
 	for range ticker.C {
 		if s.IsFinished() {
 			return
 		}
 		s.Add(1)
 	}
 }
 // Stop finishes the underlying bar and then prints the description on its
 // own line on standard output.
 func (s *Spinner) Stop() {
 	s.Finish()
 	fmt.Println(s.description)
 }
--- a/docs/api.md
+++ b/docs/api.md
@ -51,7 +51,9 @@ Advanced parameters (optional):
 ### JSON mode
-Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
 > Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 ### Examples
--- a/docs/faq.md
+++ b/docs/faq.md
@ -149,8 +149,8 @@ docker build -t ollama-with-ca .
 docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
 ```
-### How do I use Ollama with GPU acceleration in Docker?
+## How do I use Ollama with GPU acceleration in Docker?
-The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit).
+The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
 GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@ -41,6 +41,8 @@ INSTRUCTION arguments
 ## Examples
 ### Basic `Modelfile`
 An example of a `Modelfile` creating a mario blueprint:
 ```modelfile
@ -63,6 +65,35 @@ To use this:
 More examples are available in the [examples directory](../examples).
 ### `Modelfile`s in [ollama.ai/library][1]
 There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
 - Option 1: view a details page from a model's tags page:
   1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
   2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
   3. Scroll down to "Layers"
      - Note: if the [`FROM` instruction](#from-required) is not present,
        it means the model was created from a local file
 - Option 2: use `ollama show` to print the `Modelfile` like so:
  ```bash
  > ollama show --modelfile llama2:13b
  # Modelfile generated by "ollama show"
  # To build a new Modelfile based on this one, replace the FROM line with:
  # FROM llama2:13b
  FROM /root/.ollama/models/blobs/sha256:123abc
  TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
  {{ end }}{{ .Prompt }} [/INST] """
  SYSTEM """"""
  PARAMETER stop [INST]
  PARAMETER stop [/INST]
  PARAMETER stop <<SYS>>
  PARAMETER stop <</SYS>>
  ```
 ## Instructions
 ### FROM (Required)
@ -177,3 +208,5 @@ LICENSE """
 - the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
 - Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
 [1]: https://ollama.ai/library
--- a/examples/jupyter-notebook/README.md
+++ b/examples/jupyter-notebook/README.md
@ -0,0 +1,5 @@
 # Ollama Jupyter Notebook
 This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
 For best results, use an instance with a GPU accelerator.
--- a/examples/jupyter-notebook/ollama.ipynb
+++ b/examples/jupyter-notebook/ollama.ipynb
@ -0,0 +1,102 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93f59dcb-c588-41b8-a792-55d88ade739c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download and run the Ollama Linux install script\n",
    "!curl https://ollama.ai/install.sh | sh\n",
    "!command -v systemctl >/dev/null && sudo systemctl stop ollama"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "658c147e-c7f8-490e-910e-62b80f577dda",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install aiohttp pyngrok\n",
    "\n",
    "import os\n",
    "import asyncio\n",
    "from aiohttp import ClientSession\n",
    "\n",
    "# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
    "# over the built-in library. This is particularly important for \n",
    "# Google Colab which installs older drivers\n",
    "os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
    "\n",
    "async def run(cmd):\n",
    "  '''\n",
    "  run is a helper function to run subcommands asynchronously.\n",
    "  '''\n",
    "  print('>>> starting', *cmd)\n",
    "  p = await asyncio.subprocess.create_subprocess_exec(\n",
    "      *cmd,\n",
    "      stdout=asyncio.subprocess.PIPE,\n",
    "      stderr=asyncio.subprocess.PIPE,\n",
    "  )\n",
    "\n",
    "  async def pipe(lines):\n",
    "    async for line in lines:\n",
    "      print(line.strip().decode('utf-8'))\n",
    "\n",
    "  await asyncio.gather(\n",
    "      pipe(p.stdout),\n",
    "      pipe(p.stderr),\n",
    "  )\n",
    "\n",
    "\n",
    "await asyncio.gather(\n",
    "    run(['ollama', 'serve']),\n",
    "    run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
   "metadata": {},
   "source": [
    "The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
    "\n",
    "```\n",
    "t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
    "```\n",
    "\n",
    "The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
    "\n",
    "```bash\n",
    "export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
    "ollama list\n",
    "ollama run mistral\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/format/bytes.go
+++ b/format/bytes.go
@ -1,23 +1,45 @@
 package format
-import "fmt"
+import (
 	"fmt"
 	"math"
 )
 // Decimal byte-size units: each unit is 1000 times the previous one.
 // HumanBytes uses them as thresholds and divisors when formatting a size.
 const (
 	Byte     = 1
 	KiloByte = Byte * 1000
 	MegaByte = KiloByte * 1000
 	GigaByte = MegaByte * 1000
 	TeraByte = GigaByte * 1000
 )
 func HumanBytes(b int64) string {
 	var value float64
 	var unit string
 	switch {
-	case b > GigaByte:
+	case b >= TeraByte:
-		return fmt.Sprintf("%.1f GB", float64(b)/GigaByte)
+		value = float64(b) / TeraByte
-	case b > MegaByte:
+		unit = "TB"
-		return fmt.Sprintf("%.1f MB", float64(b)/MegaByte)
+	case b >= GigaByte:
-	case b > KiloByte:
+		value = float64(b) / GigaByte
-		return fmt.Sprintf("%.1f KB", float64(b)/KiloByte)
+		unit = "GB"
 	case b >= MegaByte:
 		value = float64(b) / MegaByte
 		unit = "MB"
 	case b >= KiloByte:
 		value = float64(b) / KiloByte
 		unit = "KB"
 	default:
 		return fmt.Sprintf("%d B", b)
 	}
 	switch {
 	case value >= 100:
 		return fmt.Sprintf("%d %s", int(value), unit)
 	case value != math.Trunc(value):
 		return fmt.Sprintf("%.1f %s", value, unit)
 	default:
 		return fmt.Sprintf("%d %s", int(value), unit)
 	}
 }
--- a/llm/llama.cpp/generate_darwin_amd64.go
+++ b/llm/llama.cpp/generate_darwin_amd64.go
@ -7,13 +7,13 @@ package llm
 //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
 //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
 //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
-//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build ggml/build/cpu --target server --config Release
 //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
 //go:generate git submodule update --force gguf
 //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
 //go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch
-//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
+//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
 //go:generate cmake --build gguf/build/cpu --target server --config Release
 //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
--- a/llm/llama.go
+++ b/llm/llama.go
@ -226,7 +226,7 @@ type llama struct {
 }
 var (
-	errNvidiaSMI     = errors.New("nvidia-smi command failed")
+	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
 	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
 )
@ -343,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		"--embedding",
 	}
 	if opts.MainGPU > 0 {
 		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}
 	if opts.RopeFrequencyBase > 0 {
 		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
 	}
@ -544,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
 		"stream":            true,
 		"n_predict":         llm.NumPredict,
 		"n_keep":            llm.NumKeep,
 		"main_gpu":          llm.MainGPU,
 		"temperature":       llm.Temperature,
 		"top_k":             llm.TopK,
 		"top_p":             llm.TopP,
--- a/llm/llm.go
+++ b/llm/llm.go
@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 	if runtime.GOOS == "darwin" {
 		switch ggml.FileType() {
-		case "Q8_0":
+		case "F32", "Q5_0", "Q5_1", "Q8_0":
 			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 				// GGML Q8_0 do not support Metal API and will
 				// cause the runner to segmentation fault so disable GPU
 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 				opts.NumGPU = 0
 			}
 		case "F32", "Q5_0", "Q5_1":
 			if opts.NumGPU != 0 {
 				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
 				// cause the runner to segmentation fault so disable GPU
 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 				opts.NumGPU = 0
 			}
 		}
 		var requiredMemory int64
--- a/progress/bar.go
+++ b/progress/bar.go
@ -0,0 +1,155 @@
 package progress
 import (
 	"fmt"
 	"math"
 	"os"
 	"strings"
 	"time"
 	"github.com/jmorganca/ollama/format"
 	"golang.org/x/term"
 )
 // Stats is the throughput snapshot that Bar.Stats computes and caches.
 type Stats struct {
 	// rate is the throughput figure; Bar.String renders it with a "/s" suffix.
 	rate      int64
 	// value is the bar value recorded when the snapshot was taken.
 	value     int64
 	// remaining is the estimated time left, derived from rate; zero when
 	// no rate is available.
 	remaining time.Duration
 }
 // Bar is a byte-count progress bar. String renders it as a single line:
 // message, percentage, a filled gauge, and transfer statistics.
 type Bar struct {
 	// message is the label rendered before the percentage.
 	message      string
 	// messageWidth, when positive, is the width the label is truncated or
 	// padded to. NewBar sets it to -1, which disables both.
 	messageWidth int
 	// maxValue is the value that corresponds to 100%.
 	maxValue     int64
 	// initialValue is the value the bar was created with; it seeds the
 	// first Stats snapshot.
 	initialValue int64
 	// currentValue is the most recent value passed to Set.
 	currentValue int64
 	// started is the creation time, used for the elapsed-time display.
 	started time.Time
 	// stats is the cached snapshot and statted the time it was taken.
 	stats   Stats
 	statted time.Time
 }
 // NewBar returns a Bar labelled message that starts at initialValue and is
 // complete at maxValue. The elapsed-time clock starts at the moment of the
 // call.
 func NewBar(message string, maxValue, initialValue int64) *Bar {
 	bar := new(Bar)
 	bar.message = message
 	bar.messageWidth = -1 // no fixed label width
 	bar.maxValue = maxValue
 	bar.initialValue = initialValue
 	bar.currentValue = initialValue
 	bar.started = time.Now()
 	return bar
 }
 // String renders the bar as one terminal line:
 //
 //	<message> <pct>% ▕<gauge>▏(<done>/<total>[, <rate>/s]) [<elapsed>:<remaining>]
 //
 // The gauge takes whatever width is left on the terminal attached to
 // os.Stderr; 80 columns are assumed when the size cannot be determined.
 func (b *Bar) String() string {
 	width, _, err := term.GetSize(int(os.Stderr.Fd()))
 	if err != nil {
 		width = 80
 	}
 	pct := b.percent()
 	// Left part: optional label, then the percentage.
 	var head strings.Builder
 	if b.message != "" {
 		label := strings.TrimSpace(b.message)
 		if b.messageWidth > 0 && len(label) > b.messageWidth {
 			label = label[:b.messageWidth]
 		}
 		head.WriteString(label)
 		if pad := b.messageWidth - head.Len(); pad >= 0 {
 			head.WriteString(strings.Repeat(" ", pad))
 		}
 		head.WriteString(" ")
 	}
 	fmt.Fprintf(&head, "%3.0f%% ", math.Floor(pct))
 	// Right part: byte counts, optional rate, optional timing.
 	var tail strings.Builder
 	fmt.Fprintf(&tail, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue))
 	stats := b.Stats()
 	hasRate := stats.rate > 0
 	if hasRate {
 		fmt.Fprintf(&tail, ", %s/s", format.HumanBytes(stats.rate))
 	}
 	tail.WriteString(")")
 	sinceStart := time.Since(b.started)
 	if pct < 100 && hasRate {
 		fmt.Fprintf(&tail, " [%s:%s]", sinceStart.Round(time.Second), stats.remaining)
 	} else {
 		// placeholder where the timing block would otherwise go
 		tail.WriteString("        ")
 	}
 	// Gauge: the 3 reserved columns are the two boundary characters and
 	// one trailing space.
 	room := width - head.Len() - tail.Len() - 3
 	filled := int(float64(room) * pct / 100)
 	var gauge strings.Builder
 	gauge.WriteString("▕")
 	if filled > 0 {
 		gauge.WriteString(strings.Repeat("█", filled))
 	}
 	if blank := room - filled; blank > 0 {
 		gauge.WriteString(strings.Repeat(" ", blank))
 	}
 	gauge.WriteString("▏")
 	return head.String() + gauge.String() + tail.String()
 }
 // Set records value as the bar's current position, capping it at maxValue.
 func (b *Bar) Set(value int64) {
 	if value < b.maxValue {
 		b.currentValue = value
 		return
 	}
 	b.currentValue = b.maxValue
 }
 // percent reports how far currentValue is towards maxValue on a 0-100
 // scale. A bar whose maxValue is not positive reports 0.
 func (b *Bar) percent() float64 {
 	if b.maxValue <= 0 {
 		return 0
 	}
 	ratio := float64(b.currentValue) / float64(b.maxValue)
 	return ratio * 100
 }
 // Stats returns the bar's throughput snapshot, recomputing it at most once
 // per second and returning the cached value in between.
 //
 // The first call only records the starting point, so it reports no rate.
 // A bar that has reached maxValue reports no rate and nothing remaining.
 // Otherwise the rate is the change in value since the previous snapshot
 // divided by the time that actually elapsed. Snapshots are at least, not
 // exactly, one second apart, so using the raw difference would overstate
 // the per-second rate and understate the remaining time.
 func (b *Bar) Stats() Stats {
 	elapsed := time.Since(b.statted)
 	if elapsed < time.Second {
 		return b.stats
 	}
 	switch {
 	case b.statted.IsZero():
 		b.stats = Stats{
 			value:     b.initialValue,
 			rate:      0,
 			remaining: 0,
 		}
 	case b.currentValue >= b.maxValue:
 		b.stats = Stats{
 			value:     b.maxValue,
 			rate:      0,
 			remaining: 0,
 		}
 	default:
 		// elapsed is >= 1s here, so the division is always well defined.
 		rate := int64(float64(b.currentValue-b.stats.value) / elapsed.Seconds())
 		var remaining time.Duration
 		if rate > 0 {
 			remaining = time.Second * time.Duration((float64(b.maxValue-b.currentValue))/(float64(rate)))
 		}
 		b.stats = Stats{
 			value:     b.currentValue,
 			rate:      rate,
 			remaining: remaining,
 		}
 	}
 	b.statted = time.Now()
 	return b.stats
 }
--- a/progress/progress.go
+++ b/progress/progress.go
@ -0,0 +1,113 @@
 package progress
 import (
 	"fmt"
 	"io"
 	"sync"
 	"time"
 )
 // State is anything a Progress can display. String returns the text for
 // the state's line; Progress calls it again on every render.
 type State interface {
 	String() string
 }
 // Progress periodically redraws a list of States, one per line, on a
 // writer using terminal escape sequences.
 type Progress struct {
 	// mu is held by Add and render while they touch states and pos.
 	mu sync.Mutex
 	// w receives all output, including escape sequences.
 	w  io.Writer
 	// pos is the number of states drawn by the last render, i.e. how many
 	// lines have to be erased before drawing again.
 	pos int
 	// ticker drives the periodic render; it is set by start and reset to
 	// nil by stop.
 	ticker *time.Ticker
 	// states holds the displayed states in the order they were added.
 	states []State
 }
 // NewProgress returns a Progress that writes to w and launches the
 // goroutine that redraws it periodically.
 func NewProgress(w io.Writer) *Progress {
 	progress := new(Progress)
 	progress.w = w
 	go progress.start()
 	return progress
 }
 // stop halts every spinner among the states and, if the render ticker is
 // still set, stops it and draws one final frame. It reports whether the
 // ticker was set, which makes repeated calls harmless.
 func (p *Progress) stop() bool {
 	for i := range p.states {
 		if s, isSpinner := p.states[i].(*Spinner); isSpinner {
 			s.Stop()
 		}
 	}
 	if p.ticker == nil {
 		return false
 	}
 	p.ticker.Stop()
 	p.ticker = nil
 	p.render()
 	return true
 }
 // Stop ends the display, leaving the final frame on screen followed by a
 // newline. It reports whether this call was the one that stopped it.
 func (p *Progress) Stop() bool {
 	if !p.stop() {
 		return false
 	}
 	fmt.Fprint(p.w, "\n")
 	return true
 }
 // StopAndClear ends the display and erases every line it drew. The cursor
 // is hidden while this runs and shown again on return. It reports whether
 // this call was the one that stopped the display.
 func (p *Progress) StopAndClear() bool {
 	fmt.Fprint(p.w, "\033[?25l")
 	defer fmt.Fprint(p.w, "\033[?25h")
 	if !p.stop() {
 		return false
 	}
 	// Erase from the last drawn line upwards: every line but the first one
 	// visited needs a cursor-up before it is cleared.
 	for line := p.pos; line > 0; line-- {
 		if line < p.pos {
 			fmt.Fprint(p.w, "\033[A")
 		}
 		fmt.Fprint(p.w, "\033[2K\033[1G")
 	}
 	return true
 }
 // Add appends state to the displayed list; it shows up on the next render.
 // key is accepted but not used by this implementation.
 func (p *Progress) Add(key string, state State) {
 	p.mu.Lock()
 	p.states = append(p.states, state)
 	p.mu.Unlock()
 }
 // render redraws all states: it erases the lines written by the previous
 // frame, then writes each state's current text on its own line, leaving no
 // trailing newline. The cursor is hidden for the duration of the redraw.
 // It always returns nil.
 func (p *Progress) render() error {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	fmt.Fprint(p.w, "\033[?25l")
 	defer fmt.Fprint(p.w, "\033[?25h")
 	// Erase the previous frame bottom-up; the cursor already sits on the
 	// last line, so only the following lines need a cursor-up first.
 	for line := p.pos; line > 0; line-- {
 		if line < p.pos {
 			fmt.Fprint(p.w, "\033[A")
 		}
 		fmt.Fprint(p.w, "\033[2K\033[1G")
 	}
 	// Draw the new frame, separating (not terminating) lines with "\n".
 	last := len(p.states) - 1
 	for i := range p.states {
 		fmt.Fprint(p.w, p.states[i].String())
 		if i != last {
 			fmt.Fprint(p.w, "\n")
 		}
 	}
 	p.pos = len(p.states)
 	return nil
 }
 // start installs a 100ms ticker and renders on every tick. NewProgress runs
 // it in its own goroutine.
 //
 // NOTE(review): Ticker.Stop does not close the ticker's channel, so this
 // loop blocks forever after stop instead of returning.
 func (p *Progress) start() {
 	ticker := time.NewTicker(100 * time.Millisecond)
 	p.ticker = ticker
 	for {
 		<-ticker.C
 		p.render()
 	}
 }
--- a/progress/spinner.go
+++ b/progress/spinner.go
@ -0,0 +1,73 @@
 package progress
 import (
 	"fmt"
 	"strings"
 	"time"
 )
 // Spinner renders a message followed by an animated braille glyph. Once
 // stopped it renders only the message.
 //
 // NOTE(review): value and stopped are written by one goroutine (start,
 // Stop) and read by whichever goroutine calls String, without
 // synchronization.
 type Spinner struct {
 	// message is the label rendered before the glyph.
 	message      string
 	// messageWidth, when positive, is the byte width the label is truncated
 	// or padded to; the zero value leaves the label as is.
 	messageWidth int
 	// parts are the animation frames and value the index of the current one.
 	parts []string
 	value int
 	// ticker paces the animation; it is created and released by start.
 	ticker  *time.Ticker
 	// started is the creation time; stopped is zero until Stop is called.
 	started time.Time
 	stopped time.Time
 }
 // NewSpinner returns a spinner labelled message and launches the goroutine
 // that animates it. Call Stop to end the animation.
 func NewSpinner(message string) *Spinner {
 	s := &Spinner{
 		message: message,
 		parts: []string{
 			"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
 		},
 		started: time.Now(),
 	}
 	go s.start()
 	return s
 }
 // String returns the spinner's line: the trimmed message (truncated or
 // padded to messageWidth when that is positive) and a space, followed by
 // the current frame and a space while the spinner is still running.
 func (s *Spinner) String() string {
 	var sb strings.Builder
 	if len(s.message) > 0 {
 		message := strings.TrimSpace(s.message)
 		if s.messageWidth > 0 && len(message) > s.messageWidth {
 			message = message[:s.messageWidth]
 		}
 		fmt.Fprintf(&sb, "%s", message)
 		if s.messageWidth-sb.Len() >= 0 {
 			sb.WriteString(strings.Repeat(" ", s.messageWidth-sb.Len()))
 		}
 		sb.WriteString(" ")
 	}
 	if s.stopped.IsZero() {
 		spinner := s.parts[s.value]
 		sb.WriteString(spinner)
 		sb.WriteString(" ")
 	}
 	return sb.String()
 }
 // start advances the animation by one frame every 100ms and returns on the
 // first tick after Stop has been called. The ticker is stopped on return
 // so its resources are released together with the goroutine.
 func (s *Spinner) start() {
 	s.ticker = time.NewTicker(100 * time.Millisecond)
 	defer s.ticker.Stop()
 	for range s.ticker.C {
 		s.value = (s.value + 1) % len(s.parts)
 		if !s.stopped.IsZero() {
 			return
 		}
 	}
 }
 // Stop marks the spinner as stopped. Only the first call records the stop
 // time; later calls are no-ops.
 func (s *Spinner) Stop() {
 	if s.stopped.IsZero() {
 		s.stopped = time.Now()
 	}
 }
--- a/progressbar/LICENSE
+++ b/progressbar/LICENSE
@ -1,21 +0,0 @@
 MIT License
 Copyright (c) 2017 Zack
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/progressbar/README.md
+++ b/progressbar/README.md
@ -1,121 +0,0 @@
 # progressbar
 [![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
 [![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar) 
 [![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
 [![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3) 
 A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
 ## Install
 ```
 go get -u github.com/schollz/progressbar/v3
 ```
 ## Usage 
 ### Basic usage
 ```golang
 bar := progressbar.Default(100)
 for i := 0; i < 100; i++ {
    bar.Add(1)
    time.Sleep(40 * time.Millisecond)
 }
 ```
 which looks like:
 ![Example of basic bar](examples/basic/basic.gif)
 ### I/O operations
 The `progressbar` implements `io.Writer`, so it can automatically count the number of bytes written to a stream and can be used as a progress bar for an `io.Reader`.
 ```golang
 req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
 resp, _ := http.DefaultClient.Do(req)
 defer resp.Body.Close()
 f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
 defer f.Close()
 bar := progressbar.DefaultBytes(
    resp.ContentLength,
    "downloading",
 )
 io.Copy(io.MultiWriter(f, bar), resp.Body)
 ```
 which looks like:
 ![Example of download bar](examples/download/download.gif)
 ### Progress bar with unknown length
 A progressbar with unknown length is a spinner. Any bar with a length of -1 is automatically converted to a spinner with a customizable spinner type. For example, run the code above with `resp.ContentLength` set to `-1`.
 which looks like:
 ![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
 ### Customization
 There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
 ```golang
 bar := progressbar.NewOptions(1000,
    progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
    progressbar.OptionEnableColorCodes(true),
    progressbar.OptionShowBytes(true),
    progressbar.OptionSetWidth(15),
    progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
    progressbar.OptionSetTheme(progressbar.Theme{
        Saucer:        "[green]=[reset]",
        SaucerHead:    "[green]>[reset]",
        SaucerPadding: " ",
        BarStart:      "[",
        BarEnd:        "]",
    }))
 for i := 0; i < 1000; i++ {
    bar.Add(1)
    time.Sleep(5 * time.Millisecond)
 }
 ```
 which looks like:
 ![Example of customized bar](examples/customization/customization.gif)
 ## Contributing
 Pull requests are welcome. Feel free to...
 - Revise documentation
 - Add new features
 - Fix bugs
 - Suggest improvements
 ## Thanks
 Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
 Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
 Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
 Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
 Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
 Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
 ## License
 MIT
--- a/progressbar/progressbar.go
+++ b/progressbar/progressbar.go
--- a/progressbar/spinners.go
+++ b/progressbar/spinners.go
@ -1,80 +0,0 @@
 package progressbar
 var spinners = map[int][]string{
 	0:  {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
 	1:  {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
 	2:  {"▖", "▘", "▝", "▗"},
 	3:  {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
 	4:  {"◢", "◣", "◤", "◥"},
 	5:  {"◰", "◳", "◲", "◱"},
 	6:  {"◴", "◷", "◶", "◵"},
 	7:  {"◐", "◓", "◑", "◒"},
 	8:  {".", "o", "O", "@", "*"},
 	9:  {"|", "/", "-", "\\"},
 	10: {"◡◡", "⊙⊙", "◠◠"},
 	11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
 	12: {">))'>", " >))'>", "  >))'>", "   >))'>", "    >))'>", "   <'((<", "  <'((<", " <'((<"},
 	13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
 	14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
 	15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
 	16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
 	17: {"■", "□", "▪", "▫"},
 	18: {"←", "↑", "→", "↓"},
 	19: {"╫", "╪"},
 	20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
 	21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
 	22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
 	23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
 	24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
 	25: {"ｦ", "ｧ", "ｨ", "ｩ", "ｪ", "ｫ", "ｬ", "ｭ", "ｮ", "ｯ", "ｱ", "ｲ", "ｳ", "ｴ", "ｵ", "ｶ", "ｷ", "ｸ", "ｹ", "ｺ", "ｻ", "ｼ", "ｽ", "ｾ", "ｿ", "ﾀ", "ﾁ", "ﾂ", "ﾃ", "ﾄ", "ﾅ", "ﾆ", "ﾇ", "ﾈ", "ﾉ", "ﾊ", "ﾋ", "ﾌ", "ﾍ", "ﾎ", "ﾏ", "ﾐ", "ﾑ", "ﾒ", "ﾓ", "ﾔ", "ﾕ", "ﾖ", "ﾗ", "ﾘ", "ﾙ", "ﾚ", "ﾛ", "ﾜ", "ﾝ"},
 	26: {".", "..", "..."},
 	27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
 	28: {".", "o", "O", "°", "O", "o", "."},
 	29: {"+", "x"},
 	30: {"v", "<", "^", ">"},
 	31: {">>--->", " >>--->", "  >>--->", "   >>--->", "    >>--->", "    <---<<", "   <---<<", "  <---<<", " <---<<", "<---<<"},
 	32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
 	33: {"[          ]", "[=         ]", "[==        ]", "[===       ]", "[====      ]", "[=====     ]", "[======    ]", "[=======   ]", "[========  ]", "[========= ]", "[==========]"},
 	34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
 	35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
 	36: {"[                    ]", "[=>                  ]", "[===>                ]", "[=====>              ]", "[======>             ]", "[========>           ]", "[==========>         ]", "[============>       ]", "[==============>     ]", "[================>   ]", "[==================> ]", "[===================>]"},
 	37: {"ဝ", "၀"},
 	38: {"▌", "▀", "▐▄"},
 	39: {"🌍", "🌎", "🌏"},
 	40: {"◜", "◝", "◞", "◟"},
 	41: {"⬒", "⬔", "⬓", "⬕"},
 	42: {"⬖", "⬘", "⬗", "⬙"},
 	43: {"[>>>          >]", "[]>>>>        []", "[]  >>>>      []", "[]    >>>>    []", "[]      >>>>  []", "[]        >>>>[]", "[>>          >>]"},
 	44: {"♠", "♣", "♥", "♦"},
 	45: {"➞", "➟", "➠", "➡", "➠", "➟"},
 	46: {"  |  ", ` \   `, "_    ", ` \   `, "  |  ", "   / ", "    _", "   / "},
 	47: {"  . . . .", ".   . . .", ". .   . .", ". . .   .", ". . . .  ", ". . . . ."},
 	48: {" |     ", "  /    ", "   _   ", `    \  `, "     | ", `    \  `, "   _   ", "  /    "},
 	49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
 	50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
 	51: {"[    ]", "[   =]", "[  ==]", "[ ===]", "[====]", "[=== ]", "[==  ]", "[=   ]"},
 	52: {"( ●    )", "(  ●   )", "(   ●  )", "(    ● )", "(     ●)", "(    ● )", "(   ●  )", "(  ●   )", "( ●    )"},
 	53: {"✶", "✸", "✹", "✺", "✹", "✷"},
 	54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
 	55: {"▐⠂       ▌", "▐⠈       ▌", "▐ ⠂      ▌", "▐ ⠠      ▌", "▐  ⡀     ▌", "▐  ⠠     ▌", "▐   ⠂    ▌", "▐   ⠈    ▌", "▐    ⠂   ▌", "▐    ⠠   ▌", "▐     ⡀  ▌", "▐     ⠠  ▌", "▐      ⠂ ▌", "▐      ⠈ ▌", "▐       ⠂▌", "▐       ⠠▌", "▐       ⡀▌", "▐      ⠠ ▌", "▐      ⠂ ▌", "▐     ⠈  ▌", "▐     ⠂  ▌", "▐    ⠠   ▌", "▐    ⡀   ▌", "▐   ⠠    ▌", "▐   ⠂    ▌", "▐  ⠈     ▌", "▐  ⠂     ▌", "▐ ⠠      ▌", "▐ ⡀      ▌", "▐⠠       ▌"},
 	56: {"¿", "?"},
 	57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
 	58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
 	59: {".  ", ".. ", "...", " ..", "  .", "   "},
 	60: {".", "o", "O", "°", "O", "o", "."},
 	61: {"▓", "▒", "░"},
 	62: {"▌", "▀", "▐", "▄"},
 	63: {"⊶", "⊷"},
 	64: {"▪", "▫"},
 	65: {"□", "■"},
 	66: {"▮", "▯"},
 	67: {"-", "=", "≡"},
 	68: {"d", "q", "p", "b"},
 	69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
 	70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
 	71: {"☗", "☖"},
 	72: {"⧇", "⧆"},
 	73: {"◉", "◎"},
 	74: {"㊂", "㊀", "㊁"},
 	75: {"⦾", "⦿"},
 }
--- a/scripts/build_docker.sh
+++ b/scripts/build_docker.sh
@ -10,6 +10,8 @@ docker buildx build \
    --platform=linux/arm64,linux/amd64 \
    --build-arg=VERSION \
    --build-arg=GOFLAGS \
    --cache-from type=local,src=.cache \
    --cache-to type=local,dest=.cache \
    -f Dockerfile \
    -t ollama \
    .
--- a/scripts/push_docker.sh
+++ b/scripts/push_docker.sh
@ -10,6 +10,7 @@ docker buildx build \
    --platform=linux/arm64,linux/amd64 \
    --build-arg=VERSION \
    --build-arg=GOFLAGS \
    --cache-from type=local,src=.cache \
    -f Dockerfile \
    -t ollama/ollama -t ollama/ollama:$VERSION \
    .
--- a/server/download.go
+++ b/server/download.go
@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"log"
 	"math"
 	"net/http"
 	"net/url"
 	"os"
@ -53,8 +54,8 @@ type blobDownloadPart struct {
 const (
 	numDownloadParts          = 64
-	minDownloadPartSize int64 = 32 * 1000 * 1000
+	minDownloadPartSize int64 = 100 * format.MegaByte
-	maxDownloadPartSize int64 = 256 * 1000 * 1000
+	maxDownloadPartSize int64 = 1000 * format.MegaByte
 )
 func (p *blobDownloadPart) Name() string {
@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 			continue
 		}
 		i := i
 		g.Go(func() error {
 			var err error
 			for try := 0; try < maxRetries; try++ {
@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 					// return immediately if the context is canceled or the device is out of space
 					return err
 				case err != nil:
-					log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err)
+					sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 					log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
 					time.Sleep(sleep)
 					continue
 				default:
 					if try > 0 {
 						log.Printf("%s part %d completed after %d retries", b.Digest[7:19], i, try)
 					}
 					return nil
 				}
 			}
@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
 		}
 		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("downloading %s", b.Digest),
+			Status:    fmt.Sprintf("pulling %s", b.Digest[7:19]),
 			Digest:    b.Digest,
 			Total:     b.Total,
 			Completed: b.Completed.Load(),
@ -304,7 +303,7 @@ type downloadOpts struct {
 	fn      func(api.ProgressResponse)
 }
-const maxRetries = 3
+const maxRetries = 6
 var errMaxRetriesExceeded = errors.New("max retries exceeded")
@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
 		return err
 	default:
 		opts.fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("downloading %s", opts.digest),
+			Status:    fmt.Sprintf("pulling %s", opts.digest[7:19]),
 			Digest:    opts.digest,
 			Total:     fi.Size(),
 			Completed: fi.Size(),
--- a/server/images.go
+++ b/server/images.go
@ -228,26 +228,6 @@ func GetModel(name string) (*Model, error) {
 	return model, nil
 }
 // filenameWithPath resolves f, a file reference taken from the modelfile at
 // path, to a filesystem path.
 //
 // A leading "~" followed by the OS path separator is replaced with the
 // current user's home directory. If the result is still not absolute, it is
 // joined onto the directory that contains path.
 //
 // An error is returned only when the home directory cannot be determined; the
 // underlying error is wrapped so callers can inspect it with errors.Is/As.
 func filenameWithPath(path, f string) (string, error) {
 	homePrefix := "~" + string(os.PathSeparator)
 	if strings.HasPrefix(f, homePrefix) {
 		home, err := os.UserHomeDir()
 		if err != nil {
 			return "", fmt.Errorf("failed to open file: %w", err)
 		}
 		// filepath.Join cleans the result, so any extra separators left after
 		// the prefix is removed are collapsed.
 		f = filepath.Join(home, strings.TrimPrefix(f, homePrefix))
 	}
 	// Anything that is not absolute is relative to the modelfile's directory.
 	if !filepath.IsAbs(f) {
 		f = filepath.Join(filepath.Dir(path), f)
 	}
 	return f, nil
 }
 func realpath(p string) string {
 	abspath, err := filepath.Abs(p)
 	if err != nil {
@ -1146,43 +1126,49 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 var errUnauthorized = fmt.Errorf("unauthorized")
 func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
-	lastErr := errMaxRetriesExceeded
+	resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
-	for try := 0; try < maxRetries; try++ {
+	if err != nil {
-		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
+		if !errors.Is(err, context.Canceled) {
-		if err != nil {
+			log.Printf("request failed: %v", err)
 			log.Printf("couldn't start upload: %v", err)
 			return nil, err
 		}
-		switch {
+		return nil, err
-		case resp.StatusCode == http.StatusUnauthorized:
+	}
-			auth := resp.Header.Get("www-authenticate")
+
-			authRedir := ParseAuthRedirectString(auth)
+	switch {
-			token, err := getAuthToken(ctx, authRedir)
+	case resp.StatusCode == http.StatusUnauthorized:
 		// Handle authentication error with one retry
 		auth := resp.Header.Get("www-authenticate")
 		authRedir := ParseAuthRedirectString(auth)
 		token, err := getAuthToken(ctx, authRedir)
 		if err != nil {
 			return nil, err
 		}
 		regOpts.Token = token
 		if body != nil {
 			_, err = body.Seek(0, io.SeekStart)
 			if err != nil {
 				return nil, err
 			}
 			regOpts.Token = token
 			if body != nil {
 				body.Seek(0, io.SeekStart)
 			}
 			lastErr = errUnauthorized
 		case resp.StatusCode == http.StatusNotFound:
 			return nil, os.ErrNotExist
 		case resp.StatusCode >= http.StatusBadRequest:
 			body, err := io.ReadAll(resp.Body)
 			if err != nil {
 				return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
 			}
 			return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
 		default:
 			return resp, nil
 		}
 		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
 		if resp.StatusCode == http.StatusUnauthorized {
 			return nil, errUnauthorized
 		}
 		return resp, err
 	case resp.StatusCode == http.StatusNotFound:
 		return nil, os.ErrNotExist
 	case resp.StatusCode >= http.StatusBadRequest:
 		responseBody, err := io.ReadAll(resp.Body)
 		if err != nil {
 			return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
 		}
 		return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
 	}
-	return nil, lastErr
+	return resp, nil
 }
 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
--- a/server/routes.go
+++ b/server/routes.go
@ -666,8 +666,14 @@ func HeadBlobHandler(c *gin.Context) {
 }
 func CreateBlobHandler(c *gin.Context) {
 	targetPath, err := GetBlobsPath(c.Param("digest"))
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
 	hash := sha256.New()
-	temp, err := os.CreateTemp("", c.Param("digest"))
+	temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@ -690,12 +696,6 @@ func CreateBlobHandler(c *gin.Context) {
 		return
 	}
 	targetPath, err := GetBlobsPath(c.Param("digest"))
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
 	if err := os.Rename(temp.Name(), targetPath); err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@ -794,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 	if runtime.GOOS == "linux" {
 		// check compatibility to log warnings
 		if _, err := llm.CheckVRAM(); err != nil {
-			log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err)
+			log.Printf(err.Error())
 		}
 	}
--- a/server/upload.go
+++ b/server/upload.go
@ -5,9 +5,9 @@ import (
 	"crypto/md5"
 	"errors"
 	"fmt"
 	"hash"
 	"io"
 	"log"
 	"math"
 	"net/http"
 	"net/url"
 	"os"
@ -35,6 +35,8 @@ type blobUpload struct {
 	context.CancelFunc
 	file *os.File
 	done       bool
 	err        error
 	references atomic.Int32
@ -42,8 +44,8 @@ type blobUpload struct {
 const (
 	numUploadParts          = 64
-	minUploadPartSize int64 = 95 * 1000 * 1000
+	minUploadPartSize int64 = 100 * format.MegaByte
-	maxUploadPartSize int64 = 1000 * 1000 * 1000
+	maxUploadPartSize int64 = 1000 * format.MegaByte
 )
 func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
@ -128,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 		return
 	}
-	f, err := os.Open(p)
+	b.file, err = os.Open(p)
 	if err != nil {
 		b.err = err
 		return
 	}
-	defer f.Close()
+	defer b.file.Close()
 	g, inner := errgroup.WithContext(ctx)
 	g.SetLimit(numUploadParts)
@ -145,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 			g.Go(func() error {
 				var err error
 				for try := 0; try < maxRetries; try++ {
 					part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size)
 					err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
 					switch {
 					case errors.Is(err, context.Canceled):
@ -153,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 					case errors.Is(err, errMaxRetriesExceeded):
 						return err
 					case err != nil:
-						log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
+						part.Reset()
 						sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 						log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
 						time.Sleep(sleep)
 						continue
 					}
@ -173,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 	requestURL := <-b.nextURL
 	var sb strings.Builder
 	// calculate md5 checksum and add it to the commit request
 	for _, part := range b.Parts {
-		sb.Write(part.Sum(nil))
+		hash := md5.New()
 		if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
 			b.err = err
 			return
 		}
 		sb.Write(hash.Sum(nil))
 	}
 	md5sum := md5.Sum([]byte(sb.String()))
@ -188,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 	headers.Set("Content-Type", "application/octet-stream")
 	headers.Set("Content-Length", "0")
-	resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
+	for try := 0; try < maxRetries; try++ {
-	if err != nil {
+		resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
-		b.err = err
+		if err != nil {
 			b.err = err
 			if errors.Is(err, context.Canceled) {
 				return
 			}
 			sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 			log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
 			time.Sleep(sleep)
 			continue
 		}
 		defer resp.Body.Close()
 		b.err = nil
 		b.done = true
 		return
 	}
 	defer resp.Body.Close()
 	b.done = true
 }
 func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
 	part.Reset()
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/octet-stream")
 	headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
 	headers.Set("X-Redirect-Uploads", "1")
 	if method == http.MethodPatch {
 		headers.Set("X-Redirect-Uploads", "1")
 		headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
 	}
-	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts)
+	sr := io.NewSectionReader(b.file, part.Offset, part.Size)
 	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
 	if err != nil {
 		return err
 	}
@ -235,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 			return err
 		}
 		// retry uploading to the redirect URL
 		for try := 0; try < maxRetries; try++ {
 			err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
 			switch {
@ -243,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 			case errors.Is(err, errMaxRetriesExceeded):
 				return err
 			case err != nil:
-				log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
+				part.Reset()
 				sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 				log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
 				time.Sleep(sleep)
 				continue
 			}
@ -301,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
 		}
 		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("uploading %s", b.Digest),
+			Status:    fmt.Sprintf("pushing %s", b.Digest[7:19]),
 			Digest:    b.Digest,
 			Total:     b.Total,
 			Completed: b.Completed.Load(),
@ -315,14 +341,10 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
 type blobUploadPart struct {
 	// N is the part number
-	N      int
+	N       int
-	Offset int64
+	Offset  int64
-	Size   int64
+	Size    int64
 	hash.Hash
 	written int64
 	io.ReadSeeker
 	*blobUpload
 }
@ -334,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) {
 }
 // Reset returns the part to its initial state so it can be sent again: it
 // seeks back to the start, removes the bytes this part has counted from the
 // Completed counter, zeroes the written count, and installs a new MD5 hash.
 // NOTE(review): this hunk appears to merge old and new lines of the diff;
 // confirm which statements belong to the current version of the file.
 func (p *blobUploadPart) Reset() {
 	// Seek is presumably promoted from the embedded io.ReadSeeker; its result
 	// (offset and error) is discarded here.
 	p.Seek(0, io.SeekStart)
 	// Undo this part's contribution to the progress total (Completed is
 	// presumably the counter on the embedded *blobUpload — verify).
 	p.Completed.Add(-int64(p.written))
 	p.written = 0
 	// Start hashing from scratch for the next attempt.
 	p.Hash = md5.New()
 }
 func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
@ -352,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO
 	default:
 		defer resp.Body.Close()
 		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("uploading %s", layer.Digest),
+			Status:    fmt.Sprintf("pushing %s", layer.Digest[7:19]),
 			Digest:    layer.Digest,
 			Total:     layer.Size,
 			Completed: layer.Size,