Merge branch 'main' into patch-1

2023-11-04 19:12:18 -05:00 · 2023-11-04 19:12:18 -05:00 · 6febde7200
commit 6febde7200
parent 96da0792e6 325cfcd9ff
25 changed files with 580 additions and 2301 deletions
--- a/README.md
+++ b/README.md
@ -29,8 +29,7 @@ curl https://ollama.ai/install.sh | sh

 ### Docker

-The official [Ollama Docker image `ollama/ollama`](https://hub.docker.com/r/ollama/ollama)
-is available on Docker Hub.
+The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.

 ## Quickstart

@ -235,6 +234,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
+- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)

 ### Plugins (Extensions)
 - [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
@ -245,5 +245,3 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
 - [Dumbar](https://github.com/JerrySievert/Dumbar)

-
-
--- a/api/client.go
+++ b/api/client.go
@ -72,7 +72,7 @@ func ClientFromEnvironment() (*Client, error) {
 		},
 	}

-	mockRequest, err := http.NewRequest("HEAD", client.base.String(), nil)
+	mockRequest, err := http.NewRequest(http.MethodHead, client.base.String(), nil)
 	if err != nil {
 		return nil, err
 	}
--- a/api/types.go
+++ b/api/types.go
@ -293,7 +293,7 @@ func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
 		NumPredict:       -1,
-		NumKeep:          -1,
+		NumKeep:          0,
 		Temperature:      0.8,
 		TopK:             40,
 		TopP:             0.9,
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@ -11,6 +11,7 @@ import (
 	"io"
 	"log"
 	"net"
+	"net/http"
 	"os"
 	"os/exec"
 	"os/signal"
@ -98,19 +99,16 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	models, err := client.List(context.Background())
-	if err != nil {
-		return err
-	}
-
-	canonicalModelPath := server.ParseModelPath(args[0])
-	for _, model := range models.Models {
-		if model.Name == canonicalModelPath.GetShortTagname() {
-			return RunGenerate(cmd, args)
+	name := args[0]
+	// check if the model exists on the server
+	_, err = client.Show(context.Background(), &api.ShowRequest{Name: name})
+	var statusError api.StatusError
+	switch {
+	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
+		if err := PullHandler(cmd, args); err != nil {
+			return err
 		}
-	}
-
-	if err := PullHandler(cmd, args); err != nil {
+	case err != nil:
 		return err
 	}

@ -731,21 +729,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 		origins = strings.Split(o, ",")
 	}

-	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
-		if err := server.PruneLayers(); err != nil {
-			return err
-		}
-
-		manifestsPath, err := server.GetManifestPath()
-		if err != nil {
-			return err
-		}
-
-		if err := server.PruneDirectory(manifestsPath); err != nil {
-			return err
-		}
-	}
-
 	return server.Serve(ln, origins)
 }

--- a/docs/api.md
+++ b/docs/api.md
@ -45,9 +45,11 @@ Advanced parameters (optional):
 - `system`: system prompt to (overrides what is defined in the `Modelfile`)
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
- `stream`: if `false` the response will be be returned as a single response object, rather than a stream of objects
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects

-### Request
+### Examples
+
+#### Request

 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{
@ -56,9 +58,9 @@ curl -X POST http://localhost:11434/api/generate -d '{
 }'
 ```

-### Response
+#### Response

-A stream of JSON objects:
+A stream of JSON objects is returned:

 ```json
 {
@ -102,6 +104,38 @@ To calculate how fast the response is generated in tokens per second (token/s),
 }
 ```

+#### Request
+
+```shell
+curl -X POST http://localhost:11434/api/generate -d '{
+  "model": "llama2:7b",
+  "prompt": "Why is the sky blue?",
+  "stream": false
+}'
+```
+
+#### Response
+
+If `stream` is set to `false`, the response will be a single JSON object:
+
+```json
+{
+  "model": "llama2:7b",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "The sky is blue because it is the color of the sky.",
+  "context": [1, 2, 3],
+  "done": true,
+  "total_duration": 5589157167,
+  "load_duration": 3013701500,
+  "sample_count": 114,
+  "sample_duration": 81442000,
+  "prompt_eval_count": 46,
+  "prompt_eval_duration": 1160282000,
+  "eval_count": 13,
+  "eval_duration": 1325948000
+}
+```
+
 ## Create a Model

 ```shell
@ -114,9 +148,11 @@ Create a model from a [`Modelfile`](./modelfile.md)

 - `name`: name of the model to create
 - `path`: path to the Modelfile
- `stream`: (optional) if `false` the response will be be returned as a single response object, rather than a stream of objects
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects

-### Request
+### Examples
+
+#### Request

 ```shell
 curl -X POST http://localhost:11434/api/create -d '{
@ -125,7 +161,7 @@ curl -X POST http://localhost:11434/api/create -d '{
 }'
 ```

-### Response
+#### Response

 A stream of JSON objects. When finished, `status` is `success`.

@ -143,13 +179,17 @@ GET /api/tags

 List models that are available locally.

-### Request
+### Examples
+
+#### Request

 ```shell
 curl http://localhost:11434/api/tags
 ```

-### Response
+#### Response
+
+A single JSON object will be returned.

 ```json
 {
@ -180,7 +220,9 @@ Show details about a model including modelfile, template, parameters, license, a

 - `name`: name of the model to show

-### Request
+### Examples
+
+#### Request

 ```shell
 curl http://localhost:11434/api/show -d '{
@ -188,7 +230,7 @@ curl http://localhost:11434/api/show -d '{
 }'
 ```

-### Response
+#### Response

 ```json
 {
@ -207,7 +249,9 @@ POST /api/copy

 Copy a model. Creates a model with another name from an existing model.

-### Request
+### Examples
+
+#### Request

 ```shell
 curl http://localhost:11434/api/copy -d '{
@ -216,6 +260,10 @@ curl http://localhost:11434/api/copy -d '{
 }'
 ```

+#### Response
+
+The only response is a 200 OK if successful.
+
 ## Delete a Model

 ```shell
@ -226,9 +274,11 @@ Delete a model and its data.

 ### Parameters

- `model`: model name to delete
+- `name`: model name to delete

-### Request
+### Examples
+
+#### Request

 ```shell
 curl -X DELETE http://localhost:11434/api/delete -d '{
@ -236,6 +286,10 @@ curl -X DELETE http://localhost:11434/api/delete -d '{
 }'
 ```

+#### Response
+
+If successful, the only response is a 200 OK.
+
 ## Pull a Model

 ```shell
@ -248,9 +302,11 @@ Download a model from the ollama library. Cancelled pulls are resumed from where

 - `name`: name of the model to pull
 - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
- `stream`: (optional) if `false` the response will be be returned as a single response object, rather than a stream of objects
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects

-### Request
+### Examples
+
+#### Request

 ```shell
 curl -X POST http://localhost:11434/api/pull -d '{
@ -258,13 +314,51 @@ curl -X POST http://localhost:11434/api/pull -d '{
 }'
 ```

-### Response
+#### Response
+
+If `stream` is not specified, or set to `true`, a stream of JSON objects is returned.
+
+The first object is the manifest:
+
+```json
+{
+  "status": "pulling manifest"
+}
+```
+
+Then there is a series of downloading responses. Until any of the downloads has completed, the `completed` key may not be included. The number of files to be downloaded depends on the number of layers specified in the manifest.

 ```json
 {
  "status": "downloading digestname",
  "digest": "digestname",
-  "total": 2142590208
+  "total": 2142590208,
+  "completed": 241970
+}
+```
+
+After all the files are downloaded, the final responses are:
+
+```json
+{
+    "status": "verifying sha256 digest"
+}
+{
+    "status": "writing manifest"
+}
+{
+    "status": "removing any unused layers"
+}
+{
+    "status": "success"
+}
+```
+
+If `stream` is set to `false`, then the response is a single JSON object:
+
+```json
+{
+  "status": "success"
 }
 ```

@ -280,9 +374,11 @@ Upload a model to a model library. Requires registering for ollama.ai and adding

 - `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
 - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
- `stream`: (optional) if `false` the response will be be returned as a single response object, rather than a stream of objects
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects

-### Request
+### Examples
+
+#### Request

 ```shell
 curl -X POST http://localhost:11434/api/push -d '{
@ -290,9 +386,9 @@ curl -X POST http://localhost:11434/api/push -d '{
 }'
 ```

-### Response
+#### Response

-Streaming response that starts with:
+If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:

 ```json
 { "status": "retrieving manifest" }
@ -325,6 +421,12 @@ Finally, when the upload is complete:
 {"status":"success"}
 ```

+If `stream` is set to `false`, then the response is a single JSON object:
+
+```json
+{ "status": "success" }
+```
+
 ## Generate Embeddings

 ```shell
@ -342,7 +444,9 @@ Advanced parameters:

 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`

-### Request
+### Examples
+
+#### Request

 ```shell
 curl -X POST http://localhost:11434/api/embeddings -d '{
@ -351,7 +455,7 @@ curl -X POST http://localhost:11434/api/embeddings -d '{
 }'
 ```

-### Response
+#### Response

 ```json
 {
--- a/docs/import.md
+++ b/docs/import.md
@ -185,7 +185,7 @@ python convert.py <path to model directory>
 python convert-falcon-hf-to-gguf.py <path to model directory>

 # GPTNeoXForCausalLM
-python convert-falcon-hf-to-gguf.py <path to model directory>
+python convert-gptneox-hf-to-gguf.py <path to model directory>

 # GPTBigCodeForCausalLM
 python convert-starcoder-hf-to-gguf.py <path to model directory>
--- a/examples/langchain-python-rag-privategpt/constants.py
+++ b/examples/langchain-python-rag-privategpt/constants.py
@ -6,7 +6,6 @@ PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db')

 # Define the Chroma settings
 CHROMA_SETTINGS = Settings(
-        chroma_db_impl='duckdb+parquet',
        persist_directory=PERSIST_DIRECTORY,
        anonymized_telemetry=False
 )
--- a/examples/langchain-python-rag-privategpt/ingest.py
+++ b/examples/langchain-python-rag-privategpt/ingest.py
@ -150,7 +150,7 @@ def main():
        print("Creating new vectorstore")
        texts = process_documents()
        print(f"Creating embeddings. May take some minutes...")
-        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
+        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
    db.persist()
    db = None

--- a/examples/langchain-python-rag-privategpt/privateGPT.py
+++ b/examples/langchain-python-rag-privategpt/privateGPT.py
@ -4,6 +4,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.vectorstores import Chroma
 from langchain.llms import Ollama
+import chromadb
 import os
 import argparse
 import time
@ -22,7 +23,9 @@ def main():
    # Parse the command line arguments
    args = parse_arguments()
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
-    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
+
+    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
+
    retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
    # activate/deactivate the streaming StdOut callback for LLMs
    callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()]
--- a/examples/langchain-python-rag-privategpt/requirements.txt
+++ b/examples/langchain-python-rag-privategpt/requirements.txt
--- a/go.mod
+++ b/go.mod
@ -11,7 +11,6 @@ require (
 	github.com/olekukonko/tablewriter v0.0.5
 	github.com/spf13/cobra v1.7.0
 	golang.org/x/sync v0.3.0
-	gonum.org/v1/gonum v0.14.0
 )

 require github.com/rivo/uniseg v0.2.0 // indirect
--- a/go.sum
+++ b/go.sum
@ -140,8 +140,6 @@ golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
-gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
--- a/llm/llama.go
+++ b/llm/llama.go
@ -306,13 +306,19 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 	params := []string{
 		"--model", model,
 		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
-		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
-		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
 		"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
 		"--embedding",
 	}

+	if opts.RopeFrequencyBase > 0 {
+		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
+	}
+
+	if opts.RopeFrequencyScale > 0 {
+		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
+	}
+
 	if opts.NumGQA > 0 {
 		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
 	}
@ -360,7 +366,15 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 			runner.Path,
 			append(params, "--port", strconv.Itoa(port))...,
 		)
-		cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path)))
+
+		var libraryPaths []string
+		if libraryPath, ok := os.LookupEnv("LD_LIBRARY_PATH"); ok {
+			libraryPaths = append(libraryPaths, libraryPath)
+		}
+
+		libraryPaths = append(libraryPaths, filepath.Dir(runner.Path))
+
+		cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", strings.Join(libraryPaths, ":")))
 		cmd.Stdout = os.Stderr
 		statusWriter := NewStatusWriter()
 		cmd.Stderr = statusWriter
--- a/llm/llm.go
+++ b/llm/llm.go
@ -85,7 +85,10 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error

 	switch ggml.Name() {
 	case "gguf":
-		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
+		// TODO: gguf will load these options automatically from the model binary
+		opts.NumGQA = 0
+		opts.RopeFrequencyBase = 0.0
+		opts.RopeFrequencyScale = 0.0
 		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
 		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
--- a/readline/buffer.go
+++ b/readline/buffer.go
@ -2,6 +2,7 @@ package readline

 import (
 	"fmt"
+	"os"

 	"github.com/emirpasic/gods/lists/arraylist"
 	"golang.org/x/term"
@ -17,7 +18,8 @@ type Buffer struct {
 }

 func NewBuffer(prompt *Prompt) (*Buffer, error) {
-	width, height, err := term.GetSize(0)
+	fd := int(os.Stdout.Fd())
+	width, height, err := term.GetSize(fd)
 	if err != nil {
 		fmt.Println("Error getting size:", err)
 		return nil, err
--- a/readline/readline.go
+++ b/readline/readline.go
@ -51,11 +51,12 @@ func (i *Instance) Readline() (string, error) {
 	}
 	fmt.Print(prompt)

-	termios, err := SetRawMode(syscall.Stdin)
+	fd := int(syscall.Stdin)
+	termios, err := SetRawMode(fd)
 	if err != nil {
 		return "", err
 	}
-	defer UnsetRawMode(syscall.Stdin, termios)
+	defer UnsetRawMode(fd, termios)

 	buf, _ := NewBuffer(i.Prompt)

--- a/readline/term_bsd.go
+++ b/readline/term_bsd.go
@ -1,4 +1,5 @@
 //go:build darwin || freebsd || netbsd || openbsd
+
 package readline

 import (
--- a/readline/term_linux.go
+++ b/readline/term_linux.go
@ -1,4 +1,5 @@
 //go:build linux || solaris
+
 package readline

 import (
--- a/readline/term_windows.go
+++ b/readline/term_windows.go
@ -0,0 +1,62 @@
+package readline
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// Console mode bit flags used with GetConsoleMode/SetConsoleMode.
+const (
+	// Flags named for console input.
+	enableProcessedInput = 0x0001 // Enables input processing (like recognizing Ctrl+C).
+	enableLineInput      = 0x0002
+	enableEchoInput      = 0x0004 // Characters are written to the console as they're read.
+	enableWindowInput    = 0x0008
+	enableMouseInput     = 0x0010
+	enableInsertMode     = 0x0020
+	enableQuickEditMode  = 0x0040
+	enableExtendedFlags  = 0x0080
+	enableAutoPosition   = 0x0100 // Cursor position is not affected by writing data to the console.
+
+	// Flags named for console output; their values overlap the input flags.
+	enableProcessedOutput = 0x0001
+	enableWrapAtEolOutput = 0x0002
+)
+
+// Lazily resolved kernel32 entry points for reading and writing the
+// console mode.
+var (
+	kernel32           = syscall.NewLazyDLL("kernel32.dll")
+	procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
+	procSetConsoleMode = kernel32.NewProc("SetConsoleMode")
+)
+
+// State holds a console mode value so that it can be restored later.
+type State struct {
+	mode uint32
+}
+
+// IsTerminal reports whether the given file descriptor is associated with a
+// terminal, i.e. whether GetConsoleMode succeeds on it.
+func IsTerminal(fd int) bool {
+	var st uint32
+	// GetConsoleMode signals success with a nonzero return value. The
+	// last-error code is only meaningful after a failure, so it is
+	// deliberately not consulted here.
+	r, _, _ := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)))
+	return r != 0
+}
+
+// SetRawMode puts the console identified by fd into raw mode by clearing the
+// echo, processed and line flags from its current mode. It returns the mode
+// that was active beforehand so the caller can restore it later, or the
+// Windows error code if either console call fails.
+func SetRawMode(fd int) (*State, error) {
+	var st uint32
+	// retrieve the current mode of the terminal; both console calls report
+	// failure through a zero return value, and only then is the error code valid
+	r, _, e := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)))
+	if r == 0 {
+		return nil, e
+	}
+	// modify the mode to set it to raw
+	raw := st &^ (enableEchoInput | enableProcessedInput | enableLineInput | enableProcessedOutput)
+	// apply the new mode to the terminal
+	r, _, e = syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(raw))
+	if r == 0 {
+		return nil, e
+	}
+	// return the original state so that it can be restored later
+	return &State{st}, nil
+}
+
+// UnsetRawMode applies the console mode stored in state to fd. It returns nil
+// on success and the Windows error code if SetConsoleMode fails.
+func UnsetRawMode(fd int, state *State) error {
+	r, _, e := syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(state.mode))
+	if r == 0 {
+		return e
+	}
+	// Return a literal nil: converting a zero syscall.Errno to error would
+	// yield a non-nil interface value even though the call succeeded.
+	return nil
+}
--- a/scripts/install.sh
+++ b/scripts/install.sh
@ -63,7 +63,10 @@ status "Installing ollama to $BINDIR..."
 $SUDO install -o0 -g0 -m755 -d $BINDIR
 $SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama

-install_success() { status 'Install complete. Run "ollama" from the command line.'; }
+# install_success prints the post-install messages. It is registered as the
+# EXIT trap and is also called directly on the CPU-only path just before
+# `exit 0`, so it clears the trap first to avoid printing the messages twice.
+install_success() {
+    trap - EXIT
+    # NOTE(review): 127.0.0.1 is assumed to be the server's default bind
+    # address; confirm against the OLLAMA_HOST default.
+    status 'The Ollama API is now available at 127.0.0.1:11434.'
+    status 'Install complete. Run "ollama" from the command line.'
+}
 trap install_success EXIT

 # Everything from this point onwards is optional.
@ -130,6 +133,7 @@ if check_gpu nvidia-smi; then
 fi

 if ! check_gpu lspci && ! check_gpu lshw; then
+    install_success
    warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
    exit 0
 fi
--- a/server/auth.go
+++ b/server/auth.go
@ -91,7 +91,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) {
 	}

 	s := SignatureData{
-		Method: "GET",
+		Method: http.MethodGet,
 		Path:   redirectURL.String(),
 		Data:   nil,
 	}
@ -103,7 +103,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) {

 	headers := make(http.Header)
 	headers.Set("Authorization", sig)
-	resp, err := makeRequest(ctx, "GET", redirectURL, headers, nil, nil)
+	resp, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil)
 	if err != nil {
 		log.Printf("couldn't get token: %q", err)
 		return "", err
--- a/server/download.go
+++ b/server/download.go
@ -89,17 +89,12 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
 	}

 	if len(b.Parts) == 0 {
-		resp, err := makeRequest(ctx, "HEAD", requestURL, nil, nil, opts)
+		resp, err := makeRequestWithRetry(ctx, http.MethodHead, requestURL, nil, nil, opts)
 		if err != nil {
 			return err
 		}
 		defer resp.Body.Close()

-		if resp.StatusCode >= http.StatusBadRequest {
-			body, _ := io.ReadAll(resp.Body)
-			return fmt.Errorf("registry responded with code %d: %v", resp.StatusCode, string(body))
-		}
-
 		b.Total, _ = strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64)

 		var size = b.Total / numDownloadParts
@ -134,7 +129,6 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *Regis

 func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
 	defer blobDownloadManager.Delete(b.Digest)
-
 	ctx, b.CancelFunc = context.WithCancel(ctx)

 	file, err := os.OpenFile(b.Name+"-partial", os.O_CREATE|os.O_RDWR, 0644)
@ -170,7 +164,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 				}
 			}

-			return errors.New("max retries exceeded")
+			return errMaxRetriesExceeded
 		})
 	}

@ -200,7 +194,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w io.Writer, part *blobDownloadPart, opts *RegistryOptions) error {
 	headers := make(http.Header)
 	headers.Set("Range", fmt.Sprintf("bytes=%d-%d", part.StartsAt(), part.StopsAt()-1))
-	resp, err := makeRequest(ctx, "GET", requestURL, headers, nil, opts)
+	resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, opts)
 	if err != nil {
 		return err
 	}
@ -308,6 +302,8 @@ type downloadOpts struct {

 const maxRetries = 3

+var errMaxRetriesExceeded = errors.New("max retries exceeded")
+
 // downloadBlob downloads a blob from the registry and stores it in the blobs directory
 func downloadBlob(ctx context.Context, opts downloadOpts) error {
 	fp, err := GetBlobsPath(opts.digest)
--- a/server/images.go
+++ b/server/images.go
@ -63,15 +63,11 @@ func (m *Model) Prompt(request api.GenerateRequest) (string, error) {
 		First  bool
 		System string
 		Prompt string
-
-		// deprecated: versions <= 0.0.7 used this to omit the system prompt
-		Context []int
 	}

 	vars.First = len(request.Context) == 0
 	vars.System = m.System
 	vars.Prompt = request.Prompt
-	vars.Context = request.Context

 	if request.System != "" {
 		vars.System = request.System
@ -981,46 +977,7 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 	layers = append(layers, &manifest.Config)

 	for _, layer := range layers {
-		exists, err := checkBlobExistence(ctx, mp, layer.Digest, regOpts)
-		if err != nil {
-			return err
-		}
-
-		if exists {
-			fn(api.ProgressResponse{
-				Status:    "using existing layer",
-				Digest:    layer.Digest,
-				Total:     layer.Size,
-				Completed: layer.Size,
-			})
-			log.Printf("Layer %s already exists", layer.Digest)
-			continue
-		}
-
-		fn(api.ProgressResponse{
-			Status: "starting upload",
-			Digest: layer.Digest,
-			Total:  layer.Size,
-		})
-
-		location, chunkSize, err := startUpload(ctx, mp, layer, regOpts)
-		if err != nil {
-			log.Printf("couldn't start upload: %v", err)
-			return err
-		}
-
-		if strings.HasPrefix(filepath.Base(location.Path), "sha256:") {
-			layer.Digest = filepath.Base(location.Path)
-			fn(api.ProgressResponse{
-				Status:    "using existing layer",
-				Digest:    layer.Digest,
-				Total:     layer.Size,
-				Completed: layer.Size,
-			})
-			continue
-		}
-
-		if err := uploadBlob(ctx, location, layer, chunkSize, regOpts, fn); err != nil {
+		if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
 			log.Printf("error uploading blob: %v", err)
 			return err
 		}
@ -1037,7 +994,7 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu

 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/vnd.docker.distribution.manifest.v2+json")
-	resp, err := makeRequestWithRetry(ctx, "PUT", requestURL, headers, bytes.NewReader(manifestJSON), regOpts)
+	resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, bytes.NewReader(manifestJSON), regOpts)
 	if err != nil {
 		return err
 	}
@ -1159,22 +1116,12 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *RegistryOptio

 	headers := make(http.Header)
 	headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
-	resp, err := makeRequest(ctx, "GET", requestURL, headers, nil, regOpts)
+	resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, regOpts)
 	if err != nil {
-		log.Printf("couldn't get manifest: %v", err)
 		return nil, err
 	}
 	defer resp.Body.Close()

-	if resp.StatusCode >= http.StatusBadRequest {
-		if resp.StatusCode == http.StatusNotFound {
-			return nil, fmt.Errorf("model not found")
-		}
-
-		body, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("on pull registry responded with code %d: %s", resp.StatusCode, body)
-	}
-
 	var m *ManifestV2
 	if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
 		return nil, err
@ -1218,24 +1165,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 	return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
 }

-// Function to check if a blob already exists in the Docker registry
-func checkBlobExistence(ctx context.Context, mp ModelPath, digest string, regOpts *RegistryOptions) (bool, error) {
-	requestURL := mp.BaseURL()
-	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs", digest)
-
-	resp, err := makeRequest(ctx, "HEAD", requestURL, nil, nil, regOpts)
-	if err != nil {
-		log.Printf("couldn't check for blob: %v", err)
-		return false, err
-	}
-	defer resp.Body.Close()
-
-	// Check for success: If the blob exists, the Docker registry will respond with a 200 OK
-	return resp.StatusCode < http.StatusBadRequest, nil
-}
-
 func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
-	var status string
 	for try := 0; try < maxRetries; try++ {
 		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
 		if err != nil {
@ -1243,8 +1173,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 			return nil, err
 		}

-		status = resp.Status
-
 		switch {
 		case resp.StatusCode == http.StatusUnauthorized:
 			auth := resp.Header.Get("www-authenticate")
@ -1256,21 +1184,25 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR

 			regOpts.Token = token
 			if body != nil {
-				if _, err := body.Seek(0, io.SeekStart); err != nil {
-					return nil, err
-				}
+				body.Seek(0, io.SeekStart)
 			}

 			continue
+		case resp.StatusCode == http.StatusNotFound:
+			return nil, os.ErrNotExist
 		case resp.StatusCode >= http.StatusBadRequest:
-			body, _ := io.ReadAll(resp.Body)
-			return nil, fmt.Errorf("on upload registry responded with code %d: %s", resp.StatusCode, body)
+			body, err := io.ReadAll(resp.Body)
+			if err != nil {
+				return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
+			}
+
+			return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
 		default:
 			return resp, nil
 		}
 	}

-	return nil, fmt.Errorf("max retry exceeded: %v", status)
+	return nil, errMaxRetriesExceeded
 }

 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
--- a/server/routes.go
+++ b/server/routes.go
@ -365,7 +365,9 @@ func PushModelHandler(c *gin.Context) {
 			Insecure: req.Insecure,
 		}

-		ctx := context.Background()
+		ctx, cancel := context.WithCancel(c.Request.Context())
+		defer cancel()
+
 		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
@ -614,6 +616,22 @@ var defaultAllowOrigins = []string{
 }

 func Serve(ln net.Listener, allowOrigins []string) error {
+	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
+		// clean up unused layers and manifests
+		if err := PruneLayers(); err != nil {
+			return err
+		}
+
+		manifestsPath, err := GetManifestPath()
+		if err != nil {
+			return err
+		}
+
+		if err := PruneDirectory(manifestsPath); err != nil {
+			return err
+		}
+	}
+
 	config := cors.DefaultConfig()
 	config.AllowWildcard = true

@ -679,7 +697,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 	if runtime.GOOS == "linux" {
 		// check compatibility to log warnings
 		if _, err := llm.CheckVRAM(); err != nil {
-			log.Printf("Warning: GPU support may not enabled, check you have installed install GPU drivers: %v", err)
+			log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err)
 		}
 	}

--- a/server/upload.go
+++ b/server/upload.go
@ -2,218 +2,367 @@ package server

 import (
 	"context"
+	"crypto/md5"
 	"errors"
 	"fmt"
+	"hash"
 	"io"
 	"log"
 	"net/http"
 	"net/url"
 	"os"
-	"strconv"
+	"strings"
 	"sync"
+	"sync/atomic"
+	"time"

 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/format"
+	"golang.org/x/sync/errgroup"
 )

+// blobUploadManager tracks in-flight uploads. uploadBlob stores a *blobUpload
+// under the layer digest so callers pushing the same blob share one upload,
+// and Run removes the entry when it returns.
+var blobUploadManager sync.Map
+
+// blobUpload is the state of one blob upload. It is created by uploadBlob,
+// filled in by Prepare, driven by Run and observed by Wait.
+type blobUpload struct {
+	*Layer
+
+	// Total is the size in bytes of the blob file on disk; set by Prepare.
+	Total     int64
+	// Completed counts the bytes read for upload so far. It is decreased
+	// again when a chunk has to be retried.
+	Completed atomic.Int64
+
+	// Parts is the list of byte ranges the blob is split into; set by Prepare.
+	Parts []blobUploadPart
+
+	// nextURL carries the URL the next request of this upload must use.
+	// Prepare seeds it and uploadChunk refills it from each response.
+	nextURL chan *url.URL
+
+	// CancelFunc cancels the context Run works with. It is assigned at the
+	// start of Run and invoked by release once no waiter is left.
+	context.CancelFunc
+
+	// done is set by Run after the final request succeeded; err is set by Run
+	// on failure. Wait polls both.
+	// NOTE(review): both are written in Run's goroutine and read in Wait
+	// without visible synchronization - confirm this is acceptable.
+	done       bool
+	err        error
+	// references is the number of callers currently inside Wait.
+	references atomic.Int32
+}
+
+// blobUploadPart describes one contiguous byte range of the blob that is
+// uploaded with a single request.
+type blobUploadPart struct {
+	// N is the part number
+	N      int
+	// Offset is the position of the part's first byte within the blob file.
+	Offset int64
+	// Size is the length of the part in bytes.
+	Size   int64
+	// Hash receives the part's data while it is uploaded; Prepare initializes
+	// it with md5.New() and Run folds the sums of all parts into the etag.
+	hash.Hash
+}
+
 const (
-	redirectChunkSize int64 = 1024 * 1024 * 1024
-	regularChunkSize  int64 = 95 * 1024 * 1024
+	// numUploadParts is the number of parts Prepare aims for when splitting a
+	// blob; Run also uses it as the limit of concurrently running part uploads.
+	numUploadParts          = 64
+	// minUploadPartSize and maxUploadPartSize bound the size in bytes of a
+	// single part as computed by Prepare (the last part may be smaller).
+	minUploadPartSize int64 = 95 * 1000 * 1000
+	maxUploadPartSize int64 = 1000 * 1000 * 1000
 )

-func startUpload(ctx context.Context, mp ModelPath, layer *Layer, regOpts *RegistryOptions) (*url.URL, int64, error) {
-	requestURL := mp.BaseURL()
-	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs/uploads/")
-	if layer.From != "" {
+// Prepare starts an upload session and plans the parts of the upload.
+//
+// It POSTs to requestURL (adding the "mount" and "from" query parameters when
+// b.From is set), records the size of the blob file in b.Total, splits the
+// blob into b.Parts and finally seeds b.nextURL with the upload location the
+// registry answered with. requestURL is modified in place when b.From is set.
+//
+// It returns the first error encountered while resolving the blob path,
+// talking to the registry, stat-ing the file or parsing the location.
+func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
+	p, err := GetBlobsPath(b.Digest)
+	if err != nil {
+		return err
+	}
+
+	if b.From != "" {
 		values := requestURL.Query()
-		values.Add("mount", layer.Digest)
-		values.Add("from", layer.From)
+		values.Add("mount", b.Digest)
+		values.Add("from", b.From)
 		requestURL.RawQuery = values.Encode()
 	}
 
-	resp, err := makeRequestWithRetry(ctx, "POST", requestURL, nil, nil, regOpts)
+	resp, err := makeRequestWithRetry(ctx, http.MethodPost, requestURL, nil, nil, opts)
 	if err != nil {
-		log.Printf("couldn't start upload: %v", err)
-		return nil, 0, err
+		return err
 	}
 	defer resp.Body.Close()
 
 	location := resp.Header.Get("Docker-Upload-Location")
-	chunkSize := redirectChunkSize
 	if location == "" {
 		location = resp.Header.Get("Location")
-		chunkSize = regularChunkSize
 	}
 
-	locationURL, err := url.Parse(location)
+	fi, err := os.Stat(p)
 	if err != nil {
-		return nil, 0, err
+		return err
 	}
 
-	return locationURL, chunkSize, nil
+	b.Total = fi.Size()
+
+	// aim for numUploadParts equally sized parts, but keep every part within
+	// [minUploadPartSize, maxUploadPartSize]
+	var size = b.Total / numUploadParts
+	switch {
+	case size < minUploadPartSize:
+		size = minUploadPartSize
+	case size > maxUploadPartSize:
+		size = maxUploadPartSize
+	}
+
+	var offset int64
+	for offset < fi.Size() {
+		// the last part only covers what is left of the file
+		if offset+size > fi.Size() {
+			size = fi.Size() - offset
+		}
+
+		// set part.N to the current number of parts
+		b.Parts = append(b.Parts, blobUploadPart{N: len(b.Parts), Offset: offset, Size: size, Hash: md5.New()})
+		offset += size
+	}
+
+	// an empty blob produces no parts at all; do not index into b.Parts
+	// unconditionally or the log call below panics
+	var partSize int64
+	if len(b.Parts) > 0 {
+		partSize = b.Parts[0].Size
+	}
+
+	log.Printf("uploading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(partSize))
+
+	requestURL, err = url.Parse(location)
+	if err != nil {
+		return err
+	}
+
+	// buffered so that the URL can be handed to Run without blocking here
+	b.nextURL = make(chan *url.URL, 1)
+	b.nextURL <- requestURL
+	return nil
 }

-func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSize int64, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
-	// TODO allow resumability
-	// TODO allow canceling uploads via DELETE
+// Run uploads blob parts to the upstream. If the upstream supports redirection, parts will be uploaded
+// in parallel as defined by Prepare. Otherwise, parts will be uploaded serially. Run sets b.err on error.
+//
+// Run removes the upload from blobUploadManager when it returns and sets b.done once the final
+// request, which carries the digest and the etag built from the part hashes, has succeeded.
+func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
+	defer blobUploadManager.Delete(b.Digest)
+	ctx, b.CancelFunc = context.WithCancel(ctx)
 
-	fp, err := GetBlobsPath(layer.Digest)
+	p, err := GetBlobsPath(b.Digest)
 	if err != nil {
-		return err
+		b.err = err
+		return
 	}
 
-	f, err := os.Open(fp)
+	f, err := os.Open(p)
 	if err != nil {
-		return err
+		b.err = err
+		return
 	}
 	defer f.Close()
 
-	pw := ProgressWriter{
-		status: fmt.Sprintf("uploading %s", layer.Digest),
-		digest: layer.Digest,
-		total:  layer.Size,
-		fn:     fn,
-	}
+	g, inner := errgroup.WithContext(ctx)
+	g.SetLimit(numUploadParts)
+	for i := range b.Parts {
+		part := &b.Parts[i]
+		// a part can only start once the URL for the next request is known
+		select {
+		case <-inner.Done():
+			// the group context is done; do not schedule this part
+		case requestURL := <-b.nextURL:
+			g.Go(func() error {
+				for try := 0; try < maxRetries; try++ {
+					// Start every attempt with a fresh hash: a failed attempt may
+					// already have streamed data into the previous one, which
+					// would corrupt the part's sum and thereby the final etag.
+					part.Hash = md5.New()
+					r := io.NewSectionReader(f, part.Offset, part.Size)
+					err := b.uploadChunk(inner, http.MethodPatch, requestURL, r, part, opts)
+					switch {
+					case errors.Is(err, context.Canceled):
+						return err
+					case errors.Is(err, errMaxRetriesExceeded):
+						return err
+					case err != nil:
+						log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
+						continue
+					}
 
-	for offset := int64(0); offset < layer.Size; {
-		chunk := layer.Size - offset
-		if chunk > chunkSize {
-			chunk = chunkSize
-		}
+					return nil
+				}
 
-		resp, err := uploadBlobChunk(ctx, http.MethodPatch, requestURL, f, offset, chunk, regOpts, &pw)
-		if err != nil {
-			fn(api.ProgressResponse{
-				Status:    fmt.Sprintf("error uploading chunk: %v", err),
-				Digest:    layer.Digest,
-				Total:     layer.Size,
-				Completed: offset,
+				return errMaxRetriesExceeded
 			})
-
-			return err
-		}
-
-		offset += chunk
-		location := resp.Header.Get("Docker-Upload-Location")
-		if location == "" {
-			location = resp.Header.Get("Location")
-		}
-
-		requestURL, err = url.Parse(location)
-		if err != nil {
-			return err
 		}
 	}
 
+	if err := g.Wait(); err != nil {
+		b.err = err
+		return
+	}
+
+	// the URL left behind by the last part is the one to finish the upload with
+	requestURL := <-b.nextURL
+
+	// the etag is the md5 over the concatenated md5 sums of all parts,
+	// followed by the number of parts
+	var sb strings.Builder
+	for _, part := range b.Parts {
+		sb.Write(part.Sum(nil))
+	}
+
+	md5sum := md5.Sum([]byte(sb.String()))
+
 	values := requestURL.Query()
-	values.Add("digest", layer.Digest)
+	values.Add("digest", b.Digest)
+	values.Add("etag", fmt.Sprintf("%x-%d", md5sum, len(b.Parts)))
 	requestURL.RawQuery = values.Encode()
 
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/octet-stream")
 	headers.Set("Content-Length", "0")
 
-	// finish the upload
-	resp, err := makeRequest(ctx, "PUT", requestURL, headers, nil, regOpts)
+	resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
+	if err != nil {
+		b.err = err
+		return
+	}
+	defer resp.Body.Close()
+
+	b.done = true
+}
+
+// uploadChunk sends the data of one part, read from rs, to requestURL using
+// method (PATCH against the registry, PUT against a redirect target).
+//
+// Bytes read from rs are added to b.Completed and written to part.Hash while
+// the request body is streamed. Whenever the attempt fails, the bytes counted
+// by this call are subtracted from b.Completed again so retries do not inflate
+// the reported progress.
+//
+// On a temporary redirect the URL for the next part is published on b.nextURL
+// right away and the data is re-sent to the redirect location, up to
+// maxRetries times. On success of a PATCH the URL for the next request is
+// published on b.nextURL. opts may be nil; that is how redirected uploads are
+// issued.
+func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, rs io.ReadSeeker, part *blobUploadPart, opts *RegistryOptions) error {
+	headers := make(http.Header)
+	headers.Set("Content-Type", "application/octet-stream")
+	headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
+	headers.Set("X-Redirect-Uploads", "1")
+
+	if method == http.MethodPatch {
+		headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
+	}
+
+	buw := blobUploadWriter{blobUpload: b}
+	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(rs, io.MultiWriter(&buw, part.Hash)), opts)
 	if err != nil {
-		log.Printf("couldn't finish upload: %v", err)
+		// undo the progress counted for bytes read before the request failed
+		b.Completed.Add(-buw.written)
 		return err
 	}
 	defer resp.Body.Close()
 
-	if resp.StatusCode >= http.StatusBadRequest {
-		body, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("on finish upload registry responded with code %d: %v", resp.StatusCode, string(body))
-	}
-	return nil
-}
-
-func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r io.ReaderAt, offset, limit int64, opts *RegistryOptions, pw *ProgressWriter) (*http.Response, error) {
-	sectionReader := io.NewSectionReader(r, offset, limit)
-
-	headers := make(http.Header)
-	headers.Set("Content-Type", "application/octet-stream")
-	headers.Set("Content-Length", strconv.Itoa(int(limit)))
-	headers.Set("X-Redirect-Uploads", "1")
-
-	if method == http.MethodPatch {
-		headers.Set("Content-Range", fmt.Sprintf("%d-%d", offset, offset+sectionReader.Size()-1))
+	location := resp.Header.Get("Docker-Upload-Location")
+	if location == "" {
+		location = resp.Header.Get("Location")
 	}
 
-	for try := 0; try < maxRetries; try++ {
-		resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sectionReader, pw), opts)
-		if err != nil && !errors.Is(err, io.EOF) {
-			return nil, err
+	nextURL, err := url.Parse(location)
+	if err != nil {
+		return err
+	}
+
+	switch {
+	case resp.StatusCode == http.StatusTemporaryRedirect:
+		// let the next part start while this one is sent to the redirect target
+		b.nextURL <- nextURL
+
+		redirectURL, err := resp.Location()
+		if err != nil {
+			return err
 		}
-		defer resp.Body.Close()
 
-		switch {
-		case resp.StatusCode == http.StatusTemporaryRedirect:
-			location, err := resp.Location()
-			if err != nil {
-				return nil, err
-			}
-
-			pw.completed = offset
-			if _, err := uploadBlobChunk(ctx, http.MethodPut, location, r, offset, limit, nil, pw); err != nil {
-				// retry
-				log.Printf("retrying redirected upload: %v", err)
+		for try := 0; try < maxRetries; try++ {
+			// the data must be sent from its beginning again; uploading from
+			// an unknown position would corrupt the part
+			if _, err := rs.Seek(0, io.SeekStart); err != nil {
+				return err
+			}
+
+			b.Completed.Add(-buw.written)
+			buw.written = 0
+			part.Hash = md5.New()
+			err := b.uploadChunk(ctx, http.MethodPut, redirectURL, rs, part, nil)
+			switch {
+			case errors.Is(err, context.Canceled):
+				return err
+			case errors.Is(err, errMaxRetriesExceeded):
+				return err
+			case err != nil:
+				log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
 				continue
 			}
 
-			return resp, nil
-		case resp.StatusCode == http.StatusUnauthorized:
-			auth := resp.Header.Get("www-authenticate")
-			authRedir := ParseAuthRedirectString(auth)
-			token, err := getAuthToken(ctx, authRedir)
-			if err != nil {
-				return nil, err
-			}
-
-			opts.Token = token
-
-			pw.completed = offset
-			sectionReader = io.NewSectionReader(r, offset, limit)
-			continue
-		case resp.StatusCode >= http.StatusBadRequest:
-			body, _ := io.ReadAll(resp.Body)
-			return nil, fmt.Errorf("on upload registry responded with code %d: %s", resp.StatusCode, body)
+			return nil
 		}
 
-		return resp, nil
+		return errMaxRetriesExceeded
+
+	case resp.StatusCode == http.StatusUnauthorized:
+		// opts is nil for redirected uploads; there is no registry token to
+		// refresh in that case and dereferencing opts would panic
+		if opts != nil {
+			auth := resp.Header.Get("www-authenticate")
+			authRedir := ParseAuthRedirectString(auth)
+			token, err := getAuthToken(ctx, authRedir)
+			if err != nil {
+				return err
+			}
+
+			opts.Token = token
+		}
+
+		// report the failure so the caller retries, now with the new token
+		fallthrough
+	case resp.StatusCode >= http.StatusBadRequest:
+		// undo the progress first so it is rolled back even if reading the
+		// response body fails
+		b.Completed.Add(-buw.written)
+		buw.written = 0
+
+		body, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return err
+		}
+
+		if _, err := rs.Seek(0, io.SeekStart); err != nil {
+			return err
+		}
+
+		return fmt.Errorf("http status %d %s: %s", resp.StatusCode, resp.Status, body)
 	}
 
-	return nil, fmt.Errorf("max retries exceeded")
+	if method == http.MethodPatch {
+		b.nextURL <- nextURL
+	}
+
+	return nil
 }

-type ProgressWriter struct {
-	status    string
-	digest    string
-	bucket    int64
-	completed int64
-	total     int64
-	fn        func(api.ProgressResponse)
-	mu        sync.Mutex
+// acquire registers one more waiter on the upload by incrementing the
+// reference count. It is paired with release.
+func (b *blobUpload) acquire() {
+	b.references.Add(1)
 }

-func (pw *ProgressWriter) Write(b []byte) (int, error) {
-	pw.mu.Lock()
-	defer pw.mu.Unlock()
+// release drops one waiter from the upload. When the last waiter is gone the
+// upload is cancelled through b.CancelFunc.
+//
+// b.CancelFunc is only assigned once Run has started, so it can still be nil
+// when a waiter gives up before that (or when Prepare failed and Run never
+// runs); in that case there is nothing to cancel and calling it would panic.
+func (b *blobUpload) release() {
+	if b.references.Add(-1) == 0 && b.CancelFunc != nil {
+		b.CancelFunc()
+	}
+}

-	n := len(b)
-	pw.bucket += int64(n)
+// Wait blocks until the upload has finished, failed, or ctx is done, and
+// reports progress through fn on every tick (every 60ms).
+//
+// The caller counts as a reference on the upload for the duration of the
+// call. Wait returns ctx.Err() when ctx is done, the upload's error when it
+// failed and nil when it completed.
+func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) error {
+	b.acquire()
+	defer b.release()
 
-	// throttle status updates to not spam the client
-	if pw.bucket >= 1024*1024 || pw.completed+pw.bucket >= pw.total {
-		pw.completed += pw.bucket
-		pw.fn(api.ProgressResponse{
-			Status:    pw.status,
-			Digest:    pw.digest,
-			Total:     pw.total,
-			Completed: pw.completed,
+	ticker := time.NewTicker(60 * time.Millisecond)
+	// release the ticker's resources once the caller stops waiting
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+
+		fn(api.ProgressResponse{
+			Status:    fmt.Sprintf("uploading %s", b.Digest),
+			Digest:    b.Digest,
+			Total:     b.Total,
+			Completed: b.Completed.Load(),
 		})
 
-		pw.bucket = 0
+		// checked after reporting so the final state is always sent to fn
+		if b.done || b.err != nil {
+			return b.err
+		}
 	}
+}

+// blobUploadWriter is an io.Writer that counts the bytes passing through it,
+// both locally and on the embedded upload's Completed counter.
+type blobUploadWriter struct {
+	// written is the number of bytes this writer has seen; uploadChunk uses
+	// it to take the bytes back out of Completed when an attempt is retried.
+	written int64
+	*blobUpload
+}
+
+// Write records len(p) bytes as written, on this writer and on the upload's
+// Completed counter. It does not store p and never fails.
+func (b *blobUploadWriter) Write(p []byte) (n int, err error) {
+	n = len(p)
+	b.written += int64(n)
+	b.Completed.Add(int64(n))
 	return n, nil
 }
+
+// uploadBlob makes sure the blob of layer exists in the registry addressed by mp.
+//
+// A HEAD request checks for the blob first; when it succeeds the blob is
+// reported to fn as fully uploaded and nothing is sent. When the request
+// fails with os.ErrNotExist the blob is uploaded: the first caller for a
+// digest prepares the upload and starts it in the background, every caller
+// then waits for it and receives progress through fn. Any other error of the
+// HEAD request is returned as is.
+func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
+	blobURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "blobs", layer.Digest)
+
+	resp, err := makeRequestWithRetry(ctx, http.MethodHead, blobURL, nil, nil, opts)
+	if err == nil {
+		// the registry already has the blob
+		defer resp.Body.Close()
+		fn(api.ProgressResponse{
+			Status:    fmt.Sprintf("uploading %s", layer.Digest),
+			Digest:    layer.Digest,
+			Total:     layer.Size,
+			Completed: layer.Size,
+		})
+
+		return nil
+	}
+
+	if !errors.Is(err, os.ErrNotExist) {
+		return err
+	}
+
+	entry, loaded := blobUploadManager.LoadOrStore(layer.Digest, &blobUpload{Layer: layer})
+	upload := entry.(*blobUpload)
+	if loaded {
+		// somebody else is uploading this blob already, just follow along
+		return upload.Wait(ctx, fn)
+	}
+
+	uploadsURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "blobs/uploads/")
+	if err := upload.Prepare(ctx, uploadsURL, opts); err != nil {
+		blobUploadManager.Delete(layer.Digest)
+		return err
+	}
+
+	// the upload is detached from ctx; it is shared between callers and is
+	// cancelled through release once the last waiter is gone
+	go upload.Run(context.Background(), opts)
+
+	return upload.Wait(ctx, fn)
+}