79 changed files with 262 additions and 458 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1 @@
 llm/ext_server/* linguist-vendored
-* text eol=lf
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -273,7 +273,7 @@ jobs:
        if: ${{ startsWith(matrix.os, 'macos-') }}
      - uses: golangci/golangci-lint-action@v6
        with:
-          args: --timeout 8m0s -v
+          args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
  test:
    strategy:
      matrix:
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -7,32 +7,22 @@ linters:
    - bodyclose
    - containedctx
    - contextcheck
-    - errcheck
    - exportloopref
-    - gci
    - gocheckcompilerdirectives
-    - gofmt
-    - gofumpt
-    - gosimple
-    - govet
-    - ineffassign
+    # conditionally enable this on linux/macos
+    # - gofmt
+    # - goimports
    - intrange
-    - makezero
    - misspell
    - nilerr
    - nolintlint
    - nosprintfhostport
-    - staticcheck
-    - tenv
    - testifylint
    - unconvert
    - unused
-    - usestdlibvars
    - wastedassign
    - whitespace
-linters-settings:
-  gci:
-    sections: [standard, default, localmodule]
+    - usestdlibvars
 severity:
  default-severity: error
  rules:
--- a/README.md
+++ b/README.md
@@ -54,7 +54,6 @@ Here are some example models that can be downloaded:
 | Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
 | Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
 | Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
 | Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
@@ -301,7 +300,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
-- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)

 ### Terminal

--- a/api/client.go
+++ b/api/client.go
@@ -18,7 +18,6 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -173,7 +172,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}

 		if errorResponse.Error != "" {
-			return errors.New(errorResponse.Error)
+			return fmt.Errorf(errorResponse.Error)
 		}

 		if response.StatusCode >= http.StatusBadRequest {
--- a/api/types.go
+++ b/api/types.go
@@ -231,6 +231,7 @@ type Options struct {

 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
+	UseNUMA   bool  `json:"numa,omitempty"`
 	NumCtx    int   `json:"num_ctx,omitempty"`
 	NumBatch  int   `json:"num_batch,omitempty"`
 	NumGPU    int   `json:"num_gpu,omitempty"`
@@ -614,6 +615,7 @@ func DefaultOptions() Options {
 			F16KV:     true,
 			UseMLock:  false,
 			UseMMap:   nil,
+			UseNUMA:   false,
 		},
 	}
 }
--- a/api/types_test.go
+++ b/api/types_test.go
@@ -2,7 +2,7 @@ package api

 import (
 	"encoding/json"
-	"errors"
+	"fmt"
 	"math"
 	"testing"
 	"time"
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
 				"use_mmap": {"foo"},
 			},
 			exp: nil,
-			err: errors.New("invalid bool value [foo]"),
+			err: fmt.Errorf("invalid bool value [foo]"),
 		},
 	}

--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -2,8 +2,8 @@

 package lifecycle

-import "errors"
+import "fmt"

 func GetStarted() error {
-	return errors.New("not implemented")
+	return fmt.Errorf("GetStarted not implemented")
 }
--- a/app/lifecycle/getstarted_windows.go
+++ b/app/lifecycle/getstarted_windows.go
@@ -34,6 +34,7 @@ func GetStarted() error {
 		Sys:   &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
+
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -27,7 +27,7 @@ func InitLogging() {
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
 		rotateLogs(AppLogFile)
-		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"

 func ShowLogs() {
-	slog.Warn("not implemented")
+	slog.Warn("ShowLogs not yet implemented")
 }
--- a/app/lifecycle/logging_test.go
+++ b/app/lifecycle/logging_test.go
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
 	// No log exists
 	rotateLogs(logFile)

-	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
+	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
 	assert.FileExists(t, logFile)
 	// First rotation
 	rotateLogs(logFile)
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
 	assert.NoFileExists(t, logFile)

 	for i := 2; i <= LogRotationCount+1; i++ {
-		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
+		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
 		assert.FileExists(t, logFile)
 		rotateLogs(logFile)
 		assert.NoFileExists(t, logFile)
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 	}

 	rotateLogs(ServerLogFile)
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -15,7 +15,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
-	"strconv"
 	"strings"
 	"time"

@@ -47,7 +46,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))

 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -4,9 +4,9 @@ package lifecycle

 import (
 	"context"
-	"errors"
+	"fmt"
 )

 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return errors.New("not implemented")
+	return fmt.Errorf("DoUpgrade not yet implemented")
 }
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -2,7 +2,6 @@ package lifecycle

 import (
 	"context"
-	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@@ -16,7 +15,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return errors.New("no update downloads found")
+		return fmt.Errorf("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -65,7 +64,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return errors.New("installer process did not start")
+		return fmt.Errorf("installer process did not start")
 	}

 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -3,11 +3,11 @@
 package tray

 import (
-	"errors"
+	"fmt"

 	"github.com/ollama/ollama/app/tray/commontray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, errors.New("not implemented")
+	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
 }
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@@ -11,7 +11,9 @@ import (
 	"golang.org/x/sys/windows"
 )

-var quitOnce sync.Once
+var (
+	quitOnce sync.Once
+)

 func (t *winTray) Run() {
 	nativeLoop()
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@@ -13,9 +13,8 @@ import (
 	"sync"
 	"unsafe"

-	"golang.org/x/sys/windows"
-
 	"github.com/ollama/ollama/app/tray/commontray"
+	"golang.org/x/sys/windows"
 )

 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -415,7 +414,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)

 	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
 			return "", err
 		}
 	}
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -5,7 +5,6 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
-	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -79,7 +78,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
 	parts := bytes.Split(publicKey, []byte(" "))
 	if len(parts) < 2 {
-		return "", errors.New("malformed public key")
+		return "", fmt.Errorf("malformed public key")
 	}

 	signedData, err := privateKey.Sign(rand.Reader, bts)
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1160,7 +1160,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return errors.New("could not connect to ollama app, is it running?")
+			return fmt.Errorf("could not connect to ollama app, is it running?")
 		}
 	}
 	return nil
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -604,7 +604,7 @@ func getImageData(filePath string) ([]byte, error) {
 	// Check if the file size exceeds 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
-		return nil, errors.New("file size exceeds maximum limit (100MB)")
+		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
 	}

 	buf = make([]byte, info.Size())
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@@ -2,7 +2,7 @@ package cmd

 import (
 	"context"
-	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return errors.New("could not find ollama app")
+		return fmt.Errorf("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@@ -4,11 +4,11 @@ package cmd

 import (
 	"context"
-	"errors"
+	"fmt"

 	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
-	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
+	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
 }
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 			// Finally look in the path
 			appExe, err = exec.LookPath(AppName)
 			if err != nil {
-				return errors.New("could not locate ollama app")
+				return fmt.Errorf("could not locate ollama app")
 			}
 		}
 	}
--- a/convert/convert_llama.go
+++ b/convert/convert_llama.go
@@ -5,10 +5,9 @@ import (
 	"fmt"
 	"strings"

+	"github.com/ollama/ollama/llm"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/llm"
 )

 type llama struct {
--- a/convert/convert_test.go
+++ b/convert/convert_test.go
@@ -2,7 +2,6 @@ package convert

 import (
 	"crypto/sha256"
-	"encoding/hex"
 	"encoding/json"
 	"flag"
 	"fmt"
@@ -15,9 +14,8 @@ import (
 	"slices"
 	"testing"

-	"golang.org/x/exp/maps"
-
 	"github.com/ollama/ollama/llm"
+	"golang.org/x/exp/maps"
 )

 func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
@@ -101,7 +99,7 @@ func TestConvertFull(t *testing.T) {
 					t.Fatal(err)
 				}

-				actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
+				actual[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
 			}

 			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
--- a/convert/fs.go
+++ b/convert/fs.go
@@ -10,8 +10,8 @@ import (
 )

 type ZipReader struct {
-	r *zip.Reader
-	p string
+	r     *zip.Reader
+	p     string

 	// limit is the maximum size of a file that can be read directly
 	// from the zip archive. Files larger than this size will be extracted
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -111,9 +111,8 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 			return 0, err
 		}

-		f32s = make([]float32, len(u16s))
-		for i := range u16s {
-			f32s[i] = float16.Frombits(u16s[i]).Float32()
+		for _, b := range u16s {
+			f32s = append(f32s, float16.Frombits(b).Float32())
 		}

 	case "BF16":
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -1,71 +1,71 @@
-# Ollama Docker image
-
-### CPU only
-
-```bash
-docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
-
-### Nvidia GPU
-Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
-
-#### Install with Apt
-1.  Configure the repository
-```bash
-curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
-    | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
-    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
-    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
-sudo apt-get update
-```
-2.  Install the NVIDIA Container Toolkit packages
-```bash
-sudo apt-get install -y nvidia-container-toolkit
-```
-
-#### Install with Yum or Dnf
-1.  Configure the repository
-
-```bash
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
-    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
-```
-
-2. Install the NVIDIA Container Toolkit packages
-
-```bash
-sudo yum install -y nvidia-container-toolkit
-```
-
-#### Configure Docker to use Nvidia driver
-```
-sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
-```
-
-#### Start the container
-
-```bash
-docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
-
-### AMD GPU
-
-To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
-
-```
-docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
-```
-
-### Run model locally
-
-Now you can run a model:
-
-```
-docker exec -it ollama ollama run llama3.1
-```
-
-### Try different models
-
-More models can be found on the [Ollama library](https://ollama.com/library).
+# Ollama Docker image
+
+### CPU only
+
+```bash
+docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+```
+
+### Nvidia GPU
+Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
+
+#### Install with Apt
+1.  Configure the repository
+```bash
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
+    | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
+    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
+    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+sudo apt-get update
+```
+2.  Install the NVIDIA Container Toolkit packages
+```bash
+sudo apt-get install -y nvidia-container-toolkit
+```
+
+#### Install with Yum or Dnf
+1.  Configure the repository
+    
+```bash
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
+    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
+```
+    
+2. Install the NVIDIA Container Toolkit packages
+    
+```bash
+sudo yum install -y nvidia-container-toolkit
+```
+
+#### Configure Docker to use Nvidia driver 
+```
+sudo nvidia-ctk runtime configure --runtime=docker
+sudo systemctl restart docker
+```
+
+#### Start the container
+
+```bash
+docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+```
+
+### AMD GPU
+
+To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
+
+```
+docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
+```
+
+### Run model locally
+
+Now you can run a model:
+
+```
+docker exec -it ollama ollama run llama3.1
+```
+
+### Try different models
+
+More models can be found on the [Ollama library](https://ollama.com/library).
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -28,35 +28,13 @@ chat_completion = client.chat.completions.create(
    model='llama3',
 )

-response = client.chat.completions.create(
-    model="llava",
-    messages=[
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "What's in this image?"},
-                {
-                    "type": "image_url",
-                    "image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNt
WQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165m
zdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
-                },
-            ],
-        }
-    ],
-    max_tokens=300,
-)
-
-completion = client.completions.create(
-    model="llama3",
-    prompt="Say this is a test",
-)
-
 list_completion = client.models.list()

 model = client.models.retrieve("llama3")

 embeddings = client.embeddings.create(
    model="all-minilm",
-    input=["why is the sky blue?", "why is the grass green?"],
+    input=["why is the sky blue?", "why is the grass green?"]
 )
 ```

@@ -73,44 +51,23 @@ const openai = new OpenAI({
 })

 const chatCompletion = await openai.chat.completions.create({
-    messages: [{ role: 'user', content: 'Say this is a test' }],
-    model: 'llama3',
-})
-
-const response = await openai.chat.completions.create({
-    model: "llava",
-    messages: [
-        {
-        role: "user",
-        content: [
-            { type: "text", text: "What's in this image?" },
-            {
-            type: "image_url",
-            image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9w
NtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5L
ONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
-            },
-        ],
-        },
-    ],
-})
-
-const completion = await openai.completions.create({
-    model: "llama3",
-    prompt: "Say this is a test.",
+  messages: [{ role: 'user', content: 'Say this is a test' }],
+  model: 'llama3',
 })

 const listCompletion = await openai.models.list()

-const model = await openai.models.retrieve("llama3")
+const model = await openai.models.retrieve("llama3");

 const embedding = await openai.embeddings.create({
  model: "all-minilm",
  input: ["why is the sky blue?", "why is the grass green?"],
-})
+});
 ```

 ### `curl`

-``` shell
+```
 curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
@ -127,37 +84,6 @@ curl http://localhost:11434/v1/chat/completions \
        ]
    }'

-curl http://localhost:11434/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "llava",
-    "messages": [
-      {
-        "role": "user",
-        "content": [
-          {
-            "type": "text",
-            "text": "What'\''s in this image?"
-          },
-          {
-            "type": "image_url",
-            "image_url": {
-               "url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wN
tLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LO
NA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
-            }
-          }
-        ]
-      }
-    ],
-    "max_tokens": 300
-  }'
-
-curl http://localhost:11434/v1/completions \
-    -H "Content-Type: application/json" \
-    -d '{
-        "model": "llama3",
-        "prompt": "Say this is a test"
-    }'
-
 curl http://localhost:11434/v1/models

 curl http://localhost:11434/v1/models/llama3
@ -180,7 +106,6 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
- [x] Vision
 - [x] Tools (streaming support coming soon)
 - [ ] Vision
 - [ ] Logprobs
@ -190,10 +115,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `model`
 - [x] `messages`
  - [x] Text `content`
-  - [x] Image `content`
-    - [x] Base64 encoded image
-    - [ ] Image URL
-  - [x] Array of `content` parts
+  - [ ] Array of `content` parts
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `response_format`
@ -209,39 +131,6 @@ curl http://localhost:11434/v1/embeddings \
 - [ ] `user`
 - [ ] `n`

-### `/v1/completions`
-
-#### Supported features
-
- [x] Completions
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
- [ ] Logprobs
-
-#### Supported request fields
-
- [x] `model`
- [x] `prompt`
- [x] `frequency_penalty`
- [x] `presence_penalty`
- [x] `seed`
- [x] `stop`
- [x] `stream`
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
- [x] `suffix`
- [ ] `best_of`
- [ ] `echo`
- [ ] `logit_bias`
- [ ] `user`
- [ ] `n`
-
-#### Notes
-
- `prompt` currently only accepts a string
-
 ### `/v1/models`

 #### Notes
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
 On **Linux** systems with systemd, the logs can be found with this command:

 ```shell
-journalctl -u ollama --no-pager
+journalctl -u ollama
 ```

 When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
--- a/format/format.go
+++ b/format/format.go
@ -3,7 +3,6 @@ package format
 import (
 	"fmt"
 	"math"
-	"strconv"
 )

 const (
@ -29,6 +28,6 @@ func HumanNumber(b uint64) string {
 	case b >= Thousand:
 		return fmt.Sprintf("%.0fK", float64(b)/Thousand)
 	default:
-		return strconv.FormatUint(b, 10)
+		return fmt.Sprintf("%d", b)
 	}
 }
--- a/gpu/amd_common.go
+++ b/gpu/amd_common.go
@ -3,7 +3,7 @@
 package gpu

 import (
-	"errors"
+	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@ -95,5 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
 		}
 	}

-	return "", errors.New("no suitable rocm found, falling back to CPU")
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }
--- a/gpu/amd_hip_windows.go
+++ b/gpu/amd_hip_windows.go
@ -1,7 +1,6 @@
 package gpu

 import (
-	"errors"
 	"fmt"
 	"log/slog"
 	"syscall"
@ -77,7 +76,7 @@ func (hl *HipLib) Release() {

 func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	if hl.dll == 0 {
-		return 0, 0, errors.New("dll has been unloaded")
+		return 0, 0, fmt.Errorf("dll has been unloaded")
 	}
 	var version int
 	status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
@ -111,7 +110,7 @@ func (hl *HipLib) HipGetDeviceCount() int {

 func (hl *HipLib) HipSetDevice(device int) error {
 	if hl.dll == 0 {
-		return errors.New("dll has been unloaded")
+		return fmt.Errorf("dll has been unloaded")
 	}
 	status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
 	if status != hipSuccess {
@ -122,7 +121,7 @@ func (hl *HipLib) HipSetDevice(device int) error {

 func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
 	if hl.dll == 0 {
-		return nil, errors.New("dll has been unloaded")
+		return nil, fmt.Errorf("dll has been unloaded")
 	}
 	var props hipDevicePropMinimal
 	status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
@ -135,7 +134,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
 // free, total, err
 func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
 	if hl.dll == 0 {
-		return 0, 0, errors.New("dll has been unloaded")
+		return 0, 0, fmt.Errorf("dll has been unloaded")
 	}
 	var totalMemory uint64
 	var freeMemory uint64
--- a/gpu/amd_linux.go
+++ b/gpu/amd_linux.go
@ -393,7 +393,7 @@ func AMDValidateLibDir() (string, error) {

 	// If we still haven't found a usable rocm, the user will have to install it on their own
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
-	return "", errors.New("no suitable rocm found, falling back to CPU")
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }

 func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
--- a/gpu/amd_windows.go
+++ b/gpu/amd_windows.go
@ -2,7 +2,7 @@ package gpu

 import (
 	"bytes"
-	"errors"
+	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@ -85,7 +85,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		n = bytes.IndexByte(props.GcnArchName[:], 0)
 		gfx := string(props.GcnArchName[:n])
 		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
-		// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
+		//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
 		// TODO  Why isn't props.iGPU accurate!?
 		if strings.EqualFold(name, iGPUName) {
 			slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
@ -161,7 +161,7 @@ func AMDValidateLibDir() (string, error) {

 	// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Please install ROCm")
-	return "", errors.New("no suitable rocm found, falling back to CPU")
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }

 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
--- a/gpu/assets.go
+++ b/gpu/assets.go
@ -42,7 +42,7 @@ func PayloadsDir() (string, error) {
 				return "", fmt.Errorf("failed to generate tmp dir: %w", err)
 			}
 		} else {
-			err = os.MkdirAll(tmpDir, 0o755)
+			err = os.MkdirAll(tmpDir, 0755)
 			if err != nil {
 				return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
 			}
@ -54,7 +54,7 @@ func PayloadsDir() (string, error) {
 		if err != nil {
 			return "", err
 		}
-		if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
+		if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
 			return "", err
 		}

--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@ -1,11 +1,6 @@
 package gpu

 import (
-	"os"
-	"path/filepath"
-	"runtime"
-	"strings"
-
 	"golang.org/x/sys/cpu"
 )

@ -19,19 +14,3 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
-
-func IsNUMA() bool {
-	if runtime.GOOS != "linux" {
-		// numa support in llama.cpp is linux only
-		return false
-	}
-	ids := map[string]interface{}{}
-	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
-	for _, packageId := range packageIds {
-		id, err := os.ReadFile(packageId)
-		if err == nil {
-			ids[strings.TrimSpace(string(id))] = struct{}{}
-		}
-	}
-	return len(ids) > 1
-}
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@ -7,9 +7,9 @@ package gpu
 #cgo windows LDFLAGS: -lpthread

 #include "gpu_info.h"
+
 */
 import "C"
-
 import (
 	"fmt"
 	"log/slog"
@ -70,6 +70,7 @@ var CudaTegra string = os.Getenv("JETSON_JETPACK")

 // Note: gpuMutex must already be held
 func initCudaHandles() *cudaHandles {
+
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

 	cHandles := &cudaHandles{}
@ -210,16 +211,14 @@ func GetGPUInfo() GpuInfoList {
 		if err != nil {
 			slog.Warn("error looking up system memory", "error", err)
 		}
-		cpus = []CPUInfo{
-			{
-				GpuInfo: GpuInfo{
-					memInfo: mem,
-					Library: "cpu",
-					Variant: cpuCapability,
-					ID:      "0",
-				},
+		cpus = []CPUInfo{CPUInfo{
+			GpuInfo: GpuInfo{
+				memInfo: mem,
+				Library: "cpu",
+				Variant: cpuCapability,
+				ID:      "0",
 			},
-		}
+		}}

 		// Fallback to CPU mode if we're lacking required vector extensions on x86
 		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@ -8,7 +8,6 @@ package gpu
 #include "gpu_info_darwin.h"
 */
 import "C"
-
 import (
 	"runtime"

--- a/gpu/gpu_info.h
+++ b/gpu/gpu_info.h
@ -67,4 +67,4 @@ void cpu_check_ram(mem_info_t *resp);
 #include "gpu_info_oneapi.h"

 #endif  // __GPU_INFO_H__
-#endif  // __APPLE__
+#endif  // __APPLE__
--- a/gpu/gpu_linux.go
+++ b/gpu/gpu_linux.go
@ -43,12 +43,10 @@ var OneapiGlobs = []string{
 	"/usr/lib*/libze_intel_gpu.so*",
 }

-var (
-	CudartMgmtName = "libcudart.so*"
-	NvcudaMgmtName = "libcuda.so*"
-	NvmlMgmtName   = "" // not currently wired on linux
-	OneapiMgmtName = "libze_intel_gpu.so"
-)
+var CudartMgmtName = "libcudart.so*"
+var NvcudaMgmtName = "libcuda.so*"
+var NvmlMgmtName = "" // not currently wired on linux
+var OneapiMgmtName = "libze_intel_gpu.so"

 func GetCPUMem() (memInfo, error) {
 	var mem memInfo
--- a/gpu/gpu_windows.go
+++ b/gpu/gpu_windows.go
@ -40,12 +40,10 @@ var OneapiGlobs = []string{
 	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
 }

-var (
-	CudartMgmtName = "cudart64_*.dll"
-	NvcudaMgmtName = "nvcuda.dll"
-	NvmlMgmtName   = "nvml.dll"
-	OneapiMgmtName = "ze_intel_gpu64.dll"
-)
+var CudartMgmtName = "cudart64_*.dll"
+var NvcudaMgmtName = "nvcuda.dll"
+var NvmlMgmtName = "nvml.dll"
+var OneapiMgmtName = "ze_intel_gpu64.dll"

 func GetCPUMem() (memInfo, error) {
 	memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@ -5,7 +5,6 @@ package integration
 import (
 	"context"
 	"log/slog"
-	"os"
 	"strconv"
 	"sync"
 	"testing"
@ -14,6 +13,7 @@ import (
 	"github.com/stretchr/testify/require"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )

@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
 			},
 		}
 		resp = [2][]string{
-			{"sunlight"},
-			{"england", "english", "massachusetts", "pilgrims", "british"},
+			[]string{"sunlight"},
+			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
 		}
 	)
 	var wg sync.WaitGroup
@ -71,11 +71,12 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	reqLimit := len(req)
 	iterLimit := 5

-	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
-		maxVram, err := strconv.ParseUint(s, 10, 64)
+	vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
+	if vram != "" {
+		max, err := strconv.ParseUint(vram, 10, 64)
 		require.NoError(t, err)
 		// Don't hammer on small VRAM cards...
-		if maxVram < 4*format.GibiByte {
+		if max < 4*1024*1024*1024 {
 			reqLimit = min(reqLimit, 2)
 			iterLimit = 2
 		}
@ -232,12 +233,12 @@ func TestMultiModelStress(t *testing.T) {
 	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
 		// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > maxVram {
-			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
+		if i > 1 && consumed > vram {
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))

 		wg.Add(1)
 		go func(i int) {
--- a/integration/llm_test.go
+++ b/integration/llm_test.go
@ -35,8 +35,8 @@ var (
 		},
 	}
 	resp = [2][]string{
-		{"sunlight"},
-		{"england", "english", "massachusetts", "pilgrims"},
+		[]string{"sunlight"},
+		[]string{"england", "english", "massachusetts", "pilgrims"},
 	}
 )

--- a/integration/max_queue_test.go
+++ b/integration/max_queue_test.go
@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
 	// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
 	threadCount := 32
 	if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
-		threadCount = int(maxQueue)
+		threadCount = maxQueue
 	} else {
 		t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
 	}
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 	fn := func(resp api.ProgressResponse) error {
 		// fmt.Print(".")
 		if !stallTimer.Reset(stallDuration) {
-			return errors.New("stall was detected, aborting status reporting")
+			return fmt.Errorf("stall was detected, aborting status reporting")
 		}
 		return nil
 	}
@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er

 	select {
 	case <-stallTimer.C:
-		return errors.New("download stalled")
+		return fmt.Errorf("download stalled")
 	case <-done:
 		return pullError
 	}
@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
 		// fmt.Print(".")
 		buf.Write([]byte(response.Response))
 		if !stallTimer.Reset(streamTimeout) {
-			return errors.New("stall was detected while streaming response, aborting")
+			return fmt.Errorf("stall was detected while streaming response, aborting")
 		}
 		return nil
 	}
@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			{"sunlight"},
-			{"soil", "organic", "earth", "black", "tan"},
-			{"england", "english", "massachusetts", "pilgrims", "british"},
-			{"fourth", "july", "declaration", "independence"},
-			{"nitrogen", "oxygen", "carbon", "dioxide"},
+			[]string{"sunlight"},
+			[]string{"soil", "organic", "earth", "black", "tan"},
+			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
+			[]string{"fourth", "july", "declaration", "independence"},
+			[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
 		}
 }
--- a/llm/ext_server/CMakeLists.txt
+++ b/llm/ext_server/CMakeLists.txt
@ -1,13 +1,13 @@
-set(TARGET ollama_llama_server)
-option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
-install(TARGETS ${TARGET} RUNTIME)
-target_compile_definitions(${TARGET} PRIVATE
-    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
-)
-target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
-if (WIN32)
-    TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
-endif()
+set(TARGET ollama_llama_server)
+option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
+install(TARGETS ${TARGET} RUNTIME)
+target_compile_definitions(${TARGET} PRIVATE
+    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
+)
+target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
+if (WIN32)
+    TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
+endif()
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@ -44,7 +44,6 @@
 #include <errhandlingapi.h>
 #endif

-#include <algorithm>
 #include <cstddef>
 #include <thread>
 #include <chrono>
@ -1221,7 +1220,6 @@ struct llama_server_context

                res.result_json = json
                {
-                    {"id", res.id},
                    {"embedding", std::vector<float>(embd, embd + n_embd)},
                    {"timings",             slot.get_formated_timings()},
                };
@ -3205,10 +3203,6 @@ int main(int argc, char **argv) {
                    }

                    responses = result.result_json.value("results", std::vector<json>{result.result_json});
-                    std::sort(responses.begin(), responses.end(), [](const json& a, const json& b) {
-                        return a["id"] < b["id"];
-                    });
-
                    json embeddings = json::array();

                    int prompt_n = 0;
--- a/llm/llm.go
+++ b/llm/llm.go
@ -11,9 +11,8 @@ package llm
 // #include <stdlib.h>
 // #include "llama.h"
 import "C"
-
 import (
-	"errors"
+	"fmt"
 	"unsafe"
 )

@ -34,7 +33,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
 	params.ftype = ftype.Value()

 	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
-		return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
+		return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
 	}

 	return nil
--- a/llm/memory_test.go
+++ b/llm/memory_test.go
@ -6,11 +6,10 @@ import (
 	"os"
 	"testing"

-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/gpu"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )

 func TestEstimateGPULayers(t *testing.T) {
--- a/llm/server.go
+++ b/llm/server.go
@ -184,15 +184,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr

 	params := []string{
 		"--model", model,
-		"--ctx-size", strconv.Itoa(opts.NumCtx),
-		"--batch-size", strconv.Itoa(opts.NumBatch),
+		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
+		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
 		"--embedding",
 	}

 	params = append(params, "--log-disable")

 	if opts.NumGPU >= 0 {
-		params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
+		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
 	}

 	if envconfig.Debug() {
@ -200,7 +200,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}

 	if opts.MainGPU > 0 {
-		params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
+		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}

 	if len(adapters) > 0 {
@ -214,7 +214,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}

 	if opts.NumThread > 0 {
-		params = append(params, "--threads", strconv.Itoa(opts.NumThread))
+		params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
 	}

 	if !opts.F16KV {
@ -256,17 +256,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}

-	if gpu.IsNUMA() {
-		numaMode := "distribute"
-		if runtime.GOOS == "linux" {
-			if _, err := exec.LookPath("numactl"); err == nil {
-				numaMode = "numactl"
-			}
-		}
-		params = append(params, "--numa", numaMode)
+	if opts.UseNUMA {
+		params = append(params, "--numa")
 	}

-	params = append(params, "--parallel", strconv.Itoa(numParallel))
+	params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))

 	if estimate.TensorSplit != "" {
 		params = append(params, "--tensor-split", estimate.TensorSplit)
@ -431,7 +425,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 				if strings.Contains(s.status.LastErrMsg, "unknown model") {
 					s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
 				}
-				s.done <- errors.New(s.status.LastErrMsg)
+				s.done <- fmt.Errorf(s.status.LastErrMsg)
 			} else {
 				s.done <- err
 			}
--- a/main.go
+++ b/main.go
@ -3,9 +3,8 @@ package main
 import (
 	"context"

-	"github.com/spf13/cobra"
-
 	"github.com/ollama/ollama/cmd"
+	"github.com/spf13/cobra"
 )

 func main() {
--- a/openai/openai.go
+++ b/openai/openai.go
@ -5,7 +5,6 @@ import (
 	"bytes"
 	"encoding/base64"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@ -15,7 +14,6 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/types/model"
 )
@ -369,24 +367,24 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 			for _, c := range content {
 				data, ok := c.(map[string]any)
 				if !ok {
-					return nil, errors.New("invalid message format")
+					return nil, fmt.Errorf("invalid message format")
 				}
 				switch data["type"] {
 				case "text":
 					text, ok := data["text"].(string)
 					if !ok {
-						return nil, errors.New("invalid message format")
+						return nil, fmt.Errorf("invalid message format")
 					}
 					messages = append(messages, api.Message{Role: msg.Role, Content: text})
 				case "image_url":
 					var url string
 					if urlMap, ok := data["image_url"].(map[string]any); ok {
 						if url, ok = urlMap["url"].(string); !ok {
-							return nil, errors.New("invalid message format")
+							return nil, fmt.Errorf("invalid message format")
 						}
 					} else {
 						if url, ok = data["image_url"].(string); !ok {
-							return nil, errors.New("invalid message format")
+							return nil, fmt.Errorf("invalid message format")
 						}
 					}

@ -402,17 +400,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 					}

 					if !valid {
-						return nil, errors.New("invalid image input")
+						return nil, fmt.Errorf("invalid image input")
 					}

 					img, err := base64.StdEncoding.DecodeString(url)
 					if err != nil {
-						return nil, errors.New("invalid message format")
+						return nil, fmt.Errorf("invalid message format")
 					}

 					messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
 				default:
-					return nil, errors.New("invalid message format")
+					return nil, fmt.Errorf("invalid message format")
 				}
 			}
 		default:
@ -425,7 +423,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 				toolCalls[i].Function.Name = tc.Function.Name
 				err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
 				if err != nil {
-					return nil, errors.New("invalid tool call arguments")
+					return nil, fmt.Errorf("invalid tool call arguments")
 				}
 			}
 			messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
@ -739,12 +737,14 @@ func (w *RetrieveWriter) Write(data []byte) (int, error) {
 func (w *EmbedWriter) writeResponse(data []byte) (int, error) {
 	var embedResponse api.EmbedResponse
 	err := json.Unmarshal(data, &embedResponse)
+
 	if err != nil {
 		return 0, err
 	}

 	w.ResponseWriter.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w.ResponseWriter).Encode(toEmbeddingList(w.model, embedResponse))
+
 	if err != nil {
 		return 0, err
 	}
--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@ -12,16 +12,13 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/stretchr/testify/assert"
-
 	"github.com/ollama/ollama/api"
+	"github.com/stretchr/testify/assert"
 )

-const (
-	prefix   = `data:image/jpeg;base64,`
-	image    = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
-	imageURL = prefix + image
-)
+const prefix = `data:image/jpeg;base64,`
+const image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
+const imageURL = prefix + image

 func prepareRequest(req *http.Request, body any) {
 	bodyBytes, _ := json.Marshal(body)
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@ -82,7 +82,7 @@ TEMPLATE """   {{ if .System }}<|start_header_id|>system<|end_header_id|>
 }

 func TestParseFileFrom(t *testing.T) {
-	cases := []struct {
+	var cases = []struct {
 		input    string
 		expected []Command
 		err      error
@ -185,7 +185,7 @@ BADCOMMAND param1 value1
 }

 func TestParseFileMessages(t *testing.T) {
-	cases := []struct {
+	var cases = []struct {
 		input    string
 		expected []Command
 		err      error
@ -276,7 +276,7 @@ MESSAGE system`,
 }

 func TestParseFileQuoted(t *testing.T) {
-	cases := []struct {
+	var cases = []struct {
 		multiline string
 		expected  []Command
 		err       error
@ -430,7 +430,7 @@ TEMPLATE """
 }

 func TestParseFileParameters(t *testing.T) {
-	cases := map[string]struct {
+	var cases = map[string]struct {
 		name, value string
 	}{
 		"numa true":                    {"numa", "true"},
@ -491,7 +491,7 @@ func TestParseFileParameters(t *testing.T) {
 }

 func TestParseFileComments(t *testing.T) {
-	cases := []struct {
+	var cases = []struct {
 		input    string
 		expected []Command
 	}{
@ -516,7 +516,7 @@ FROM foo
 }

 func TestParseFileFormatParseFile(t *testing.T) {
-	cases := []string{
+	var cases = []string{
 		`
 FROM foo
 ADAPTER adapter1
--- a/progress/bar.go
+++ b/progress/bar.go
@ -6,9 +6,8 @@ import (
 	"strings"
 	"time"

-	"golang.org/x/term"
-
 	"github.com/ollama/ollama/format"
+	"golang.org/x/term"
 )

 type Bar struct {
--- a/readline/buffer.go
+++ b/readline/buffer.go
@ -13,7 +13,7 @@ type Buffer struct {
 	DisplayPos int
 	Pos        int
 	Buf        *arraylist.List
-	// LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
+	//LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
 	LineHasSpace *arraylist.List
 	Prompt       *Prompt
 	LineWidth    int
@ -56,7 +56,7 @@ func (b *Buffer) GetLineSpacing(line int) bool {

 func (b *Buffer) MoveLeft() {
 	if b.Pos > 0 {
-		// asserts that we retrieve a rune
+		//asserts that we retrieve a rune
 		if e, ok := b.Buf.Get(b.Pos - 1); ok {
 			if r, ok := e.(rune); ok {
 				rLength := runewidth.RuneWidth(r)
--- a/readline/errors.go
+++ b/readline/errors.go
@ -4,7 +4,9 @@ import (
 	"errors"
 )

-var ErrInterrupt = errors.New("Interrupt")
+var (
+	ErrInterrupt = errors.New("Interrupt")
+)

 type InterruptError struct {
 	Line []rune
--- a/readline/term_linux.go
+++ b/readline/term_linux.go
@ -7,10 +7,8 @@ import (
 	"unsafe"
 )

-const (
-	tcgets = 0x5401
-	tcsets = 0x5402
-)
+const tcgets = 0x5401
+const tcsets = 0x5402

 func getTermios(fd uintptr) (*Termios, error) {
 	termios := new(Termios)
--- a/server/download.go
+++ b/server/download.go
@ -28,10 +28,8 @@ import (

 const maxRetries = 6

-var (
-	errMaxRetriesExceeded = errors.New("max retries exceeded")
-	errPartStalled        = errors.New("part stalled")
-)
+var errMaxRetriesExceeded = errors.New("max retries exceeded")
+var errPartStalled = errors.New("part stalled")

 var blobDownloadManager sync.Map

--- a/server/images.go
+++ b/server/images.go
@ -828,7 +828,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	fn(api.ProgressResponse{Status: "retrieving manifest"})

 	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
-		return errors.New("insecure protocol http")
+		return fmt.Errorf("insecure protocol http")
 	}

 	manifest, _, err := GetManifest(mp)
@ -895,7 +895,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}

 	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
-		return errors.New("insecure protocol http")
+		return fmt.Errorf("insecure protocol http")
 	}

 	fn(api.ProgressResponse{Status: "pulling manifest"})
@ -1010,7 +1010,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 	return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
 }

-var errUnauthorized = errors.New("unauthorized: access denied")
+var errUnauthorized = fmt.Errorf("unauthorized: access denied")

 // getTokenSubject returns the subject of a JWT token, it does not validate the token
 func getTokenSubject(token string) string {
--- a/server/manifest.go
+++ b/server/manifest.go
@ -2,9 +2,9 @@ package server

 import (
 	"crypto/sha256"
-	"encoding/hex"
 	"encoding/json"
 	"errors"
+	"fmt"
 	"io"
 	"log/slog"
 	"os"
@ -88,7 +88,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {

 	m.filepath = p
 	m.fi = fi
-	m.digest = hex.EncodeToString(sha256sum.Sum(nil))
+	m.digest = fmt.Sprintf("%x", sha256sum.Sum(nil))

 	return &m, nil
 }
--- a/server/manifest_test.go
+++ b/server/manifest_test.go
@ -14,7 +14,7 @@ func createManifest(t *testing.T, path, name string) {
 	t.Helper()

 	p := filepath.Join(path, "manifests", name)
-	if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
+	if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil {
 		t.Fatal(err)
 	}

--- a/server/model_test.go
+++ b/server/model_test.go
@ -9,7 +9,6 @@ import (
 	"testing"

 	"github.com/google/go-cmp/cmp"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/template"
 )
--- a/server/prompt_test.go
+++ b/server/prompt_test.go
@ -6,7 +6,6 @@ import (
 	"testing"

 	"github.com/google/go-cmp/cmp"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/template"
 )
--- a/server/routes.go
+++ b/server/routes.go
@ -55,10 +55,8 @@ func init() {
 	gin.SetMode(mode)
 }

-var (
-	errRequired    = errors.New("is required")
-	errBadTemplate = errors.New("template error")
-)
+var errRequired = errors.New("is required")
+var errBadTemplate = errors.New("template error")

 func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
 	opts := api.DefaultOptions()
@ -371,6 +369,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 		input[i] = s
 	}
 	embeddings, err := r.Embed(c.Request.Context(), input)
+
 	if err != nil {
 		slog.Error("embedding generation failed", "error", err)
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@ -431,6 +430,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	}

 	embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})
+
 	if err != nil {
 		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@ -556,7 +556,7 @@ func checkNameExists(name model.Name) error {

 	for n := range names {
 		if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name {
-			return errors.New("a model with that name already exists")
+			return fmt.Errorf("a model with that name already exists")
 		}
 	}

@ -729,7 +729,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {

 	n := model.ParseName(req.Model)
 	if !n.IsValid() {
-		return nil, errors.New("invalid model name")
+		return nil, fmt.Errorf("invalid model name")
 	}

 	manifest, err := ParseNamedManifest(n)
@ -993,7 +993,7 @@ func allowedHost(host string) bool {
 		return true
 	}

-	tlds := []string{
+	var tlds = []string{
 		"localhost",
 		"local",
 		"internal",
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@ -2,7 +2,6 @@ package server

 import (
 	"bytes"
-	"cmp"
 	"encoding/json"
 	"fmt"
 	"io"
@ -14,7 +13,6 @@ import (
 	"testing"

 	"github.com/gin-gonic/gin"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
@ -54,8 +52,6 @@ func (t *responseRecorder) CloseNotify() <-chan bool {

 func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder {
 	t.Helper()
-	// if OLLAMA_MODELS is not set, set it to the temp directory
-	t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir()))

 	w := NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@ -493,7 +489,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 			Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
 			Stream:    &stream,
 		})
-
+	
 		if w.Code != http.StatusBadRequest {
 			t.Fatalf("expected status code 400, actual %d", w.Code)
 		}
@ -505,7 +501,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 			Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
 			Stream:    &stream,
 		})
-
+	
 		if w.Code != http.StatusBadRequest {
 			t.Fatalf("expected status code 400, actual %d", w.Code)
 		}
@ -517,7 +513,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 			Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{  Prompt }}", createBinFile(t, nil, nil)),
 			Stream:    &stream,
 		})
-
+	
 		if w.Code != http.StatusBadRequest {
 			t.Fatalf("expected status code 400, actual %d", w.Code)
 		}
--- a/server/routes_delete_test.go
+++ b/server/routes_delete_test.go
@ -9,7 +9,6 @@ import (
 	"testing"

 	"github.com/gin-gonic/gin"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/types/model"
 )
--- a/server/routes_list_test.go
+++ b/server/routes_list_test.go
@ -8,7 +8,6 @@ import (
 	"testing"

 	"github.com/gin-gonic/gin"
-
 	"github.com/ollama/ollama/api"
 )

--- a/server/routes_test.go
+++ b/server/routes_test.go
@ -333,6 +333,7 @@ func Test_Routes(t *testing.T) {
 					t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
 				}
 				_, err := io.ReadAll(resp.Body)
+
 				if err != nil {
 					t.Fatal(err)
 				}
--- a/server/sched.go
+++ b/server/sched.go
@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3
 // we'll back off down to 1 to try to get it to fit
 var defaultParallel = 4

-var ErrMaxQueue = errors.New("server busy, please try again.  maximum pending requests exceeded")
+var ErrMaxQueue = fmt.Errorf("server busy, please try again.  maximum pending requests exceeded")

 func InitScheduler(ctx context.Context) *Scheduler {
 	maxQueue := envconfig.MaxQueue()
--- a/server/sched_test.go
+++ b/server/sched_test.go
@ -3,25 +3,23 @@ package server
 import (
 	"bytes"
 	"context"
-	"errors"
+	"fmt"
 	"log/slog"
 	"os"
 	"testing"
 	"time"

-	"github.com/stretchr/testify/require"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/app/lifecycle"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
+	"github.com/stretchr/testify/require"
 )

-func TestMain(m *testing.M) {
+func init() {
 	os.Setenv("OLLAMA_DEBUG", "1")
 	lifecycle.InitLogging()
-	os.Exit(m.Run())
 }

 func TestInitScheduler(t *testing.T) {
@ -48,7 +46,7 @@ func TestLoad(t *testing.T) {
 	}
 	// Fail to load model first
 	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
-		return nil, errors.New("something failed to load model blah")
+		return nil, fmt.Errorf("something failed to load model blah")
 	}
 	gpus := gpu.GpuInfoList{}
 	s.load(req, ggml, gpus, 0)
@ -77,7 +75,7 @@ func TestLoad(t *testing.T) {
 	}

 	req.model.ModelPath = "dummy_model_path"
-	server.waitResp = errors.New("wait failure")
+	server.waitResp = fmt.Errorf("wait failure")
 	s.load(req, ggml, gpus, 0)
 	select {
 	case err := <-req.errCh:
@ -602,7 +600,7 @@ func TestNeedsReload(t *testing.T) {
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
 	req.opts.NumBatch = runner.Options.NumBatch
-	llm.pingResp = errors.New("foo")
+	llm.pingResp = fmt.Errorf("foo")
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
 	llm.pingResp = nil
@ -726,19 +724,15 @@ func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitRes
 func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
 	return s.completionResp
 }
-
 func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) {
 	return s.embedResp, s.embedRespErr
 }
-
 func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {
 	return s.tokenizeResp, s.tokenizeRespErr
 }
-
 func (s *mockLlm) Detokenize(ctx context.Context, tokens []int) (string, error) {
 	return s.detokenizeResp, s.detonekizeRespErr
 }
-
 func (s *mockLlm) Close() error {
 	s.closeCalled = true
 	return s.closeResp
--- a/server/upload.go
+++ b/server/upload.go
@ -12,15 +12,13 @@ import (
 	"net/http"
 	"net/url"
 	"os"
-	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"

-	"golang.org/x/sync/errgroup"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
+	"golang.org/x/sync/errgroup"
 )

 var blobUploadManager sync.Map
@ -214,7 +212,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
 func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *registryOptions) error {
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/octet-stream")
-	headers.Set("Content-Length", strconv.FormatInt(part.Size, 10))
+	headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))

 	if method == http.MethodPatch {
 		headers.Set("X-Redirect-Uploads", "1")
--- a/template/template.go
+++ b/template/template.go
@ -15,9 +15,8 @@ import (
 	"text/template/parse"

 	"github.com/agnivade/levenshtein"
-	"golang.org/x/exp/maps"
-
 	"github.com/ollama/ollama/api"
+	"golang.org/x/exp/maps"
 )

 //go:embed index.json
--- a/template/template_test.go
+++ b/template/template_test.go
@ -12,7 +12,6 @@ import (
 	"testing"

 	"github.com/google/go-cmp/cmp"
-
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
--- a/types/errtypes/errtypes.go
+++ b/types/errtypes/errtypes.go
@ -6,10 +6,8 @@ import (
 	"strings"
 )

-const (
-	UnknownOllamaKeyErrMsg = "unknown ollama key"
-	InvalidModelNameErrMsg = "invalid model name"
-)
+const UnknownOllamaKeyErrMsg = "unknown ollama key"
+const InvalidModelNameErrMsg = "invalid model name"

 // TODO: This should have a structured response from the API
 type UnknownOllamaKey struct {
--- a/types/model/name.go
+++ b/types/model/name.go
@ -258,7 +258,7 @@ func (n Name) IsValid() bool {
 // IsFullyQualified returns true if all parts of the name are present and
 // valid without the digest.
 func (n Name) IsFullyQualified() bool {
-	parts := []string{
+	var parts = []string{
 		n.Host,
 		n.Namespace,
 		n.Model,