Compare commits

..

17 commits

Author SHA1 Message Date
f2d1c842ad
Merge https://github.com/ollama/ollama 2024-08-06 08:21:56 +05:30
royjhan
86b907f82a
sort batch results (#6189) 2024-08-05 16:55:34 -07:00
Michael Yang
10d49bce70
Merge pull request #6190 from ollama/mxyng/fix-integration
fix concurrency test
2024-08-05 16:45:49 -07:00
Michael Yang
7ed367419e fix concurrency test 2024-08-05 16:36:16 -07:00
Daniel Hiltgen
50ee8b5f56
Merge pull request #6186 from dhiltgen/numa
Implement linux NUMA detection
2024-08-05 15:20:06 -07:00
Michael Yang
03bdac0595
Merge pull request #6146 from ollama/mxyng/testing
use testing tempdirs
2024-08-05 13:00:05 -07:00
Daniel Hiltgen
f457d63400 Implement linux NUMA detection
If the system has multiple NUMA nodes, enable NUMA support in llama.cpp.
If numactl is found in the path, use it; otherwise fall back to the basic "distribute" mode.
2024-08-05 12:56:20 -07:00
Michael Yang
39f2bc6bfc
Merge pull request #6167 from ollama/mxyng/line-feed
line feed
2024-08-05 00:06:28 -07:00
frob
b73b0940ef
Disable paging for journalctl (#6154)
Users who run `journalctl` to collect logs for an issue report sometimes don't realize that paging is causing information to be missed.
2024-08-05 00:10:53 -04:00
Michael Yang
6a07344786 line feed 2024-08-04 17:25:41 -07:00
sryu1
8b920f35a4
Add Gemma 2 2b (#6151) 2024-08-04 10:58:39 -04:00
Ivan Charapanau
4221e39867
Reference ollama integration with Harbor (#6147) 2024-08-02 17:03:46 -07:00
Michael Yang
a091fadfda use testing tempdirs 2024-08-02 16:04:06 -07:00
Michael Yang
77ccbf04dc
Merge pull request #6128 from ollama/mxyng/lint
enable gofmt/gofumpt/goimports/tenv
2024-08-02 14:58:40 -07:00
royjhan
4addf6b587
Update OpenAI Compatibility Docs with /v1/completions (#5311)
* Update docs

* token bug corrected

* Update docs/openai.md

* Update docs/openai.md

* add suffix

* merge conflicts

* merge conflicts
2024-08-02 13:16:23 -07:00
royjhan
85c7f11170
Update docs (#5310) 2024-08-02 13:05:57 -07:00
Michael Yang
b732beba6a lint 2024-08-01 17:06:06 -07:00
79 changed files with 458 additions and 262 deletions

1
.gitattributes vendored
View file

@ -1 +1,2 @@
llm/ext_server/* linguist-vendored llm/ext_server/* linguist-vendored
* text eol=lf

View file

@ -273,7 +273,7 @@ jobs:
if: ${{ startsWith(matrix.os, 'macos-') }} if: ${{ startsWith(matrix.os, 'macos-') }}
- uses: golangci/golangci-lint-action@v6 - uses: golangci/golangci-lint-action@v6
with: with:
args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }} args: --timeout 8m0s -v
test: test:
strategy: strategy:
matrix: matrix:

View file

@ -7,22 +7,32 @@ linters:
- bodyclose - bodyclose
- containedctx - containedctx
- contextcheck - contextcheck
- errcheck
- exportloopref - exportloopref
- gci
- gocheckcompilerdirectives - gocheckcompilerdirectives
# conditionally enable this on linux/macos - gofmt
# - gofmt - gofumpt
# - goimports - gosimple
- govet
- ineffassign
- intrange - intrange
- makezero
- misspell - misspell
- nilerr - nilerr
- nolintlint - nolintlint
- nosprintfhostport - nosprintfhostport
- staticcheck
- tenv
- testifylint - testifylint
- unconvert - unconvert
- unused - unused
- usestdlibvars
- wastedassign - wastedassign
- whitespace - whitespace
- usestdlibvars linters-settings:
gci:
sections: [standard, default, localmodule]
severity: severity:
default-severity: error default-severity: error
rules: rules:

View file

@ -54,6 +54,7 @@ Here are some example models that can be downloaded:
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` | | Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` | | Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` | | Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` | | Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` | | Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
| Mistral | 7B | 4.1GB | `ollama run mistral` | | Mistral | 7B | 4.1GB | `ollama run mistral` |
@ -300,6 +301,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client) - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows) - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac) - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
### Terminal ### Terminal

View file

@ -18,6 +18,7 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@ -172,7 +173,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
} }
if errorResponse.Error != "" { if errorResponse.Error != "" {
return fmt.Errorf(errorResponse.Error) return errors.New(errorResponse.Error)
} }
if response.StatusCode >= http.StatusBadRequest { if response.StatusCode >= http.StatusBadRequest {

View file

@ -231,7 +231,6 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory // Runner options which must be set when the model is loaded into memory
type Runner struct { type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"` NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"` NumBatch int `json:"num_batch,omitempty"`
NumGPU int `json:"num_gpu,omitempty"` NumGPU int `json:"num_gpu,omitempty"`
@ -615,7 +614,6 @@ func DefaultOptions() Options {
F16KV: true, F16KV: true,
UseMLock: false, UseMLock: false,
UseMMap: nil, UseMMap: nil,
UseNUMA: false,
}, },
} }
} }

View file

@ -2,7 +2,7 @@ package api
import ( import (
"encoding/json" "encoding/json"
"fmt" "errors"
"math" "math"
"testing" "testing"
"time" "time"
@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
"use_mmap": {"foo"}, "use_mmap": {"foo"},
}, },
exp: nil, exp: nil,
err: fmt.Errorf("invalid bool value [foo]"), err: errors.New("invalid bool value [foo]"),
}, },
} }

View file

@ -2,8 +2,8 @@
package lifecycle package lifecycle
import "fmt" import "errors"
func GetStarted() error { func GetStarted() error {
return fmt.Errorf("GetStarted not implemented") return errors.New("not implemented")
} }

View file

@ -34,7 +34,6 @@ func GetStarted() error {
Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false}, Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
} }
proc, err := os.StartProcess(args[0], args, attrs) proc, err := os.StartProcess(args[0], args, attrs)
if err != nil { if err != nil {
return fmt.Errorf("unable to start getting started shell %w", err) return fmt.Errorf("unable to start getting started shell %w", err)
} }

View file

@ -27,7 +27,7 @@ func InitLogging() {
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion // TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
} else { } else {
rotateLogs(AppLogFile) rotateLogs(AppLogFile)
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
if err != nil { if err != nil {
slog.Error(fmt.Sprintf("failed to create server log %v", err)) slog.Error(fmt.Sprintf("failed to create server log %v", err))
return return

View file

@ -5,5 +5,5 @@ package lifecycle
import "log/slog" import "log/slog"
func ShowLogs() { func ShowLogs() {
slog.Warn("ShowLogs not yet implemented") slog.Warn("not implemented")
} }

View file

@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
// No log exists // No log exists
rotateLogs(logFile) rotateLogs(logFile)
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644)) require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
assert.FileExists(t, logFile) assert.FileExists(t, logFile)
// First rotation // First rotation
rotateLogs(logFile) rotateLogs(logFile)
@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
assert.NoFileExists(t, logFile) assert.NoFileExists(t, logFile)
for i := 2; i <= LogRotationCount+1; i++ { for i := 2; i <= LogRotationCount+1; i++ {
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644)) require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
assert.FileExists(t, logFile) assert.FileExists(t, logFile)
rotateLogs(logFile) rotateLogs(logFile)
assert.NoFileExists(t, logFile) assert.NoFileExists(t, logFile)

View file

@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
} }
rotateLogs(ServerLogFile) rotateLogs(ServerLogFile)
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create server log: %w", err) return nil, fmt.Errorf("failed to create server log: %w", err)
} }

View file

@ -15,6 +15,7 @@ import (
"path" "path"
"path/filepath" "path/filepath"
"runtime" "runtime"
"strconv"
"strings" "strings"
"time" "time"
@ -46,7 +47,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
query.Add("os", runtime.GOOS) query.Add("os", runtime.GOOS)
query.Add("arch", runtime.GOARCH) query.Add("arch", runtime.GOARCH)
query.Add("version", version.Version) query.Add("version", version.Version)
query.Add("ts", fmt.Sprintf("%d", time.Now().Unix())) query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
nonce, err := auth.NewNonce(rand.Reader, 16) nonce, err := auth.NewNonce(rand.Reader, 16)
if err != nil { if err != nil {

View file

@ -4,9 +4,9 @@ package lifecycle
import ( import (
"context" "context"
"fmt" "errors"
) )
func DoUpgrade(cancel context.CancelFunc, done chan int) error { func DoUpgrade(cancel context.CancelFunc, done chan int) error {
return fmt.Errorf("DoUpgrade not yet implemented") return errors.New("not implemented")
} }

View file

@ -2,6 +2,7 @@ package lifecycle
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"log/slog" "log/slog"
"os" "os"
@ -15,7 +16,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
return fmt.Errorf("failed to lookup downloads: %s", err) return fmt.Errorf("failed to lookup downloads: %s", err)
} }
if len(files) == 0 { if len(files) == 0 {
return fmt.Errorf("no update downloads found") return errors.New("no update downloads found")
} else if len(files) > 1 { } else if len(files) > 1 {
// Shouldn't happen // Shouldn't happen
slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files)) slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@ -64,7 +65,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
} }
} else { } else {
// TODO - some details about why it didn't start, or is this a pedantic error case? // TODO - some details about why it didn't start, or is this a pedantic error case?
return fmt.Errorf("installer process did not start") return errors.New("installer process did not start")
} }
// TODO should we linger for a moment and check to make sure it's actually running by checking the pid? // TODO should we linger for a moment and check to make sure it's actually running by checking the pid?

View file

@ -3,11 +3,11 @@
package tray package tray
import ( import (
"fmt" "errors"
"github.com/ollama/ollama/app/tray/commontray" "github.com/ollama/ollama/app/tray/commontray"
) )
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
return nil, fmt.Errorf("NOT IMPLEMENTED YET") return nil, errors.New("not implemented")
} }

View file

@ -11,9 +11,7 @@ import (
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
) )
var ( var quitOnce sync.Once
quitOnce sync.Once
)
func (t *winTray) Run() { func (t *winTray) Run() {
nativeLoop() nativeLoop()

View file

@ -13,8 +13,9 @@ import (
"sync" "sync"
"unsafe" "unsafe"
"github.com/ollama/ollama/app/tray/commontray"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
"github.com/ollama/ollama/app/tray/commontray"
) )
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@ -414,7 +415,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash) iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
if _, err := os.Stat(iconFilePath); os.IsNotExist(err) { if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil { if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
return "", err return "", err
} }
} }

View file

@ -5,6 +5,7 @@ import (
"context" "context"
"crypto/rand" "crypto/rand"
"encoding/base64" "encoding/base64"
"errors"
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
@ -78,7 +79,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
parts := bytes.Split(publicKey, []byte(" ")) parts := bytes.Split(publicKey, []byte(" "))
if len(parts) < 2 { if len(parts) < 2 {
return "", fmt.Errorf("malformed public key") return "", errors.New("malformed public key")
} }
signedData, err := privateKey.Sign(rand.Reader, bts) signedData, err := privateKey.Sign(rand.Reader, bts)

View file

@ -1160,7 +1160,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
return err return err
} }
if err := startApp(cmd.Context(), client); err != nil { if err := startApp(cmd.Context(), client); err != nil {
return fmt.Errorf("could not connect to ollama app, is it running?") return errors.New("could not connect to ollama app, is it running?")
} }
} }
return nil return nil

View file

@ -604,7 +604,7 @@ func getImageData(filePath string) ([]byte, error) {
// Check if the file size exceeds 100MB // Check if the file size exceeds 100MB
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
if info.Size() > maxSize { if info.Size() > maxSize {
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)") return nil, errors.New("file size exceeds maximum limit (100MB)")
} }
buf = make([]byte, info.Size()) buf = make([]byte, info.Size())

View file

@ -2,7 +2,7 @@ package cmd
import ( import (
"context" "context"
"fmt" "errors"
"os" "os"
"os/exec" "os/exec"
"strings" "strings"
@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
return err return err
} }
if !strings.Contains(link, "Ollama.app") { if !strings.Contains(link, "Ollama.app") {
return fmt.Errorf("could not find ollama app") return errors.New("could not find ollama app")
} }
path := strings.Split(link, "Ollama.app") path := strings.Split(link, "Ollama.app")
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil { if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {

View file

@ -4,11 +4,11 @@ package cmd
import ( import (
"context" "context"
"fmt" "errors"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )
func startApp(ctx context.Context, client *api.Client) error { func startApp(ctx context.Context, client *api.Client) error {
return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it") return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
} }

View file

@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
// Finally look in the path // Finally look in the path
appExe, err = exec.LookPath(AppName) appExe, err = exec.LookPath(AppName)
if err != nil { if err != nil {
return fmt.Errorf("could not locate ollama app") return errors.New("could not locate ollama app")
} }
} }
} }

View file

@ -5,9 +5,10 @@ import (
"fmt" "fmt"
"strings" "strings"
"github.com/ollama/ollama/llm"
"github.com/pdevine/tensor" "github.com/pdevine/tensor"
"github.com/pdevine/tensor/native" "github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
) )
type llama struct { type llama struct {

View file

@ -2,6 +2,7 @@ package convert
import ( import (
"crypto/sha256" "crypto/sha256"
"encoding/hex"
"encoding/json" "encoding/json"
"flag" "flag"
"fmt" "fmt"
@ -14,8 +15,9 @@ import (
"slices" "slices"
"testing" "testing"
"github.com/ollama/ollama/llm"
"golang.org/x/exp/maps" "golang.org/x/exp/maps"
"github.com/ollama/ollama/llm"
) )
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) { func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
@ -99,7 +101,7 @@ func TestConvertFull(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
actual[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil)) actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
} }
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt))) expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))

View file

@ -10,8 +10,8 @@ import (
) )
type ZipReader struct { type ZipReader struct {
r *zip.Reader r *zip.Reader
p string p string
// limit is the maximum size of a file that can be read directly // limit is the maximum size of a file that can be read directly
// from the zip archive. Files larger than this size will be extracted // from the zip archive. Files larger than this size will be extracted

View file

@ -111,8 +111,9 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
return 0, err return 0, err
} }
for _, b := range u16s { f32s = make([]float32, len(u16s))
f32s = append(f32s, float16.Frombits(b).Float32()) for i := range u16s {
f32s[i] = float16.Frombits(u16s[i]).Float32()
} }
case "BF16": case "BF16":

View file

@ -28,13 +28,35 @@ chat_completion = client.chat.completions.create(
model='llama3', model='llama3',
) )
response = client.chat.completions.create(
model="llava",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDS
sxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPq
smVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
},
],
}
],
max_tokens=300,
)
completion = client.completions.create(
model="llama3",
prompt="Say this is a test",
)
list_completion = client.models.list() list_completion = client.models.list()
model = client.models.retrieve("llama3") model = client.models.retrieve("llama3")
embeddings = client.embeddings.create( embeddings = client.embeddings.create(
model="all-minilm", model="all-minilm",
input=["why is the sky blue?", "why is the grass green?"] input=["why is the sky blue?", "why is the grass green?"],
) )
``` ```
@ -51,23 +73,44 @@ const openai = new OpenAI({
}) })
const chatCompletion = await openai.chat.completions.create({ const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }], messages: [{ role: 'user', content: 'Say this is a test' }],
model: 'llama3', model: 'llama3',
})
const response = await openai.chat.completions.create({
model: "llava",
messages: [
{
role: "user",
content: [
{ type: "text", text: "What's in this image?" },
{
type: "image_url",
image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsx
NHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsm
VesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
},
],
},
],
})
const completion = await openai.completions.create({
model: "llama3",
prompt: "Say this is a test.",
}) })
const listCompletion = await openai.models.list() const listCompletion = await openai.models.list()
const model = await openai.models.retrieve("llama3"); const model = await openai.models.retrieve("llama3")
const embedding = await openai.embeddings.create({ const embedding = await openai.embeddings.create({
model: "all-minilm", model: "all-minilm",
input: ["why is the sky blue?", "why is the grass green?"], input: ["why is the sky blue?", "why is the grass green?"],
}); })
``` ```
### `curl` ### `curl`
``` ``` shell
curl http://localhost:11434/v1/chat/completions \ curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
@ -84,6 +127,37 @@ curl http://localhost:11434/v1/chat/completions \
] ]
}' }'
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llava",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHog
zFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesK
a49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
}
}
]
}
],
"max_tokens": 300
}'
curl http://localhost:11434/v1/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama3",
"prompt": "Say this is a test"
}'
curl http://localhost:11434/v1/models curl http://localhost:11434/v1/models
curl http://localhost:11434/v1/models/llama3 curl http://localhost:11434/v1/models/llama3
@ -106,6 +180,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] Streaming - [x] Streaming
- [x] JSON mode - [x] JSON mode
- [x] Reproducible outputs - [x] Reproducible outputs
- [x] Vision
- [x] Tools (streaming support coming soon) - [x] Tools (streaming support coming soon)
- [ ] Vision - [ ] Vision
- [ ] Logprobs - [ ] Logprobs
@ -115,7 +190,10 @@ curl http://localhost:11434/v1/embeddings \
- [x] `model` - [x] `model`
- [x] `messages` - [x] `messages`
- [x] Text `content` - [x] Text `content`
- [ ] Array of `content` parts - [x] Image `content`
- [x] Base64 encoded image
- [ ] Image URL
- [x] Array of `content` parts
- [x] `frequency_penalty` - [x] `frequency_penalty`
- [x] `presence_penalty` - [x] `presence_penalty`
- [x] `response_format` - [x] `response_format`
@ -131,6 +209,39 @@ curl http://localhost:11434/v1/embeddings \
- [ ] `user` - [ ] `user`
- [ ] `n` - [ ] `n`
### `/v1/completions`
#### Supported features
- [x] Completions
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
- [ ] Logprobs
#### Supported request fields
- [x] `model`
- [x] `prompt`
- [x] `frequency_penalty`
- [x] `presence_penalty`
- [x] `seed`
- [x] `stop`
- [x] `stream`
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
- [x] `suffix`
- [ ] `best_of`
- [ ] `echo`
- [ ] `logit_bias`
- [ ] `user`
- [ ] `n`
#### Notes
- `prompt` currently only accepts a string
### `/v1/models` ### `/v1/models`
#### Notes #### Notes

View file

@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
On **Linux** systems with systemd, the logs can be found with this command: On **Linux** systems with systemd, the logs can be found with this command:
```shell ```shell
journalctl -u ollama journalctl -u ollama --no-pager
``` ```
When you run Ollama in a **container**, the logs go to stdout/stderr in the container: When you run Ollama in a **container**, the logs go to stdout/stderr in the container:

View file

@ -3,6 +3,7 @@ package format
import ( import (
"fmt" "fmt"
"math" "math"
"strconv"
) )
const ( const (
@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
case b >= Thousand: case b >= Thousand:
return fmt.Sprintf("%.0fK", float64(b)/Thousand) return fmt.Sprintf("%.0fK", float64(b)/Thousand)
default: default:
return fmt.Sprintf("%d", b) return strconv.FormatUint(b, 10)
} }
} }

View file

@ -3,7 +3,7 @@
package gpu package gpu
import ( import (
"fmt" "errors"
"log/slog" "log/slog"
"os" "os"
"path/filepath" "path/filepath"
@ -95,5 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
} }
} }
return "", fmt.Errorf("no suitable rocm found, falling back to CPU") return "", errors.New("no suitable rocm found, falling back to CPU")
} }

View file

@ -1,6 +1,7 @@
package gpu package gpu
import ( import (
"errors"
"fmt" "fmt"
"log/slog" "log/slog"
"syscall" "syscall"
@ -76,7 +77,7 @@ func (hl *HipLib) Release() {
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) { func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
if hl.dll == 0 { if hl.dll == 0 {
return 0, 0, fmt.Errorf("dll has been unloaded") return 0, 0, errors.New("dll has been unloaded")
} }
var version int var version int
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version))) status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
func (hl *HipLib) HipSetDevice(device int) error { func (hl *HipLib) HipSetDevice(device int) error {
if hl.dll == 0 { if hl.dll == 0 {
return fmt.Errorf("dll has been unloaded") return errors.New("dll has been unloaded")
} }
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device)) status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
if status != hipSuccess { if status != hipSuccess {
@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) { func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
if hl.dll == 0 { if hl.dll == 0 {
return nil, fmt.Errorf("dll has been unloaded") return nil, errors.New("dll has been unloaded")
} }
var props hipDevicePropMinimal var props hipDevicePropMinimal
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device)) status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
// free, total, err // free, total, err
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) { func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
if hl.dll == 0 { if hl.dll == 0 {
return 0, 0, fmt.Errorf("dll has been unloaded") return 0, 0, errors.New("dll has been unloaded")
} }
var totalMemory uint64 var totalMemory uint64
var freeMemory uint64 var freeMemory uint64

View file

@ -393,7 +393,7 @@ func AMDValidateLibDir() (string, error) {
// If we still haven't found a usable rocm, the user will have to install it on their own // If we still haven't found a usable rocm, the user will have to install it on their own
slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install") slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
return "", fmt.Errorf("no suitable rocm found, falling back to CPU") return "", errors.New("no suitable rocm found, falling back to CPU")
} }
func AMDDriverVersion() (driverMajor, driverMinor int, err error) { func AMDDriverVersion() (driverMajor, driverMinor int, err error) {

View file

@ -2,7 +2,7 @@ package gpu
import ( import (
"bytes" "bytes"
"fmt" "errors"
"log/slog" "log/slog"
"os" "os"
"path/filepath" "path/filepath"
@ -85,7 +85,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
n = bytes.IndexByte(props.GcnArchName[:], 0) n = bytes.IndexByte(props.GcnArchName[:], 0)
gfx := string(props.GcnArchName[:n]) gfx := string(props.GcnArchName[:n])
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx) slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0 // slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
// TODO Why isn't props.iGPU accurate!? // TODO Why isn't props.iGPU accurate!?
if strings.EqualFold(name, iGPUName) { if strings.EqualFold(name, iGPUName) {
slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx) slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
@ -161,7 +161,7 @@ func AMDValidateLibDir() (string, error) {
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this // Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm") slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
return "", fmt.Errorf("no suitable rocm found, falling back to CPU") return "", errors.New("no suitable rocm found, falling back to CPU")
} }
func (gpus RocmGPUInfoList) RefreshFreeMemory() error { func (gpus RocmGPUInfoList) RefreshFreeMemory() error {

View file

@ -42,7 +42,7 @@ func PayloadsDir() (string, error) {
return "", fmt.Errorf("failed to generate tmp dir: %w", err) return "", fmt.Errorf("failed to generate tmp dir: %w", err)
} }
} else { } else {
err = os.MkdirAll(tmpDir, 0755) err = os.MkdirAll(tmpDir, 0o755)
if err != nil { if err != nil {
return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err) return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
} }
@ -54,7 +54,7 @@ func PayloadsDir() (string, error) {
if err != nil { if err != nil {
return "", err return "", err
} }
if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil { if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
return "", err return "", err
} }

View file

@ -1,6 +1,11 @@
package gpu package gpu
import ( import (
"os"
"path/filepath"
"runtime"
"strings"
"golang.org/x/sys/cpu" "golang.org/x/sys/cpu"
) )
@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
// else LCD // else LCD
return CPUCapabilityNone return CPUCapabilityNone
} }
// IsNUMA reports whether more than one distinct CPU physical package ID is
// visible on this host.
//
// Detection is only attempted on Linux; every other OS reports false. On
// Linux it reads each per-CPU topology/physical_package_id file under sysfs,
// collects the distinct (whitespace-trimmed) values, and returns true as soon
// as a second distinct value is seen. Files that cannot be read are skipped.
//
// NOTE(review): physical_package_id identifies a CPU package (socket); it is
// presumably used here as a proxy for multiple NUMA nodes — confirm that this
// approximation is intended.
func IsNUMA() bool {
	if runtime.GOOS != "linux" {
		// numa support in llama.cpp is linux only
		return false
	}

	// The Glob error is deliberately ignored: on failure the match list is
	// simply empty and the function reports false.
	paths, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")

	// Set of distinct package IDs; struct{} values carry no storage.
	seen := make(map[string]struct{})
	for _, p := range paths {
		raw, err := os.ReadFile(p)
		if err != nil {
			// Best effort: an unreadable CPU entry does not contribute an ID.
			continue
		}
		seen[strings.TrimSpace(string(raw))] = struct{}{}
		if len(seen) > 1 {
			// Two distinct packages are enough; no need to scan the rest.
			return true
		}
	}
	return false
}

View file

@ -7,9 +7,9 @@ package gpu
#cgo windows LDFLAGS: -lpthread #cgo windows LDFLAGS: -lpthread
#include "gpu_info.h" #include "gpu_info.h"
*/ */
import "C" import "C"
import ( import (
"fmt" "fmt"
"log/slog" "log/slog"
@ -70,7 +70,6 @@ var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held // Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles { func initCudaHandles() *cudaHandles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
cHandles := &cudaHandles{} cHandles := &cudaHandles{}
@ -211,14 +210,16 @@ func GetGPUInfo() GpuInfoList {
if err != nil { if err != nil {
slog.Warn("error looking up system memory", "error", err) slog.Warn("error looking up system memory", "error", err)
} }
cpus = []CPUInfo{CPUInfo{ cpus = []CPUInfo{
GpuInfo: GpuInfo{ {
memInfo: mem, GpuInfo: GpuInfo{
Library: "cpu", memInfo: mem,
Variant: cpuCapability, Library: "cpu",
ID: "0", Variant: cpuCapability,
ID: "0",
},
}, },
}} }
// Fallback to CPU mode if we're lacking required vector extensions on x86 // Fallback to CPU mode if we're lacking required vector extensions on x86
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" { if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {

View file

@ -8,6 +8,7 @@ package gpu
#include "gpu_info_darwin.h" #include "gpu_info_darwin.h"
*/ */
import "C" import "C"
import ( import (
"runtime" "runtime"

View file

@ -43,10 +43,12 @@ var OneapiGlobs = []string{
"/usr/lib*/libze_intel_gpu.so*", "/usr/lib*/libze_intel_gpu.so*",
} }
var CudartMgmtName = "libcudart.so*" var (
var NvcudaMgmtName = "libcuda.so*" CudartMgmtName = "libcudart.so*"
var NvmlMgmtName = "" // not currently wired on linux NvcudaMgmtName = "libcuda.so*"
var OneapiMgmtName = "libze_intel_gpu.so" NvmlMgmtName = "" // not currently wired on linux
OneapiMgmtName = "libze_intel_gpu.so"
)
func GetCPUMem() (memInfo, error) { func GetCPUMem() (memInfo, error) {
var mem memInfo var mem memInfo

View file

@ -40,10 +40,12 @@ var OneapiGlobs = []string{
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll", "c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
} }
var CudartMgmtName = "cudart64_*.dll" var (
var NvcudaMgmtName = "nvcuda.dll" CudartMgmtName = "cudart64_*.dll"
var NvmlMgmtName = "nvml.dll" NvcudaMgmtName = "nvcuda.dll"
var OneapiMgmtName = "ze_intel_gpu64.dll" NvmlMgmtName = "nvml.dll"
OneapiMgmtName = "ze_intel_gpu64.dll"
)
func GetCPUMem() (memInfo, error) { func GetCPUMem() (memInfo, error) {
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx} memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}

View file

@ -5,6 +5,7 @@ package integration
import ( import (
"context" "context"
"log/slog" "log/slog"
"os"
"strconv" "strconv"
"sync" "sync"
"testing" "testing"
@ -13,7 +14,6 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
) )
@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
}, },
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims", "british"}, {"england", "english", "massachusetts", "pilgrims", "british"},
} }
) )
var wg sync.WaitGroup var wg sync.WaitGroup
@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
reqLimit := len(req) reqLimit := len(req)
iterLimit := 5 iterLimit := 5
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
if vram != "" { maxVram, err := strconv.ParseUint(s, 10, 64)
max, err := strconv.ParseUint(vram, 10, 64)
require.NoError(t, err) require.NoError(t, err)
// Don't hammer on small VRAM cards... // Don't hammer on small VRAM cards...
if max < 4*1024*1024*1024 { if maxVram < 4*format.GibiByte {
reqLimit = min(reqLimit, 2) reqLimit = min(reqLimit, 2)
iterLimit = 2 iterLimit = 2
} }
@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
consumed := uint64(256 * format.MebiByte) // Assume some baseline usage consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
for i := 0; i < len(req); i++ { for i := 0; i < len(req); i++ {
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long // Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
if i > 1 && consumed > vram { if i > 1 && consumed > maxVram {
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
break break
} }
consumed += chosenModels[i].size consumed += chosenModels[i].size
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
wg.Add(1) wg.Add(1)
go func(i int) { go func(i int) {

View file

@ -35,8 +35,8 @@ var (
}, },
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims"}, {"england", "english", "massachusetts", "pilgrims"},
} }
) )

View file

@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
threadCount := 32 threadCount := 32
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 { if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
threadCount = maxQueue threadCount = int(maxQueue)
} else { } else {
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount)) t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
} }

View file

@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
fn := func(resp api.ProgressResponse) error { fn := func(resp api.ProgressResponse) error {
// fmt.Print(".") // fmt.Print(".")
if !stallTimer.Reset(stallDuration) { if !stallTimer.Reset(stallDuration) {
return fmt.Errorf("stall was detected, aborting status reporting") return errors.New("stall was detected, aborting status reporting")
} }
return nil return nil
} }
@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
select { select {
case <-stallTimer.C: case <-stallTimer.C:
return fmt.Errorf("download stalled") return errors.New("download stalled")
case <-done: case <-done:
return pullError return pullError
} }
@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
// fmt.Print(".") // fmt.Print(".")
buf.Write([]byte(response.Response)) buf.Write([]byte(response.Response))
if !stallTimer.Reset(streamTimeout) { if !stallTimer.Reset(streamTimeout) {
return fmt.Errorf("stall was detected while streaming response, aborting") return errors.New("stall was detected while streaming response, aborting")
} }
return nil return nil
} }
@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
}, },
}, },
[][]string{ [][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"soil", "organic", "earth", "black", "tan"}, {"soil", "organic", "earth", "black", "tan"},
[]string{"england", "english", "massachusetts", "pilgrims", "british"}, {"england", "english", "massachusetts", "pilgrims", "british"},
[]string{"fourth", "july", "declaration", "independence"}, {"fourth", "july", "declaration", "independence"},
[]string{"nitrogen", "oxygen", "carbon", "dioxide"}, {"nitrogen", "oxygen", "carbon", "dioxide"},
} }
} }

View file

@ -44,6 +44,7 @@
#include <errhandlingapi.h> #include <errhandlingapi.h>
#endif #endif
#include <algorithm>
#include <cstddef> #include <cstddef>
#include <thread> #include <thread>
#include <chrono> #include <chrono>
@ -1220,6 +1221,7 @@ struct llama_server_context
res.result_json = json res.result_json = json
{ {
{"id", res.id},
{"embedding", std::vector<float>(embd, embd + n_embd)}, {"embedding", std::vector<float>(embd, embd + n_embd)},
{"timings", slot.get_formated_timings()}, {"timings", slot.get_formated_timings()},
}; };
@ -3203,6 +3205,10 @@ int main(int argc, char **argv) {
} }
responses = result.result_json.value("results", std::vector<json>{result.result_json}); responses = result.result_json.value("results", std::vector<json>{result.result_json});
std::sort(responses.begin(), responses.end(), [](const json& a, const json& b) {
return a["id"] < b["id"];
});
json embeddings = json::array(); json embeddings = json::array();
int prompt_n = 0; int prompt_n = 0;

View file

@ -11,8 +11,9 @@ package llm
// #include <stdlib.h> // #include <stdlib.h>
// #include "llama.h" // #include "llama.h"
import "C" import "C"
import ( import (
"fmt" "errors"
"unsafe" "unsafe"
) )
@ -33,7 +34,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
params.ftype = ftype.Value() params.ftype = ftype.Value()
if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 { if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version") return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
} }
return nil return nil

View file

@ -6,10 +6,11 @@ import (
"os" "os"
"testing" "testing"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/gpu"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/gpu"
) )
func TestEstimateGPULayers(t *testing.T) { func TestEstimateGPULayers(t *testing.T) {

View file

@ -184,15 +184,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params := []string{ params := []string{
"--model", model, "--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--ctx-size", strconv.Itoa(opts.NumCtx),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--batch-size", strconv.Itoa(opts.NumBatch),
"--embedding", "--embedding",
} }
params = append(params, "--log-disable") params = append(params, "--log-disable")
if opts.NumGPU >= 0 { if opts.NumGPU >= 0 {
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU)) params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
} }
if envconfig.Debug() { if envconfig.Debug() {
@ -200,7 +200,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
} }
if opts.MainGPU > 0 { if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU)) params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
} }
if len(adapters) > 0 { if len(adapters) > 0 {
@ -214,7 +214,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
} }
if opts.NumThread > 0 { if opts.NumThread > 0 {
params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread)) params = append(params, "--threads", strconv.Itoa(opts.NumThread))
} }
if !opts.F16KV { if !opts.F16KV {
@ -256,11 +256,17 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock") params = append(params, "--mlock")
} }
if opts.UseNUMA { if gpu.IsNUMA() {
params = append(params, "--numa") numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
numaMode = "numactl"
}
}
params = append(params, "--numa", numaMode)
} }
params = append(params, "--parallel", fmt.Sprintf("%d", numParallel)) params = append(params, "--parallel", strconv.Itoa(numParallel))
if estimate.TensorSplit != "" { if estimate.TensorSplit != "" {
params = append(params, "--tensor-split", estimate.TensorSplit) params = append(params, "--tensor-split", estimate.TensorSplit)
@ -425,7 +431,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
if strings.Contains(s.status.LastErrMsg, "unknown model") { if strings.Contains(s.status.LastErrMsg, "unknown model") {
s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade" s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
} }
s.done <- fmt.Errorf(s.status.LastErrMsg) s.done <- errors.New(s.status.LastErrMsg)
} else { } else {
s.done <- err s.done <- err
} }

View file

@ -3,8 +3,9 @@ package main
import ( import (
"context" "context"
"github.com/ollama/ollama/cmd"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/ollama/ollama/cmd"
) )
func main() { func main() {

View file

@ -5,6 +5,7 @@ import (
"bytes" "bytes"
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
@ -14,6 +15,7 @@ import (
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/types/model" "github.com/ollama/ollama/types/model"
) )
@ -367,24 +369,24 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
for _, c := range content { for _, c := range content {
data, ok := c.(map[string]any) data, ok := c.(map[string]any)
if !ok { if !ok {
return nil, fmt.Errorf("invalid message format") return nil, errors.New("invalid message format")
} }
switch data["type"] { switch data["type"] {
case "text": case "text":
text, ok := data["text"].(string) text, ok := data["text"].(string)
if !ok { if !ok {
return nil, fmt.Errorf("invalid message format") return nil, errors.New("invalid message format")
} }
messages = append(messages, api.Message{Role: msg.Role, Content: text}) messages = append(messages, api.Message{Role: msg.Role, Content: text})
case "image_url": case "image_url":
var url string var url string
if urlMap, ok := data["image_url"].(map[string]any); ok { if urlMap, ok := data["image_url"].(map[string]any); ok {
if url, ok = urlMap["url"].(string); !ok { if url, ok = urlMap["url"].(string); !ok {
return nil, fmt.Errorf("invalid message format") return nil, errors.New("invalid message format")
} }
} else { } else {
if url, ok = data["image_url"].(string); !ok { if url, ok = data["image_url"].(string); !ok {
return nil, fmt.Errorf("invalid message format") return nil, errors.New("invalid message format")
} }
} }
@ -400,17 +402,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
} }
if !valid { if !valid {
return nil, fmt.Errorf("invalid image input") return nil, errors.New("invalid image input")
} }
img, err := base64.StdEncoding.DecodeString(url) img, err := base64.StdEncoding.DecodeString(url)
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid message format") return nil, errors.New("invalid message format")
} }
messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}}) messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
default: default:
return nil, fmt.Errorf("invalid message format") return nil, errors.New("invalid message format")
} }
} }
default: default:
@ -423,7 +425,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
toolCalls[i].Function.Name = tc.Function.Name toolCalls[i].Function.Name = tc.Function.Name
err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments) err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid tool call arguments") return nil, errors.New("invalid tool call arguments")
} }
} }
messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls}) messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
@ -737,14 +739,12 @@ func (w *RetrieveWriter) Write(data []byte) (int, error) {
func (w *EmbedWriter) writeResponse(data []byte) (int, error) { func (w *EmbedWriter) writeResponse(data []byte) (int, error) {
var embedResponse api.EmbedResponse var embedResponse api.EmbedResponse
err := json.Unmarshal(data, &embedResponse) err := json.Unmarshal(data, &embedResponse)
if err != nil { if err != nil {
return 0, err return 0, err
} }
w.ResponseWriter.Header().Set("Content-Type", "application/json") w.ResponseWriter.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w.ResponseWriter).Encode(toEmbeddingList(w.model, embedResponse)) err = json.NewEncoder(w.ResponseWriter).Encode(toEmbeddingList(w.model, embedResponse))
if err != nil { if err != nil {
return 0, err return 0, err
} }

View file

@ -12,13 +12,16 @@ import (
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/ollama/ollama/api"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/ollama/ollama/api"
) )
const prefix = `data:image/jpeg;base64,` const (
const image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=` prefix = `data:image/jpeg;base64,`
const imageURL = prefix + image image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
imageURL = prefix + image
)
func prepareRequest(req *http.Request, body any) { func prepareRequest(req *http.Request, body any) {
bodyBytes, _ := json.Marshal(body) bodyBytes, _ := json.Marshal(body)

View file

@ -82,7 +82,7 @@ TEMPLATE """ {{ if .System }}<|start_header_id|>system<|end_header_id|>
} }
func TestParseFileFrom(t *testing.T) { func TestParseFileFrom(t *testing.T) {
var cases = []struct { cases := []struct {
input string input string
expected []Command expected []Command
err error err error
@ -185,7 +185,7 @@ BADCOMMAND param1 value1
} }
func TestParseFileMessages(t *testing.T) { func TestParseFileMessages(t *testing.T) {
var cases = []struct { cases := []struct {
input string input string
expected []Command expected []Command
err error err error
@ -276,7 +276,7 @@ MESSAGE system`,
} }
func TestParseFileQuoted(t *testing.T) { func TestParseFileQuoted(t *testing.T) {
var cases = []struct { cases := []struct {
multiline string multiline string
expected []Command expected []Command
err error err error
@ -430,7 +430,7 @@ TEMPLATE """
} }
func TestParseFileParameters(t *testing.T) { func TestParseFileParameters(t *testing.T) {
var cases = map[string]struct { cases := map[string]struct {
name, value string name, value string
}{ }{
"numa true": {"numa", "true"}, "numa true": {"numa", "true"},
@ -491,7 +491,7 @@ func TestParseFileParameters(t *testing.T) {
} }
func TestParseFileComments(t *testing.T) { func TestParseFileComments(t *testing.T) {
var cases = []struct { cases := []struct {
input string input string
expected []Command expected []Command
}{ }{
@ -516,7 +516,7 @@ FROM foo
} }
func TestParseFileFormatParseFile(t *testing.T) { func TestParseFileFormatParseFile(t *testing.T) {
var cases = []string{ cases := []string{
` `
FROM foo FROM foo
ADAPTER adapter1 ADAPTER adapter1

View file

@ -6,8 +6,9 @@ import (
"strings" "strings"
"time" "time"
"github.com/ollama/ollama/format"
"golang.org/x/term" "golang.org/x/term"
"github.com/ollama/ollama/format"
) )
type Bar struct { type Bar struct {

View file

@ -13,7 +13,7 @@ type Buffer struct {
DisplayPos int DisplayPos int
Pos int Pos int
Buf *arraylist.List Buf *arraylist.List
//LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end // LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
LineHasSpace *arraylist.List LineHasSpace *arraylist.List
Prompt *Prompt Prompt *Prompt
LineWidth int LineWidth int
@ -56,7 +56,7 @@ func (b *Buffer) GetLineSpacing(line int) bool {
func (b *Buffer) MoveLeft() { func (b *Buffer) MoveLeft() {
if b.Pos > 0 { if b.Pos > 0 {
//asserts that we retrieve a rune // asserts that we retrieve a rune
if e, ok := b.Buf.Get(b.Pos - 1); ok { if e, ok := b.Buf.Get(b.Pos - 1); ok {
if r, ok := e.(rune); ok { if r, ok := e.(rune); ok {
rLength := runewidth.RuneWidth(r) rLength := runewidth.RuneWidth(r)

View file

@ -4,9 +4,7 @@ import (
"errors" "errors"
) )
var ( var ErrInterrupt = errors.New("Interrupt")
ErrInterrupt = errors.New("Interrupt")
)
type InterruptError struct { type InterruptError struct {
Line []rune Line []rune

View file

@ -7,8 +7,10 @@ import (
"unsafe" "unsafe"
) )
const tcgets = 0x5401 const (
const tcsets = 0x5402 tcgets = 0x5401
tcsets = 0x5402
)
func getTermios(fd uintptr) (*Termios, error) { func getTermios(fd uintptr) (*Termios, error) {
termios := new(Termios) termios := new(Termios)

View file

@ -28,8 +28,10 @@ import (
const maxRetries = 6 const maxRetries = 6
var errMaxRetriesExceeded = errors.New("max retries exceeded") var (
var errPartStalled = errors.New("part stalled") errMaxRetriesExceeded = errors.New("max retries exceeded")
errPartStalled = errors.New("part stalled")
)
var blobDownloadManager sync.Map var blobDownloadManager sync.Map

View file

@ -828,7 +828,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
fn(api.ProgressResponse{Status: "retrieving manifest"}) fn(api.ProgressResponse{Status: "retrieving manifest"})
if mp.ProtocolScheme == "http" && !regOpts.Insecure { if mp.ProtocolScheme == "http" && !regOpts.Insecure {
return fmt.Errorf("insecure protocol http") return errors.New("insecure protocol http")
} }
manifest, _, err := GetManifest(mp) manifest, _, err := GetManifest(mp)
@ -895,7 +895,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
} }
if mp.ProtocolScheme == "http" && !regOpts.Insecure { if mp.ProtocolScheme == "http" && !regOpts.Insecure {
return fmt.Errorf("insecure protocol http") return errors.New("insecure protocol http")
} }
fn(api.ProgressResponse{Status: "pulling manifest"}) fn(api.ProgressResponse{Status: "pulling manifest"})
@ -1010,7 +1010,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
} }
var errUnauthorized = fmt.Errorf("unauthorized: access denied") var errUnauthorized = errors.New("unauthorized: access denied")
// getTokenSubject returns the subject of a JWT token, it does not validate the token // getTokenSubject returns the subject of a JWT token, it does not validate the token
func getTokenSubject(token string) string { func getTokenSubject(token string) string {

View file

@ -2,9 +2,9 @@ package server
import ( import (
"crypto/sha256" "crypto/sha256"
"encoding/hex"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt"
"io" "io"
"log/slog" "log/slog"
"os" "os"
@ -88,7 +88,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
m.filepath = p m.filepath = p
m.fi = fi m.fi = fi
m.digest = fmt.Sprintf("%x", sha256sum.Sum(nil)) m.digest = hex.EncodeToString(sha256sum.Sum(nil))
return &m, nil return &m, nil
} }

View file

@ -14,7 +14,7 @@ func createManifest(t *testing.T, path, name string) {
t.Helper() t.Helper()
p := filepath.Join(path, "manifests", name) p := filepath.Join(path, "manifests", name)
if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil { if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -9,6 +9,7 @@ import (
"testing" "testing"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
) )

View file

@ -6,6 +6,7 @@ import (
"testing" "testing"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
) )

View file

@ -55,8 +55,10 @@ func init() {
gin.SetMode(mode) gin.SetMode(mode)
} }
var errRequired = errors.New("is required") var (
var errBadTemplate = errors.New("template error") errRequired = errors.New("is required")
errBadTemplate = errors.New("template error")
)
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) { func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
opts := api.DefaultOptions() opts := api.DefaultOptions()
@ -369,7 +371,6 @@ func (s *Server) EmbedHandler(c *gin.Context) {
input[i] = s input[i] = s
} }
embeddings, err := r.Embed(c.Request.Context(), input) embeddings, err := r.Embed(c.Request.Context(), input)
if err != nil { if err != nil {
slog.Error("embedding generation failed", "error", err) slog.Error("embedding generation failed", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"}) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@ -430,7 +431,6 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
} }
embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt}) embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})
if err != nil { if err != nil {
slog.Info(fmt.Sprintf("embedding generation failed: %v", err)) slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"}) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@ -556,7 +556,7 @@ func checkNameExists(name model.Name) error {
for n := range names { for n := range names {
if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name { if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name {
return fmt.Errorf("a model with that name already exists") return errors.New("a model with that name already exists")
} }
} }
@ -729,7 +729,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
n := model.ParseName(req.Model) n := model.ParseName(req.Model)
if !n.IsValid() { if !n.IsValid() {
return nil, fmt.Errorf("invalid model name") return nil, errors.New("invalid model name")
} }
manifest, err := ParseNamedManifest(n) manifest, err := ParseNamedManifest(n)
@ -993,7 +993,7 @@ func allowedHost(host string) bool {
return true return true
} }
var tlds = []string{ tlds := []string{
"localhost", "localhost",
"local", "local",
"internal", "internal",

View file

@ -2,6 +2,7 @@ package server
import ( import (
"bytes" "bytes"
"cmp"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@ -13,6 +14,7 @@ import (
"testing" "testing"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
) )
@ -52,6 +54,8 @@ func (t *responseRecorder) CloseNotify() <-chan bool {
func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder { func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder {
t.Helper() t.Helper()
// if OLLAMA_MODELS is not set, set it to the temp directory
t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir()))
w := NewRecorder() w := NewRecorder()
c, _ := gin.CreateTestContext(w) c, _ := gin.CreateTestContext(w)

View file

@ -9,6 +9,7 @@ import (
"testing" "testing"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/types/model" "github.com/ollama/ollama/types/model"
) )

View file

@ -8,6 +8,7 @@ import (
"testing" "testing"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )

View file

@ -333,7 +333,6 @@ func Test_Routes(t *testing.T) {
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType) t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
} }
_, err := io.ReadAll(resp.Body) _, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3
// we'll back off down to 1 to try to get it to fit // we'll back off down to 1 to try to get it to fit
var defaultParallel = 4 var defaultParallel = 4
var ErrMaxQueue = fmt.Errorf("server busy, please try again. maximum pending requests exceeded") var ErrMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")
func InitScheduler(ctx context.Context) *Scheduler { func InitScheduler(ctx context.Context) *Scheduler {
maxQueue := envconfig.MaxQueue() maxQueue := envconfig.MaxQueue()

View file

@ -3,23 +3,25 @@ package server
import ( import (
"bytes" "bytes"
"context" "context"
"fmt" "errors"
"log/slog" "log/slog"
"os" "os"
"testing" "testing"
"time" "time"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/app/lifecycle" "github.com/ollama/ollama/app/lifecycle"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu" "github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
"github.com/stretchr/testify/require"
) )
func init() { func TestMain(m *testing.M) {
os.Setenv("OLLAMA_DEBUG", "1") os.Setenv("OLLAMA_DEBUG", "1")
lifecycle.InitLogging() lifecycle.InitLogging()
os.Exit(m.Run())
} }
func TestInitScheduler(t *testing.T) { func TestInitScheduler(t *testing.T) {
@ -46,7 +48,7 @@ func TestLoad(t *testing.T) {
} }
// Fail to load model first // Fail to load model first
s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) { s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
return nil, fmt.Errorf("something failed to load model blah") return nil, errors.New("something failed to load model blah")
} }
gpus := gpu.GpuInfoList{} gpus := gpu.GpuInfoList{}
s.load(req, ggml, gpus, 0) s.load(req, ggml, gpus, 0)
@ -75,7 +77,7 @@ func TestLoad(t *testing.T) {
} }
req.model.ModelPath = "dummy_model_path" req.model.ModelPath = "dummy_model_path"
server.waitResp = fmt.Errorf("wait failure") server.waitResp = errors.New("wait failure")
s.load(req, ggml, gpus, 0) s.load(req, ggml, gpus, 0)
select { select {
case err := <-req.errCh: case err := <-req.errCh:
@ -600,7 +602,7 @@ func TestNeedsReload(t *testing.T) {
resp = runner.needsReload(ctx, req) resp = runner.needsReload(ctx, req)
require.True(t, resp) require.True(t, resp)
req.opts.NumBatch = runner.Options.NumBatch req.opts.NumBatch = runner.Options.NumBatch
llm.pingResp = fmt.Errorf("foo") llm.pingResp = errors.New("foo")
resp = runner.needsReload(ctx, req) resp = runner.needsReload(ctx, req)
require.True(t, resp) require.True(t, resp)
llm.pingResp = nil llm.pingResp = nil
@ -724,15 +726,19 @@ func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitRes
func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error { func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
return s.completionResp return s.completionResp
} }
func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) { func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) {
return s.embedResp, s.embedRespErr return s.embedResp, s.embedRespErr
} }
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) { func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {
return s.tokenizeResp, s.tokenizeRespErr return s.tokenizeResp, s.tokenizeRespErr
} }
func (s *mockLlm) Detokenize(ctx context.Context, tokens []int) (string, error) { func (s *mockLlm) Detokenize(ctx context.Context, tokens []int) (string, error) {
return s.detokenizeResp, s.detonekizeRespErr return s.detokenizeResp, s.detonekizeRespErr
} }
func (s *mockLlm) Close() error { func (s *mockLlm) Close() error {
s.closeCalled = true s.closeCalled = true
return s.closeResp return s.closeResp

View file

@ -12,13 +12,15 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
"strconv"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
"golang.org/x/sync/errgroup"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"golang.org/x/sync/errgroup"
) )
var blobUploadManager sync.Map var blobUploadManager sync.Map
@ -212,7 +214,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *registryOptions) error { func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *registryOptions) error {
headers := make(http.Header) headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream") headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size)) headers.Set("Content-Length", strconv.FormatInt(part.Size, 10))
if method == http.MethodPatch { if method == http.MethodPatch {
headers.Set("X-Redirect-Uploads", "1") headers.Set("X-Redirect-Uploads", "1")

View file

@ -15,8 +15,9 @@ import (
"text/template/parse" "text/template/parse"
"github.com/agnivade/levenshtein" "github.com/agnivade/levenshtein"
"github.com/ollama/ollama/api"
"golang.org/x/exp/maps" "golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
) )
//go:embed index.json //go:embed index.json

View file

@ -12,6 +12,7 @@ import (
"testing" "testing"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
) )

View file

@ -6,8 +6,10 @@ import (
"strings" "strings"
) )
const UnknownOllamaKeyErrMsg = "unknown ollama key" const (
const InvalidModelNameErrMsg = "invalid model name" UnknownOllamaKeyErrMsg = "unknown ollama key"
InvalidModelNameErrMsg = "invalid model name"
)
// TODO: This should have a structured response from the API // TODO: This should have a structured response from the API
type UnknownOllamaKey struct { type UnknownOllamaKey struct {

View file

@ -258,7 +258,7 @@ func (n Name) IsValid() bool {
// IsFullyQualified returns true if all parts of the name are present and // IsFullyQualified returns true if all parts of the name are present and
// valid without the digest. // valid without the digest.
func (n Name) IsFullyQualified() bool { func (n Name) IsFullyQualified() bool {
var parts = []string{ parts := []string{
n.Host, n.Host,
n.Namespace, n.Namespace,
n.Model, n.Model,