Mechanical switch from log to slog
A few obvious levels were adjusted, but generally everything mapped to "info" level.
This commit is contained in:
parent
df40b11d03
commit
fedd705aea
12 changed files with 90 additions and 81 deletions
|
@ -1,9 +1,5 @@
|
||||||
# Development
|
# Development
|
||||||
|
|
||||||
- Install cmake or (optionally, required tools for GPUs)
|
|
||||||
- run `go generate ./...`
|
|
||||||
- run `go build .`
|
|
||||||
|
|
||||||
Install required tools:
|
Install required tools:
|
||||||
|
|
||||||
- cmake version 3.24 or higher
|
- cmake version 3.24 or higher
|
||||||
|
@ -17,7 +13,11 @@ brew install go cmake gcc
|
||||||
Optionally enable debugging and more verbose logging:
|
Optionally enable debugging and more verbose logging:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# At build time
|
||||||
export CGO_CFLAGS="-g"
|
export CGO_CFLAGS="-g"
|
||||||
|
|
||||||
|
# At runtime
|
||||||
|
export OLLAMA_DEBUG=1
|
||||||
```
|
```
|
||||||
|
|
||||||
Get the required libraries and build the native LLM code:
|
Get the required libraries and build the native LLM code:
|
||||||
|
|
|
@ -1,21 +1,21 @@
|
||||||
package gpu
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
"log/slog"
|
||||||
|
|
||||||
"golang.org/x/sys/cpu"
|
"golang.org/x/sys/cpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetCPUVariant() string {
|
func GetCPUVariant() string {
|
||||||
if cpu.X86.HasAVX2 {
|
if cpu.X86.HasAVX2 {
|
||||||
log.Printf("CPU has AVX2")
|
slog.Info("CPU has AVX2")
|
||||||
return "avx2"
|
return "avx2"
|
||||||
}
|
}
|
||||||
if cpu.X86.HasAVX {
|
if cpu.X86.HasAVX {
|
||||||
log.Printf("CPU has AVX")
|
slog.Info("CPU has AVX")
|
||||||
return "avx"
|
return "avx"
|
||||||
}
|
}
|
||||||
log.Printf("CPU does not have vector extensions")
|
slog.Info("CPU does not have vector extensions")
|
||||||
// else LCD
|
// else LCD
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
31
gpu/gpu.go
31
gpu/gpu.go
|
@ -12,7 +12,7 @@ package gpu
|
||||||
import "C"
|
import "C"
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
@ -85,13 +85,13 @@ func initGPUHandles() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Detecting GPU type")
|
slog.Info("Detecting GPU type")
|
||||||
gpuHandles = &handles{nil, nil}
|
gpuHandles = &handles{nil, nil}
|
||||||
cudaLibPaths := FindGPULibs(cudaMgmtName, cudaMgmtPatterns)
|
cudaLibPaths := FindGPULibs(cudaMgmtName, cudaMgmtPatterns)
|
||||||
if len(cudaLibPaths) > 0 {
|
if len(cudaLibPaths) > 0 {
|
||||||
cuda := LoadCUDAMgmt(cudaLibPaths)
|
cuda := LoadCUDAMgmt(cudaLibPaths)
|
||||||
if cuda != nil {
|
if cuda != nil {
|
||||||
log.Printf("Nvidia GPU detected")
|
slog.Info("Nvidia GPU detected")
|
||||||
gpuHandles.cuda = cuda
|
gpuHandles.cuda = cuda
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -101,7 +101,7 @@ func initGPUHandles() {
|
||||||
if len(rocmLibPaths) > 0 {
|
if len(rocmLibPaths) > 0 {
|
||||||
rocm := LoadROCMMgmt(rocmLibPaths)
|
rocm := LoadROCMMgmt(rocmLibPaths)
|
||||||
if rocm != nil {
|
if rocm != nil {
|
||||||
log.Printf("Radeon GPU detected")
|
slog.Info("Radeon GPU detected")
|
||||||
gpuHandles.rocm = rocm
|
gpuHandles.rocm = rocm
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -122,26 +122,26 @@ func GetGPUInfo() GpuInfo {
|
||||||
if gpuHandles.cuda != nil {
|
if gpuHandles.cuda != nil {
|
||||||
C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
|
C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
|
||||||
if memInfo.err != nil {
|
if memInfo.err != nil {
|
||||||
log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
|
slog.Info(fmt.Sprintf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err)))
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
} else {
|
} else {
|
||||||
// Verify minimum compute capability
|
// Verify minimum compute capability
|
||||||
var cc C.cuda_compute_capability_t
|
var cc C.cuda_compute_capability_t
|
||||||
C.cuda_compute_capability(*gpuHandles.cuda, &cc)
|
C.cuda_compute_capability(*gpuHandles.cuda, &cc)
|
||||||
if cc.err != nil {
|
if cc.err != nil {
|
||||||
log.Printf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err))
|
slog.Info(fmt.Sprintf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err)))
|
||||||
C.free(unsafe.Pointer(cc.err))
|
C.free(unsafe.Pointer(cc.err))
|
||||||
} else if cc.major >= CudaComputeMajorMin {
|
} else if cc.major >= CudaComputeMajorMin {
|
||||||
log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
|
slog.Info(fmt.Sprintf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor))
|
||||||
resp.Library = "cuda"
|
resp.Library = "cuda"
|
||||||
} else {
|
} else {
|
||||||
log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
|
slog.Info(fmt.Sprintf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if gpuHandles.rocm != nil {
|
} else if gpuHandles.rocm != nil {
|
||||||
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
||||||
if memInfo.err != nil {
|
if memInfo.err != nil {
|
||||||
log.Printf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err))
|
slog.Info(fmt.Sprintf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err)))
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
} else {
|
} else {
|
||||||
resp.Library = "rocm"
|
resp.Library = "rocm"
|
||||||
|
@ -151,7 +151,7 @@ func GetGPUInfo() GpuInfo {
|
||||||
if version.status == 0 {
|
if version.status == 0 {
|
||||||
resp.Variant = "v" + verString
|
resp.Variant = "v" + verString
|
||||||
} else {
|
} else {
|
||||||
log.Printf("failed to look up ROCm version: %s", verString)
|
slog.Info(fmt.Sprintf("failed to look up ROCm version: %s", verString))
|
||||||
}
|
}
|
||||||
C.free(unsafe.Pointer(version.str))
|
C.free(unsafe.Pointer(version.str))
|
||||||
}
|
}
|
||||||
|
@ -162,7 +162,7 @@ func GetGPUInfo() GpuInfo {
|
||||||
resp.Variant = GetCPUVariant()
|
resp.Variant = GetCPUVariant()
|
||||||
}
|
}
|
||||||
if memInfo.err != nil {
|
if memInfo.err != nil {
|
||||||
log.Printf("error looking up CPU memory: %s", C.GoString(memInfo.err))
|
slog.Info(fmt.Sprintf("error looking up CPU memory: %s", C.GoString(memInfo.err)))
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
@ -205,7 +205,7 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
|
||||||
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
|
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
|
||||||
var ldPaths []string
|
var ldPaths []string
|
||||||
gpuLibPaths := []string{}
|
gpuLibPaths := []string{}
|
||||||
log.Printf("Searching for GPU management library %s", baseLibName)
|
slog.Info(fmt.Sprintf("Searching for GPU management library %s", baseLibName))
|
||||||
|
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "windows":
|
case "windows":
|
||||||
|
@ -223,6 +223,7 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
|
||||||
}
|
}
|
||||||
patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
|
patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
|
||||||
}
|
}
|
||||||
|
slog.Debug(fmt.Sprintf("gpu management search paths: %v", patterns))
|
||||||
for _, pattern := range patterns {
|
for _, pattern := range patterns {
|
||||||
// Ignore glob discovery errors
|
// Ignore glob discovery errors
|
||||||
matches, _ := filepath.Glob(pattern)
|
matches, _ := filepath.Glob(pattern)
|
||||||
|
@ -250,7 +251,7 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.Printf("Discovered GPU libraries: %v", gpuLibPaths)
|
slog.Info(fmt.Sprintf("Discovered GPU libraries: %v", gpuLibPaths))
|
||||||
return gpuLibPaths
|
return gpuLibPaths
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,7 +262,7 @@ func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
|
||||||
defer C.free(unsafe.Pointer(lib))
|
defer C.free(unsafe.Pointer(lib))
|
||||||
C.cuda_init(lib, &resp)
|
C.cuda_init(lib, &resp)
|
||||||
if resp.err != nil {
|
if resp.err != nil {
|
||||||
log.Printf("Unable to load CUDA management library %s: %s", libPath, C.GoString(resp.err))
|
slog.Info(fmt.Sprintf("Unable to load CUDA management library %s: %s", libPath, C.GoString(resp.err)))
|
||||||
C.free(unsafe.Pointer(resp.err))
|
C.free(unsafe.Pointer(resp.err))
|
||||||
} else {
|
} else {
|
||||||
return &resp.ch
|
return &resp.ch
|
||||||
|
@ -277,7 +278,7 @@ func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
|
||||||
defer C.free(unsafe.Pointer(lib))
|
defer C.free(unsafe.Pointer(lib))
|
||||||
C.rocm_init(lib, &resp)
|
C.rocm_init(lib, &resp)
|
||||||
if resp.err != nil {
|
if resp.err != nil {
|
||||||
log.Printf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err))
|
slog.Info(fmt.Sprintf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err)))
|
||||||
C.free(unsafe.Pointer(resp.err))
|
C.free(unsafe.Pointer(resp.err))
|
||||||
} else {
|
} else {
|
||||||
return &resp.rh
|
return &resp.rh
|
||||||
|
|
|
@ -25,7 +25,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
@ -69,7 +69,7 @@ var llm *dynExtServer
|
||||||
|
|
||||||
func newDynExtServer(library, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
func newDynExtServer(library, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
||||||
if !mutex.TryLock() {
|
if !mutex.TryLock() {
|
||||||
log.Printf("concurrent llm servers not yet supported, waiting for prior server to complete")
|
slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete")
|
||||||
mutex.Lock()
|
mutex.Lock()
|
||||||
}
|
}
|
||||||
updatePath(filepath.Dir(library))
|
updatePath(filepath.Dir(library))
|
||||||
|
@ -87,7 +87,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||||
s: srv,
|
s: srv,
|
||||||
options: opts,
|
options: opts,
|
||||||
}
|
}
|
||||||
log.Printf("Loading Dynamic llm server: %s", library)
|
slog.Info(fmt.Sprintf("Loading Dynamic llm server: %s", library))
|
||||||
|
|
||||||
var sparams C.ext_server_params_t
|
var sparams C.ext_server_params_t
|
||||||
sparams.model = C.CString(model)
|
sparams.model = C.CString(model)
|
||||||
|
@ -136,7 +136,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||||
|
|
||||||
sparams.n_threads = C.uint(opts.NumThread)
|
sparams.n_threads = C.uint(opts.NumThread)
|
||||||
|
|
||||||
log.Printf("Initializing llama server")
|
slog.Info("Initializing llama server")
|
||||||
initResp := newExtServerResp(128)
|
initResp := newExtServerResp(128)
|
||||||
defer freeExtServerResp(initResp)
|
defer freeExtServerResp(initResp)
|
||||||
C.dyn_llama_server_init(llm.s, &sparams, &initResp)
|
C.dyn_llama_server_init(llm.s, &sparams, &initResp)
|
||||||
|
@ -144,7 +144,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||||
return nil, extServerResponseToErr(initResp)
|
return nil, extServerResponseToErr(initResp)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Starting llama main loop")
|
slog.Info("Starting llama main loop")
|
||||||
C.dyn_llama_server_start(llm.s)
|
C.dyn_llama_server_start(llm.s)
|
||||||
return llm, nil
|
return llm, nil
|
||||||
}
|
}
|
||||||
|
@ -158,7 +158,7 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||||
imageData = append(imageData, ImageData{Data: i, ID: cnt})
|
imageData = append(imageData, ImageData{Data: i, ID: cnt})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.Printf("loaded %d images", len(imageData))
|
slog.Info(fmt.Sprintf("loaded %d images", len(imageData)))
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": predict.Prompt,
|
"prompt": predict.Prompt,
|
||||||
|
@ -370,7 +370,7 @@ func updatePath(dir string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
|
newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
|
||||||
log.Printf("Updating PATH to %s", newPath)
|
slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
|
||||||
os.Setenv("PATH", newPath)
|
os.Setenv("PATH", newPath)
|
||||||
}
|
}
|
||||||
// linux and darwin rely on rpath
|
// linux and darwin rely on rpath
|
||||||
|
|
16
llm/llm.go
16
llm/llm.go
|
@ -3,7 +3,7 @@ package llm
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.NumCtx > int(ggml.NumCtx()) {
|
if opts.NumCtx > int(ggml.NumCtx()) {
|
||||||
log.Printf("WARNING: requested context length is greater than model's max context length (%d > %d), using %d instead", opts.NumCtx, ggml.NumCtx(), ggml.NumCtx())
|
slog.Warn(fmt.Sprintf("requested context length is greater than model's max context length (%d > %d), using %d instead", opts.NumCtx, ggml.NumCtx(), ggml.NumCtx()))
|
||||||
opts.NumCtx = int(ggml.NumCtx())
|
opts.NumCtx = int(ggml.NumCtx())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||||
}
|
}
|
||||||
|
|
||||||
if size+kv+graph > vram {
|
if size+kv+graph > vram {
|
||||||
log.Println("not enough vram available, falling back to CPU only")
|
slog.Info("not enough vram available, falling back to CPU only")
|
||||||
info.Library = "cpu"
|
info.Library = "cpu"
|
||||||
info.Variant = gpu.GetCPUVariant()
|
info.Variant = gpu.GetCPUVariant()
|
||||||
opts.NumGPU = 0
|
opts.NumGPU = 0
|
||||||
|
@ -73,7 +73,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||||
opts.NumGPU = 1
|
opts.NumGPU = 1
|
||||||
default:
|
default:
|
||||||
if info.Library == "cpu" {
|
if info.Library == "cpu" {
|
||||||
log.Println("GPU not available, falling back to CPU")
|
slog.Info("GPU not available, falling back to CPU")
|
||||||
opts.NumGPU = 0
|
opts.NumGPU = 0
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -107,7 +107,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||||
// 1 + 2 must fit on the main gpu
|
// 1 + 2 must fit on the main gpu
|
||||||
min := graph + kv*layers/maxlayers
|
min := graph + kv*layers/maxlayers
|
||||||
if layers <= 0 || min > avg {
|
if layers <= 0 || min > avg {
|
||||||
log.Printf("not enough vram available, falling back to CPU only")
|
slog.Info("not enough vram available, falling back to CPU only")
|
||||||
info.Library = "cpu"
|
info.Library = "cpu"
|
||||||
info.Variant = gpu.GetCPUVariant()
|
info.Variant = gpu.GetCPUVariant()
|
||||||
opts.NumGPU = 0
|
opts.NumGPU = 0
|
||||||
|
@ -135,9 +135,9 @@ func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []stri
|
||||||
if demandLib != "" {
|
if demandLib != "" {
|
||||||
libPath := availableDynLibs[demandLib]
|
libPath := availableDynLibs[demandLib]
|
||||||
if libPath == "" {
|
if libPath == "" {
|
||||||
log.Printf("Invalid OLLAMA_LLM_LIBRARY %s - not found", demandLib)
|
slog.Info(fmt.Sprintf("Invalid OLLAMA_LLM_LIBRARY %s - not found", demandLib))
|
||||||
} else {
|
} else {
|
||||||
log.Printf("Loading OLLAMA_LLM_LIBRARY=%s", demandLib)
|
slog.Info(fmt.Sprintf("Loading OLLAMA_LLM_LIBRARY=%s", demandLib))
|
||||||
dynLibs = []string{libPath}
|
dynLibs = []string{libPath}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -148,7 +148,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []stri
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return srv, nil
|
return srv, nil
|
||||||
}
|
}
|
||||||
log.Printf("Failed to load dynamic library %s %s", dynLib, err)
|
slog.Warn(fmt.Sprintf("Failed to load dynamic library %s %s", dynLib, err))
|
||||||
err2 = err
|
err2 = err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"log"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
@ -103,13 +103,13 @@ func rocmDynLibPresent() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func nativeInit(workdir string) error {
|
func nativeInit(workdir string) error {
|
||||||
log.Printf("Extracting dynamic libraries...")
|
slog.Info("Extracting dynamic libraries...")
|
||||||
if runtime.GOOS == "darwin" {
|
if runtime.GOOS == "darwin" {
|
||||||
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == payloadMissing {
|
if err == payloadMissing {
|
||||||
// TODO perhaps consider this a hard failure on arm macs?
|
// TODO perhaps consider this a hard failure on arm macs?
|
||||||
log.Printf("ggml-meta.metal payload missing")
|
slog.Info("ggml-meta.metal payload missing")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
@ -120,7 +120,7 @@ func nativeInit(workdir string) error {
|
||||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
|
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == payloadMissing {
|
if err == payloadMissing {
|
||||||
log.Printf("%s", payloadMissing)
|
slog.Info(fmt.Sprintf("%s", payloadMissing))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
@ -142,8 +142,8 @@ func nativeInit(workdir string) error {
|
||||||
variants[i] = variant
|
variants[i] = variant
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
log.Printf("Dynamic LLM libraries %v", variants)
|
slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
|
||||||
log.Printf("Override detection logic by setting OLLAMA_LLM_LIBRARY")
|
slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
pathComps := strings.Split(file, "/")
|
pathComps := strings.Split(file, "/")
|
||||||
if len(pathComps) != pathComponentCount {
|
if len(pathComps) != pathComponentCount {
|
||||||
log.Printf("unexpected payload components: %v", pathComps)
|
slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log/slog"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Command struct {
|
type Command struct {
|
||||||
|
@ -59,7 +59,7 @@ func Parse(reader io.Reader) ([]Command, error) {
|
||||||
default:
|
default:
|
||||||
if !bytes.HasPrefix(fields[0], []byte("#")) {
|
if !bytes.HasPrefix(fields[0], []byte("#")) {
|
||||||
// log a warning for unknown commands
|
// log a warning for unknown commands
|
||||||
log.Printf("WARNING: Unknown command: %s", fields[0])
|
slog.Warn(fmt.Sprintf("Unknown command: %s", fields[0]))
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
@ -86,7 +86,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) {
|
||||||
|
|
||||||
rawKey, err := os.ReadFile(keyPath)
|
rawKey, err := os.ReadFile(keyPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Failed to load private key: %v", err)
|
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) {
|
||||||
headers.Set("Authorization", sig)
|
headers.Set("Authorization", sig)
|
||||||
resp, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil)
|
resp, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("couldn't get token: %q", err)
|
slog.Info(fmt.Sprintf("couldn't get token: %q", err))
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
|
@ -6,7 +6,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log/slog"
|
||||||
"math"
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
@ -120,7 +120,7 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("downloading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size))
|
slog.Info(fmt.Sprintf("downloading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size)))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -159,7 +159,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
|
||||||
return err
|
return err
|
||||||
case err != nil:
|
case err != nil:
|
||||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||||
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
|
slog.Info(fmt.Sprintf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep))
|
||||||
time.Sleep(sleep)
|
time.Sleep(sleep)
|
||||||
continue
|
continue
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
@ -336,7 +337,7 @@ func GetModel(name string) (*Model, error) {
|
||||||
case "application/vnd.ollama.image.embed":
|
case "application/vnd.ollama.image.embed":
|
||||||
// Deprecated in versions > 0.1.2
|
// Deprecated in versions > 0.1.2
|
||||||
// TODO: remove this warning in a future version
|
// TODO: remove this warning in a future version
|
||||||
log.Print("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
|
slog.Info("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
|
||||||
case "application/vnd.ollama.image.adapter":
|
case "application/vnd.ollama.image.adapter":
|
||||||
model.AdapterPaths = append(model.AdapterPaths, filename)
|
model.AdapterPaths = append(model.AdapterPaths, filename)
|
||||||
case "application/vnd.ollama.image.projector":
|
case "application/vnd.ollama.image.projector":
|
||||||
|
@ -427,7 +428,7 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||||
fromParams := make(map[string]any)
|
fromParams := make(map[string]any)
|
||||||
|
|
||||||
for _, c := range commands {
|
for _, c := range commands {
|
||||||
log.Printf("[%s] - %s", c.Name, c.Args)
|
slog.Info(fmt.Sprintf("[%s] - %s", c.Name, c.Args))
|
||||||
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
|
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
|
||||||
|
|
||||||
switch c.Name {
|
switch c.Name {
|
||||||
|
@ -767,16 +768,16 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},
|
||||||
for k := range deleteMap {
|
for k := range deleteMap {
|
||||||
fp, err := GetBlobsPath(k)
|
fp, err := GetBlobsPath(k)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("couldn't get file path for '%s': %v", k, err)
|
slog.Info(fmt.Sprintf("couldn't get file path for '%s': %v", k, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if !dryRun {
|
if !dryRun {
|
||||||
if err := os.Remove(fp); err != nil {
|
if err := os.Remove(fp); err != nil {
|
||||||
log.Printf("couldn't remove file '%s': %v", fp, err)
|
slog.Info(fmt.Sprintf("couldn't remove file '%s': %v", fp, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Printf("wanted to remove: %s", fp)
|
slog.Info(fmt.Sprintf("wanted to remove: %s", fp))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -792,7 +793,7 @@ func PruneLayers() error {
|
||||||
|
|
||||||
blobs, err := os.ReadDir(p)
|
blobs, err := os.ReadDir(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("couldn't read dir '%s': %v", p, err)
|
slog.Info(fmt.Sprintf("couldn't read dir '%s': %v", p, err))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -806,14 +807,14 @@ func PruneLayers() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("total blobs: %d", len(deleteMap))
|
slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap)))
|
||||||
|
|
||||||
err = deleteUnusedLayers(nil, deleteMap, false)
|
err = deleteUnusedLayers(nil, deleteMap, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("total unused blobs removed: %d", len(deleteMap))
|
slog.Info(fmt.Sprintf("total unused blobs removed: %d", len(deleteMap)))
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -875,7 +876,7 @@ func DeleteModel(name string) error {
|
||||||
}
|
}
|
||||||
err = os.Remove(fp)
|
err = os.Remove(fp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("couldn't remove manifest file '%s': %v", fp, err)
|
slog.Info(fmt.Sprintf("couldn't remove manifest file '%s': %v", fp, err))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -929,14 +930,14 @@ PARAMETER {{ $k }} {{ printf "%#v" $parameter }}
|
||||||
|
|
||||||
tmpl, err := template.New("").Parse(modelFile)
|
tmpl, err := template.New("").Parse(modelFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error parsing template: %q", err)
|
slog.Info(fmt.Sprintf("error parsing template: %q", err))
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
|
|
||||||
if err = tmpl.Execute(&buf, mt); err != nil {
|
if err = tmpl.Execute(&buf, mt); err != nil {
|
||||||
log.Printf("error executing template: %q", err)
|
slog.Info(fmt.Sprintf("error executing template: %q", err))
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -963,7 +964,7 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
|
||||||
|
|
||||||
for _, layer := range layers {
|
for _, layer := range layers {
|
||||||
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
|
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
|
||||||
log.Printf("error uploading blob: %v", err)
|
slog.Info(fmt.Sprintf("error uploading blob: %v", err))
|
||||||
if errors.Is(err, errUnauthorized) {
|
if errors.Is(err, errUnauthorized) {
|
||||||
return fmt.Errorf("unable to push %s, make sure this namespace exists and you are authorized to push to it", ParseModelPath(name).GetNamespaceRepository())
|
return fmt.Errorf("unable to push %s, make sure this namespace exists and you are authorized to push to it", ParseModelPath(name).GetNamespaceRepository())
|
||||||
}
|
}
|
||||||
|
@ -1058,7 +1059,7 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
|
||||||
}
|
}
|
||||||
if err := os.Remove(fp); err != nil {
|
if err := os.Remove(fp); err != nil {
|
||||||
// log this, but return the original error
|
// log this, but return the original error
|
||||||
log.Printf("couldn't remove file with digest mismatch '%s': %v", fp, err)
|
slog.Info(fmt.Sprintf("couldn't remove file with digest mismatch '%s': %v", fp, err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
@ -1082,7 +1083,7 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
|
||||||
|
|
||||||
err = os.WriteFile(fp, manifestJSON, 0o644)
|
err = os.WriteFile(fp, manifestJSON, 0o644)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("couldn't write to %s", fp)
|
slog.Info(fmt.Sprintf("couldn't write to %s", fp))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1136,7 +1137,7 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||||
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !errors.Is(err, context.Canceled) {
|
if !errors.Is(err, context.Canceled) {
|
||||||
log.Printf("request failed: %v", err)
|
slog.Info(fmt.Sprintf("request failed: %v", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
|
@ -7,7 +7,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"log"
|
"log/slog"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
@ -73,7 +73,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
|
||||||
|
|
||||||
if needLoad {
|
if needLoad {
|
||||||
if loaded.runner != nil {
|
if loaded.runner != nil {
|
||||||
log.Println("changing loaded model")
|
slog.Info("changing loaded model")
|
||||||
loaded.runner.Close()
|
loaded.runner.Close()
|
||||||
loaded.runner = nil
|
loaded.runner = nil
|
||||||
loaded.Model = nil
|
loaded.Model = nil
|
||||||
|
@ -391,7 +391,7 @@ func EmbeddingHandler(c *gin.Context) {
|
||||||
|
|
||||||
embedding, err := loaded.runner.Embedding(c.Request.Context(), req.Prompt)
|
embedding, err := loaded.runner.Embedding(c.Request.Context(), req.Prompt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("embedding generation failed: %v", err)
|
slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -732,7 +732,7 @@ func ListModelsHandler(c *gin.Context) {
|
||||||
|
|
||||||
resp, err := modelResponse(canonicalModelPath)
|
resp, err := modelResponse(canonicalModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("skipping file: %s", canonicalModelPath)
|
slog.Info(fmt.Sprintf("skipping file: %s", canonicalModelPath))
|
||||||
// nolint: nilerr
|
// nolint: nilerr
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -892,6 +892,13 @@ func (s *Server) GenerateRoutes() http.Handler {
|
||||||
}
|
}
|
||||||
|
|
||||||
func Serve(ln net.Listener) error {
|
func Serve(ln net.Listener) error {
|
||||||
|
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
|
||||||
|
var programLevel = new(slog.LevelVar)
|
||||||
|
h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: programLevel, AddSource: true})
|
||||||
|
slog.SetDefault(slog.New(h))
|
||||||
|
programLevel.Set(slog.LevelDebug)
|
||||||
|
slog.Debug("Debug logging enabled")
|
||||||
|
}
|
||||||
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
|
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
|
||||||
// clean up unused layers and manifests
|
// clean up unused layers and manifests
|
||||||
if err := PruneLayers(); err != nil {
|
if err := PruneLayers(); err != nil {
|
||||||
|
@ -914,7 +921,7 @@ func Serve(ln net.Listener) error {
|
||||||
}
|
}
|
||||||
r := s.GenerateRoutes()
|
r := s.GenerateRoutes()
|
||||||
|
|
||||||
log.Printf("Listening on %s (version %s)", ln.Addr(), version.Version)
|
slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
|
||||||
srvr := &http.Server{
|
srvr := &http.Server{
|
||||||
Handler: r,
|
Handler: r,
|
||||||
}
|
}
|
||||||
|
@ -937,7 +944,7 @@ func Serve(ln net.Listener) error {
|
||||||
if runtime.GOOS == "linux" { // TODO - windows too
|
if runtime.GOOS == "linux" { // TODO - windows too
|
||||||
// check compatibility to log warnings
|
// check compatibility to log warnings
|
||||||
if _, err := gpu.CheckVRAM(); err != nil {
|
if _, err := gpu.CheckVRAM(); err != nil {
|
||||||
log.Print(err.Error())
|
slog.Info(err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -979,14 +986,14 @@ func streamResponse(c *gin.Context, ch chan any) {
|
||||||
|
|
||||||
bts, err := json.Marshal(val)
|
bts, err := json.Marshal(val)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("streamResponse: json.Marshal failed with %s", err)
|
slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delineate chunks with new-line delimiter
|
// Delineate chunks with new-line delimiter
|
||||||
bts = append(bts, '\n')
|
bts = append(bts, '\n')
|
||||||
if _, err := w.Write(bts); err != nil {
|
if _, err := w.Write(bts); err != nil {
|
||||||
log.Printf("streamResponse: w.Write failed with %s", err)
|
slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash"
|
"hash"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log/slog"
|
||||||
"math"
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
@ -107,7 +107,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
|
||||||
offset += size
|
offset += size
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("uploading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size))
|
slog.Info(fmt.Sprintf("uploading %s in %d %s part(s)", b.Digest[7:19], len(b.Parts), format.HumanBytes(b.Parts[0].Size)))
|
||||||
|
|
||||||
requestURL, err = url.Parse(location)
|
requestURL, err = url.Parse(location)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -156,7 +156,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
|
||||||
return err
|
return err
|
||||||
case err != nil:
|
case err != nil:
|
||||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||||
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
|
slog.Info(fmt.Sprintf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep))
|
||||||
time.Sleep(sleep)
|
time.Sleep(sleep)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -200,7 +200,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
|
||||||
break
|
break
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||||
log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
|
slog.Info(fmt.Sprintf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep))
|
||||||
time.Sleep(sleep)
|
time.Sleep(sleep)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -265,7 +265,7 @@ func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *
|
||||||
return err
|
return err
|
||||||
case err != nil:
|
case err != nil:
|
||||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||||
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
|
slog.Info(fmt.Sprintf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep))
|
||||||
time.Sleep(sleep)
|
time.Sleep(sleep)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue