ollama/envconfig/config.go

329 lines
10 KiB
Go
Raw Normal View History

package envconfig
import (
"fmt"
"log/slog"
"math"
2024-05-22 04:53:44 +00:00
"net"
2024-07-03 23:44:57 +00:00
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
)
2024-07-03 23:44:57 +00:00
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
func Host() *url.URL {
defaultPort := "11434"
2024-07-08 17:34:12 +00:00
s := strings.TrimSpace(Var("OLLAMA_HOST"))
2024-07-03 23:44:57 +00:00
scheme, hostport, ok := strings.Cut(s, "://")
switch {
case !ok:
scheme, hostport = "http", s
case scheme == "http":
defaultPort = "80"
case scheme == "https":
defaultPort = "443"
}
2024-08-23 20:16:30 +00:00
hostport, path, _ := strings.Cut(hostport, "/")
2024-07-03 23:44:57 +00:00
host, port, err := net.SplitHostPort(hostport)
if err != nil {
host, port = "127.0.0.1", defaultPort
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
2024-07-08 17:34:12 +00:00
slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
2024-08-23 20:16:30 +00:00
port = defaultPort
2024-07-03 23:44:57 +00:00
}
return &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
2024-08-23 20:16:30 +00:00
Path: path,
2024-07-03 23:44:57 +00:00
}
}
2024-07-04 00:02:07 +00:00
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
func Origins() (origins []string) {
2024-07-08 17:34:12 +00:00
if s := Var("OLLAMA_ORIGINS"); s != "" {
2024-07-04 00:02:07 +00:00
origins = strings.Split(s, ",")
}
for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
origins = append(origins,
fmt.Sprintf("http://%s", origin),
fmt.Sprintf("https://%s", origin),
fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
)
}
origins = append(origins,
"app://*",
"file://*",
"tauri://*",
)
return origins
}
2024-07-04 00:07:42 +00:00
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
func Models() string {
2024-07-08 17:34:12 +00:00
if s := Var("OLLAMA_MODELS"); s != "" {
2024-07-04 00:07:42 +00:00
return s
}
home, err := os.UserHomeDir()
if err != nil {
panic(err)
}
return filepath.Join(home, ".ollama", "models")
}
2024-07-04 01:39:35 +00:00
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func KeepAlive() (keepAlive time.Duration) {
keepAlive = 5 * time.Minute
2024-07-08 17:34:12 +00:00
if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
2024-07-04 01:39:35 +00:00
if d, err := time.ParseDuration(s); err == nil {
keepAlive = d
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
keepAlive = time.Duration(n) * time.Second
}
}
if keepAlive < 0 {
return time.Duration(math.MaxInt64)
}
return keepAlive
}
// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
// Zero or Negative values are treated as infinite.
// Default is 5 minutes.
func LoadTimeout() (loadTimeout time.Duration) {
loadTimeout = 5 * time.Minute
if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
if d, err := time.ParseDuration(s); err == nil {
loadTimeout = d
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
loadTimeout = time.Duration(n) * time.Second
}
}
if loadTimeout <= 0 {
return time.Duration(math.MaxInt64)
}
return loadTimeout
}
2024-07-04 00:22:13 +00:00
func Bool(k string) func() bool {
return func() bool {
2024-07-08 17:34:12 +00:00
if s := Var(k); s != "" {
2024-07-04 00:22:13 +00:00
b, err := strconv.ParseBool(s)
if err != nil {
return true
}
return b
}
return false
}
}
var (
// Debug enabled additional debug information.
Debug = Bool("OLLAMA_DEBUG")
// FlashAttention enables the experimental flash attention feature.
FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
// NoHistory disables readline history.
NoHistory = Bool("OLLAMA_NOHISTORY")
// NoPrune disables pruning of model blobs on startup.
NoPrune = Bool("OLLAMA_NOPRUNE")
// SchedSpread allows scheduling models across all GPUs.
SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
// IntelGPU enables experimental Intel GPU detection.
IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
2024-07-04 02:30:19 +00:00
func String(s string) func() string {
return func() string {
2024-07-08 17:34:12 +00:00
return Var(s)
2024-07-04 02:30:19 +00:00
}
}
var (
LLMLibrary = String("OLLAMA_LLM_LIBRARY")
TmpDir = String("OLLAMA_TMPDIR")
CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
func RunnersDir() (p string) {
2024-07-08 17:34:12 +00:00
if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
2024-07-04 02:30:19 +00:00
return p
}
if runtime.GOOS != "windows" {
return
}
defer func() {
if p == "" {
2024-08-14 23:32:57 +00:00
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
2024-07-04 02:30:19 +00:00
}
}()
// On Windows we do not carry the payloads inside the main executable
exe, err := os.Executable()
if err != nil {
return
}
cwd, err := os.Getwd()
if err != nil {
return
}
var paths []string
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
2024-07-04 02:30:19 +00:00
paths = append(paths,
root,
filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
2024-07-04 02:30:19 +00:00
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for _, path := range paths {
2024-08-14 23:32:57 +00:00
candidate := filepath.Join(path, "lib", "ollama", "runners")
2024-07-04 02:30:19 +00:00
if _, err := os.Stat(candidate); err == nil {
p = candidate
break
}
}
return p
}
2024-07-08 17:34:12 +00:00
func Uint(key string, defaultValue uint) func() uint {
return func() uint {
if s := Var(key); s != "" {
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
} else {
return uint(n)
2024-07-04 02:41:17 +00:00
}
}
2024-07-08 17:34:12 +00:00
return defaultValue
2024-07-04 02:41:17 +00:00
}
}
var (
2024-07-08 17:34:12 +00:00
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
func Uint64(key string, defaultValue uint64) func() uint64 {
return func() uint64 {
if s := Var(key); s != "" {
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
} else {
return n
}
}
return defaultValue
}
}
// Set aside VRAM per GPU
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
type EnvVar struct {
Name string
Value any
Description string
}
func AsMap() map[string]EnvVar {
ret := map[string]EnvVar{
2024-07-03 23:00:54 +00:00
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
2024-07-04 00:22:13 +00:00
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
2024-07-03 23:44:57 +00:00
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
2024-07-04 01:39:35 +00:00
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
2024-07-04 02:30:19 +00:00
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
2024-07-04 02:41:17 +00:00
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
2024-07-04 00:07:42 +00:00
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
2024-07-04 00:22:13 +00:00
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
2024-07-04 02:41:17 +00:00
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
2024-07-04 00:02:07 +00:00
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
2024-07-04 02:30:19 +00:00
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
2024-07-04 00:22:13 +00:00
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
2024-07-04 02:30:19 +00:00
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
}
if runtime.GOOS != "darwin" {
2024-07-04 02:30:19 +00:00
ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
2024-07-04 00:22:13 +00:00
ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
}
return ret
}
func Values() map[string]string {
vals := make(map[string]string)
for k, v := range AsMap() {
vals[k] = fmt.Sprintf("%v", v.Value)
}
return vals
}
2024-07-08 17:34:12 +00:00
// Var returns an environment variable stripped of leading and trailing quotes or spaces
func Var(key string) string {
return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}
// On windows, we keep the binary at the top directory, but
// other platforms use a "bin" directory, so this returns ".."
func LibRelativeToExe() string {
if runtime.GOOS == "windows" {
return "."
}
return ".."
}