package envconfig

import (
	"fmt"
	"log/slog"
	"math"
	"net"
	"net/url"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"
)

// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
func Host() *url.URL {
	defaultPort := "11434"

	s := strings.TrimSpace(Var("OLLAMA_HOST"))
	scheme, hostport, ok := strings.Cut(s, "://")
	switch {
	case !ok:
		scheme, hostport = "http", s
	case scheme == "http":
		defaultPort = "80"
	case scheme == "https":
		defaultPort = "443"
	}

	hostport, path, _ := strings.Cut(hostport, "/")
	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}

	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
		port = defaultPort
	}

	return &url.URL{
		Scheme: scheme,
		Host:   net.JoinHostPort(host, port),
		Path:   path,
	}
}

// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
func Origins() (origins []string) {
	if s := Var("OLLAMA_ORIGINS"); s != "" {
		origins = strings.Split(s, ",")
	}

	for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
		origins = append(origins,
			fmt.Sprintf("http://%s", origin),
			fmt.Sprintf("https://%s", origin),
			fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
			fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
		)
	}

	origins = append(origins,
		"app://*",
		"file://*",
		"tauri://*",
	)

	return origins
}

// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
func Models() string {
	if s := Var("OLLAMA_MODELS"); s != "" {
		return s
	}

	home, err := os.UserHomeDir()
	if err != nil {
		panic(err)
	}

	return filepath.Join(home, ".ollama", "models")
}

// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func KeepAlive() (keepAlive time.Duration) {
	keepAlive = 5 * time.Minute
	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
		if d, err := time.ParseDuration(s); err == nil {
			keepAlive = d
		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
			keepAlive = time.Duration(n) * time.Second
		}
	}

	if keepAlive < 0 {
		return time.Duration(math.MaxInt64)
	}

	return keepAlive
}

// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
// Zero or negative values are treated as infinite.
// Default is 5 minutes.
func LoadTimeout() (loadTimeout time.Duration) {
	loadTimeout = 5 * time.Minute
	if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
		if d, err := time.ParseDuration(s); err == nil {
			loadTimeout = d
		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
			loadTimeout = time.Duration(n) * time.Second
		}
	}

	if loadTimeout <= 0 {
		return time.Duration(math.MaxInt64)
	}

	return loadTimeout
}

// Bool returns a lazy accessor for a boolean environment variable. A non-empty value
// that does not parse as a bool is treated as true; an unset variable is false.
func Bool(k string) func() bool {
	return func() bool {
		if s := Var(k); s != "" {
			b, err := strconv.ParseBool(s)
			if err != nil {
				return true
			}

			return b
		}

		return false
	}
}
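// hostAndKeepAliveSketch is a minimal, illustrative sketch (not part of the upstream
// API; the function name is ours) of how a few representative values are interpreted
// by Host and KeepAlive above. The expected results noted in the comments follow from
// the parsing rules in those functions.
func hostAndKeepAliveSketch() {
	os.Setenv("OLLAMA_HOST", "https://example.com")
	fmt.Println(Host()) // https://example.com:443 (scheme-specific default port)

	os.Setenv("OLLAMA_HOST", "0.0.0.0:8080/api")
	fmt.Println(Host()) // http://0.0.0.0:8080/api (any path suffix is preserved)

	os.Setenv("OLLAMA_KEEP_ALIVE", "10m")
	fmt.Println(KeepAlive()) // 10m0s (Go duration syntax)

	os.Setenv("OLLAMA_KEEP_ALIVE", "-1")
	fmt.Println(KeepAlive()) // bare integers are seconds; negative keeps models loaded indefinitely
}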
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)

// String returns a lazy accessor for a string environment variable.
func String(s string) func() string {
	return func() string {
		return Var(s)
	}
}

var (
	LLMLibrary            = String("OLLAMA_LLM_LIBRARY")
	TmpDir                = String("OLLAMA_TMPDIR")
	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)

// RunnersDir returns the directory containing the llm runners. It can be set explicitly
// via the OLLAMA_RUNNERS_DIR environment variable; on Windows, likely locations relative
// to the executable and the working directory are searched as a fallback.
func RunnersDir() (p string) {
	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
		return p
	}

	if runtime.GOOS != "windows" {
		return
	}

	defer func() {
		if p == "" {
			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
		}
	}()

	// On Windows we do not carry the payloads inside the main executable
	exe, err := os.Executable()
	if err != nil {
		return
	}

	cwd, err := os.Getwd()
	if err != nil {
		return
	}

	var paths []string
	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
		paths = append(paths,
			root,
			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
		)
	}

	// Try a few variations to improve developer experience when building from source in the local tree
	for _, path := range paths {
		candidate := filepath.Join(path, "lib", "ollama", "runners")
		if _, err := os.Stat(candidate); err == nil {
			p = candidate
			break
		}
	}

	return p
}

// Uint returns a lazy accessor for an unsigned integer environment variable.
// Values that do not parse log a warning and fall back to defaultValue.
func Uint(key string, defaultValue uint) func() uint {
	return func() uint {
		if s := Var(key); s != "" {
			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
			} else {
				return uint(n)
			}
		}

		return defaultValue
	}
}

var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)

// Uint64 returns a lazy accessor for a uint64 environment variable.
// Values that do not parse log a warning and fall back to defaultValue.
func Uint64(key string, defaultValue uint64) func() uint64 {
	return func() uint64 {
		if s := Var(key); s != "" {
			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
			} else {
				return n
			}
		}

		return defaultValue
	}
}

// GpuOverhead sets aside VRAM per GPU, in bytes. GpuOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)

// EnvVar describes a recognized environment variable: its name, current value, and a human-readable description.
type EnvVar struct {
	Name        string
	Value       any
	Description string
}
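// numericDefaultsSketch is a minimal, illustrative sketch (not part of the upstream
// API; the function name is ours) of the Uint/Uint64 accessors above: values that do
// not parse are logged and replaced by the registered default rather than causing an error.
func numericDefaultsSketch() {
	os.Setenv("OLLAMA_MAX_QUEUE", "not-a-number")
	fmt.Println(MaxQueue()) // 512 (falls back to the default and logs a warning)

	os.Setenv("OLLAMA_NUM_PARALLEL", "4")
	fmt.Println(NumParallel()) // 4

	os.Setenv("OLLAMA_GPU_OVERHEAD", "1073741824")
	fmt.Println(GpuOverhead()) // 1073741824 (bytes of VRAM set aside per GPU)
}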
// AsMap returns every recognized environment variable along with its current value and description.
// GPU-selection variables are omitted on macOS, where they do not apply.
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enable flash attention"},
		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
	}

	if runtime.GOOS != "darwin" {
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
	}

	return ret
}

// Values returns the current configuration as a map of variable name to printable value.
func Values() map[string]string {
	vals := make(map[string]string)
	for k, v := range AsMap() {
		vals[k] = fmt.Sprintf("%v", v.Value)
	}

	return vals
}

// Var returns an environment variable stripped of leading and trailing quotes or spaces
func Var(key string) string {
	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}

// LibRelativeToExe returns the library directory relative to the executable.
// On Windows the binary sits at the top of the install directory, so this returns ".";
// other platforms use a "bin" directory, so this returns "..".
func LibRelativeToExe() string {
	if runtime.GOOS == "windows" {
		return "."
	}

	return ".."
}
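// dumpConfigSketch is a minimal, illustrative sketch (not part of the upstream API;
// the function name is ours) showing how AsMap and Values can be used to inspect the
// effective configuration, for example when logging at server startup.
func dumpConfigSketch() {
	for _, v := range AsMap() {
		fmt.Printf("%-26s %-16v %s\n", v.Name, v.Value, v.Description)
	}

	// Values flattens every value to a string, which is convenient for handing the
	// configuration to a subprocess environment or a debug endpoint.
	for k, v := range Values() {
		fmt.Println(k + "=" + v)
	}
}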