2024-05-04 11:46:01 -07:00
package envconfig
import (
"fmt"
"log/slog"
2024-07-02 15:12:43 -07:00
"math"
2024-05-21 21:53:44 -07:00
"net"
2024-07-03 16:44:57 -07:00
"net/url"
2024-05-04 11:46:01 -07:00
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
2024-07-02 15:12:43 -07:00
"time"
2024-05-04 11:46:01 -07:00
)
2024-07-03 16:44:57 -07:00
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
func Host ( ) * url . URL {
defaultPort := "11434"
2024-07-08 10:34:12 -07:00
s := strings . TrimSpace ( Var ( "OLLAMA_HOST" ) )
2024-07-03 16:44:57 -07:00
scheme , hostport , ok := strings . Cut ( s , "://" )
switch {
case ! ok :
scheme , hostport = "http" , s
case scheme == "http" :
defaultPort = "80"
case scheme == "https" :
defaultPort = "443"
}
2024-08-23 13:16:30 -07:00
hostport , path , _ := strings . Cut ( hostport , "/" )
2024-07-03 16:44:57 -07:00
host , port , err := net . SplitHostPort ( hostport )
if err != nil {
host , port = "127.0.0.1" , defaultPort
if ip := net . ParseIP ( strings . Trim ( hostport , "[]" ) ) ; ip != nil {
host = ip . String ( )
} else if hostport != "" {
host = hostport
}
}
if n , err := strconv . ParseInt ( port , 10 , 32 ) ; err != nil || n > 65535 || n < 0 {
2024-07-08 10:34:12 -07:00
slog . Warn ( "invalid port, using default" , "port" , port , "default" , defaultPort )
2024-08-23 13:16:30 -07:00
port = defaultPort
2024-07-03 16:44:57 -07:00
}
return & url . URL {
Scheme : scheme ,
Host : net . JoinHostPort ( host , port ) ,
2024-08-23 13:16:30 -07:00
Path : path ,
2024-07-03 16:44:57 -07:00
}
}
2024-07-03 17:02:07 -07:00
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
func Origins ( ) ( origins [ ] string ) {
2024-07-08 10:34:12 -07:00
if s := Var ( "OLLAMA_ORIGINS" ) ; s != "" {
2024-07-03 17:02:07 -07:00
origins = strings . Split ( s , "," )
}
for _ , origin := range [ ] string { "localhost" , "127.0.0.1" , "0.0.0.0" } {
origins = append ( origins ,
fmt . Sprintf ( "http://%s" , origin ) ,
fmt . Sprintf ( "https://%s" , origin ) ,
fmt . Sprintf ( "http://%s" , net . JoinHostPort ( origin , "*" ) ) ,
fmt . Sprintf ( "https://%s" , net . JoinHostPort ( origin , "*" ) ) ,
)
}
origins = append ( origins ,
"app://*" ,
"file://*" ,
"tauri://*" ,
)
return origins
}
2024-07-03 17:07:42 -07:00
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
func Models ( ) string {
2024-07-08 10:34:12 -07:00
if s := Var ( "OLLAMA_MODELS" ) ; s != "" {
2024-07-03 17:07:42 -07:00
return s
}
home , err := os . UserHomeDir ( )
if err != nil {
panic ( err )
}
return filepath . Join ( home , ".ollama" , "models" )
}
2024-07-03 18:39:35 -07:00
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func KeepAlive ( ) ( keepAlive time . Duration ) {
keepAlive = 5 * time . Minute
2024-07-08 10:34:12 -07:00
if s := Var ( "OLLAMA_KEEP_ALIVE" ) ; s != "" {
2024-07-03 18:39:35 -07:00
if d , err := time . ParseDuration ( s ) ; err == nil {
keepAlive = d
} else if n , err := strconv . ParseInt ( s , 10 , 64 ) ; err == nil {
keepAlive = time . Duration ( n ) * time . Second
}
}
if keepAlive < 0 {
return time . Duration ( math . MaxInt64 )
}
return keepAlive
}
2024-09-05 14:00:08 -07:00
// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
// Zero or Negative values are treated as infinite.
// Default is 5 minutes.
//
// The value is parsed first as a Go duration string (e.g. "10m") and, failing
// that, as an integer number of seconds. A value that parses as neither leaves
// the default in place. An integer number of seconds too large to represent
// as a time.Duration is treated as infinite instead of overflowing.
func LoadTimeout() (loadTimeout time.Duration) {
	// Largest whole number of seconds a time.Duration can hold.
	const maxSeconds = int64(math.MaxInt64 / time.Second)

	loadTimeout = 5 * time.Minute
	if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
		if d, err := time.ParseDuration(s); err == nil {
			loadTimeout = d
		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
			switch {
			case n < 0:
				// Any negative value means "infinite"; avoid multiplying a
				// large negative count, which could wrap to a positive one.
				loadTimeout = -1
			case n > maxSeconds:
				loadTimeout = math.MaxInt64
			default:
				loadTimeout = time.Duration(n) * time.Second
			}
		}
	}

	if loadTimeout <= 0 {
		return time.Duration(math.MaxInt64)
	}

	return loadTimeout
}
2024-07-03 17:22:13 -07:00
func Bool ( k string ) func ( ) bool {
return func ( ) bool {
2024-07-08 10:34:12 -07:00
if s := Var ( k ) ; s != "" {
2024-07-03 17:22:13 -07:00
b , err := strconv . ParseBool ( s )
if err != nil {
return true
}
return b
}
return false
}
}
// Boolean settings. Each is a function that reads its environment variable
// when called.
var (
	// Debug enables additional debug information (OLLAMA_DEBUG).
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature (OLLAMA_FLASH_ATTENTION).
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history (OLLAMA_NOHISTORY).
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup (OLLAMA_NOPRUNE).
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs (OLLAMA_SCHED_SPREAD).
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection (OLLAMA_INTEL_GPU).
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
2024-07-03 19:30:19 -07:00
func String ( s string ) func ( ) string {
return func ( ) string {
2024-07-08 10:34:12 -07:00
return Var ( s )
2024-07-03 19:30:19 -07:00
}
}
// String settings. Each is a function that reads its environment variable
// when called.
var (
	// LLMLibrary selects the LLM library, bypassing autodetection (OLLAMA_LLM_LIBRARY).
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	// TmpDir is the location for temporary files (OLLAMA_TMPDIR).
	TmpDir = String("OLLAMA_TMPDIR")

	// CudaVisibleDevices sets which NVIDIA devices are visible (CUDA_VISIBLE_DEVICES).
	CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
	// HipVisibleDevices sets which AMD devices are visible (HIP_VISIBLE_DEVICES).
	HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
	// RocrVisibleDevices sets which AMD devices are visible (ROCR_VISIBLE_DEVICES).
	RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
	// GpuDeviceOrdinal sets which AMD devices are visible (GPU_DEVICE_ORDINAL).
	GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
	// HsaOverrideGfxVersion overrides the gfx used for all detected AMD GPUs (HSA_OVERRIDE_GFX_VERSION).
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
func RunnersDir ( ) ( p string ) {
2024-07-08 10:34:12 -07:00
if p := Var ( "OLLAMA_RUNNERS_DIR" ) ; p != "" {
2024-07-03 19:30:19 -07:00
return p
}
if runtime . GOOS != "windows" {
return
}
defer func ( ) {
if p == "" {
2024-08-14 16:32:57 -07:00
slog . Error ( "unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'" )
2024-07-03 19:30:19 -07:00
}
} ( )
// On Windows we do not carry the payloads inside the main executable
exe , err := os . Executable ( )
if err != nil {
return
}
cwd , err := os . Getwd ( )
if err != nil {
return
}
var paths [ ] string
2024-08-27 16:19:00 -07:00
for _ , root := range [ ] string { filepath . Dir ( exe ) , filepath . Join ( filepath . Dir ( exe ) , LibRelativeToExe ( ) ) , cwd } {
2024-07-03 19:30:19 -07:00
paths = append ( paths ,
root ,
2024-07-08 12:50:11 -07:00
filepath . Join ( root , runtime . GOOS + "-" + runtime . GOARCH ) ,
filepath . Join ( root , "dist" , runtime . GOOS + "-" + runtime . GOARCH ) ,
2024-07-03 19:30:19 -07:00
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for _ , path := range paths {
2024-08-14 16:32:57 -07:00
candidate := filepath . Join ( path , "lib" , "ollama" , "runners" )
2024-07-03 19:30:19 -07:00
if _ , err := os . Stat ( candidate ) ; err == nil {
p = candidate
break
}
}
return p
}
2024-07-08 10:34:12 -07:00
func Uint ( key string , defaultValue uint ) func ( ) uint {
return func ( ) uint {
if s := Var ( key ) ; s != "" {
if n , err := strconv . ParseUint ( s , 10 , 64 ) ; err != nil {
slog . Warn ( "invalid environment variable, using default" , "key" , key , "value" , s , "default" , defaultValue )
} else {
return uint ( n )
2024-07-03 19:41:17 -07:00
}
}
2024-07-08 10:34:12 -07:00
return defaultValue
2024-07-03 19:41:17 -07:00
}
}
2024-05-04 11:46:01 -07:00
var (
2024-07-08 10:34:12 -07:00
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
NumParallel = Uint ( "OLLAMA_NUM_PARALLEL" , 0 )
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
MaxRunners = Uint ( "OLLAMA_MAX_LOADED_MODELS" , 0 )
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
MaxQueue = Uint ( "OLLAMA_MAX_QUEUE" , 512 )
// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
MaxVRAM = Uint ( "OLLAMA_MAX_VRAM" , 0 )
2024-05-04 11:46:01 -07:00
)
2024-09-05 13:46:35 -07:00
// Uint64 returns a function that reads the environment variable key as a
// base-10 unsigned 64-bit integer each time it is called.
//
// An unset or empty variable yields defaultValue. A value that cannot be
// parsed logs a warning and yields defaultValue.
func Uint64(key string, defaultValue uint64) func() uint64 {
	return func() uint64 {
		raw := Var(key)
		if raw == "" {
			return defaultValue
		}

		parsed, err := strconv.ParseUint(raw, 10, 64)
		if err != nil {
			slog.Warn("invalid environment variable, using default", "key", key, "value", raw, "default", defaultValue)
			return defaultValue
		}

		return parsed
	}
}
2024-09-05 14:00:08 -07:00
// GpuOverhead returns the amount of VRAM, in bytes, to set aside per GPU.
// GpuOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
// Default is 0.
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
2024-09-05 13:46:35 -07:00
2024-05-24 14:57:15 -07:00
// EnvVar describes a single configuration setting as reported by AsMap.
//
// The field order is significant: AsMap builds values with positional
// composite literals.
type EnvVar struct {
	// Name is the name of the environment variable.
	Name string
	// Value is the current value of the setting, as returned by its accessor.
	Value any
	// Description is a short human-readable explanation of the setting.
	Description string
}
func AsMap ( ) map [ string ] EnvVar {
2024-05-08 11:11:50 -07:00
ret := map [ string ] EnvVar {
2024-07-03 16:00:54 -07:00
"OLLAMA_DEBUG" : { "OLLAMA_DEBUG" , Debug ( ) , "Show additional debug information (e.g. OLLAMA_DEBUG=1)" } ,
2024-07-03 17:22:13 -07:00
"OLLAMA_FLASH_ATTENTION" : { "OLLAMA_FLASH_ATTENTION" , FlashAttention ( ) , "Enabled flash attention" } ,
2024-09-05 13:46:35 -07:00
"OLLAMA_GPU_OVERHEAD" : { "OLLAMA_GPU_OVERHEAD" , GpuOverhead ( ) , "Reserve a portion of VRAM per GPU (bytes)" } ,
2024-07-03 16:44:57 -07:00
"OLLAMA_HOST" : { "OLLAMA_HOST" , Host ( ) , "IP Address for the ollama server (default 127.0.0.1:11434)" } ,
2024-07-03 18:39:35 -07:00
"OLLAMA_KEEP_ALIVE" : { "OLLAMA_KEEP_ALIVE" , KeepAlive ( ) , "The duration that models stay loaded in memory (default \"5m\")" } ,
2024-07-03 19:30:19 -07:00
"OLLAMA_LLM_LIBRARY" : { "OLLAMA_LLM_LIBRARY" , LLMLibrary ( ) , "Set LLM library to bypass autodetection" } ,
2024-09-05 14:00:08 -07:00
"OLLAMA_LOAD_TIMEOUT" : { "OLLAMA_LOAD_TIMEOUT" , LoadTimeout ( ) , "How long to allow model loads to stall before giving up (default \"5m\")" } ,
2024-07-03 19:41:17 -07:00
"OLLAMA_MAX_LOADED_MODELS" : { "OLLAMA_MAX_LOADED_MODELS" , MaxRunners ( ) , "Maximum number of loaded models per GPU" } ,
"OLLAMA_MAX_QUEUE" : { "OLLAMA_MAX_QUEUE" , MaxQueue ( ) , "Maximum number of queued requests" } ,
2024-07-03 17:07:42 -07:00
"OLLAMA_MODELS" : { "OLLAMA_MODELS" , Models ( ) , "The path to the models directory" } ,
2024-07-03 17:22:13 -07:00
"OLLAMA_NOHISTORY" : { "OLLAMA_NOHISTORY" , NoHistory ( ) , "Do not preserve readline history" } ,
"OLLAMA_NOPRUNE" : { "OLLAMA_NOPRUNE" , NoPrune ( ) , "Do not prune model blobs on startup" } ,
2024-07-03 19:41:17 -07:00
"OLLAMA_NUM_PARALLEL" : { "OLLAMA_NUM_PARALLEL" , NumParallel ( ) , "Maximum number of parallel requests" } ,
2024-07-03 17:02:07 -07:00
"OLLAMA_ORIGINS" : { "OLLAMA_ORIGINS" , Origins ( ) , "A comma separated list of allowed origins" } ,
2024-07-03 19:30:19 -07:00
"OLLAMA_RUNNERS_DIR" : { "OLLAMA_RUNNERS_DIR" , RunnersDir ( ) , "Location for runners" } ,
2024-07-03 17:22:13 -07:00
"OLLAMA_SCHED_SPREAD" : { "OLLAMA_SCHED_SPREAD" , SchedSpread ( ) , "Always schedule model across all GPUs" } ,
2024-07-03 19:30:19 -07:00
"OLLAMA_TMPDIR" : { "OLLAMA_TMPDIR" , TmpDir ( ) , "Location for temporary files" } ,
2024-09-10 09:36:42 -07:00
// Informational
"HTTP_PROXY" : { "HTTP_PROXY" , String ( "HTTP_PROXY" ) ( ) , "HTTP proxy" } ,
"HTTPS_PROXY" : { "HTTPS_PROXY" , String ( "HTTPS_PROXY" ) ( ) , "HTTPS proxy" } ,
"NO_PROXY" : { "NO_PROXY" , String ( "NO_PROXY" ) ( ) , "No proxy" } ,
}
if runtime . GOOS != "windows" {
// Windows environment variables are case-insensitive so there's no need to duplicate them
ret [ "http_proxy" ] = EnvVar { "http_proxy" , String ( "http_proxy" ) ( ) , "HTTP proxy" }
ret [ "https_proxy" ] = EnvVar { "https_proxy" , String ( "https_proxy" ) ( ) , "HTTPS proxy" }
ret [ "no_proxy" ] = EnvVar { "no_proxy" , String ( "no_proxy" ) ( ) , "No proxy" }
2024-05-04 11:46:01 -07:00
}
2024-09-10 09:36:42 -07:00
2024-05-08 11:11:50 -07:00
if runtime . GOOS != "darwin" {
2024-07-03 19:30:19 -07:00
ret [ "CUDA_VISIBLE_DEVICES" ] = EnvVar { "CUDA_VISIBLE_DEVICES" , CudaVisibleDevices ( ) , "Set which NVIDIA devices are visible" }
ret [ "HIP_VISIBLE_DEVICES" ] = EnvVar { "HIP_VISIBLE_DEVICES" , HipVisibleDevices ( ) , "Set which AMD devices are visible" }
ret [ "ROCR_VISIBLE_DEVICES" ] = EnvVar { "ROCR_VISIBLE_DEVICES" , RocrVisibleDevices ( ) , "Set which AMD devices are visible" }
ret [ "GPU_DEVICE_ORDINAL" ] = EnvVar { "GPU_DEVICE_ORDINAL" , GpuDeviceOrdinal ( ) , "Set which AMD devices are visible" }
ret [ "HSA_OVERRIDE_GFX_VERSION" ] = EnvVar { "HSA_OVERRIDE_GFX_VERSION" , HsaOverrideGfxVersion ( ) , "Override the gfx used for all detected AMD GPUs" }
2024-07-03 17:22:13 -07:00
ret [ "OLLAMA_INTEL_GPU" ] = EnvVar { "OLLAMA_INTEL_GPU" , IntelGPU ( ) , "Enable experimental Intel GPU detection" }
2024-05-08 11:11:50 -07:00
}
2024-09-10 09:36:42 -07:00
2024-05-08 11:11:50 -07:00
return ret
2024-05-04 11:46:01 -07:00
}
2024-05-24 14:57:15 -07:00
// Values returns the settings reported by AsMap as strings, keyed by
// environment variable name. Each value is rendered with fmt's default
// formatting.
func Values() map[string]string {
	settings := AsMap()
	out := make(map[string]string, len(settings))
	for name, setting := range settings {
		out[name] = fmt.Sprint(setting.Value)
	}
	return out
}
2024-07-08 10:34:12 -07:00
// Var returns an environment variable stripped of leading and trailing quotes or spaces.
//
// Surrounding whitespace is removed first, then any run of single or double
// quote characters at either end. Whitespace that sits inside the quotes is
// preserved. An unset variable yields the empty string.
func Var(key string) string {
	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}
2024-08-27 16:19:00 -07:00
// LibRelativeToExe returns the path, relative to the executable's directory,
// of the directory that is used as the root when searching for libraries.
//
// On Windows the binary is kept at the top directory, so this returns ".".
// Other platforms use a "bin" directory, so this returns "..".
func LibRelativeToExe() string {
	switch runtime.GOOS {
	case "windows":
		return "."
	default:
		return ".."
	}
}