diff --git a/envconfig/config.go b/envconfig/config.go index 62bfad64..34cc4dac 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -149,30 +149,77 @@ var ( IntelGPU = Bool("OLLAMA_INTEL_GPU") ) +func String(s string) func() string { + return func() string { + return getenv(s) + } +} + +var ( + LLMLibrary = String("OLLAMA_LLM_LIBRARY") + TmpDir = String("OLLAMA_TMPDIR") + + CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES") + HipVisibleDevices = String("HIP_VISIBLE_DEVICES") + RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES") + GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL") + HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION") +) + +func RunnersDir() (p string) { + if p := getenv("OLLAMA_RUNNERS_DIR"); p != "" { + return p + } + + if runtime.GOOS != "windows" { + return + } + + defer func() { + if p == "" { + slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") + } + }() + + // On Windows we do not carry the payloads inside the main executable + exe, err := os.Executable() + if err != nil { + return + } + + cwd, err := os.Getwd() + if err != nil { + return + } + + var paths []string + for _, root := range []string{filepath.Dir(exe), cwd} { + paths = append(paths, + root, + filepath.Join(root, "windows-"+runtime.GOARCH), + filepath.Join(root, "dist", "windows-"+runtime.GOARCH), + ) + } + + // Try a few variations to improve developer experience when building from source in the local tree + for _, path := range paths { + candidate := filepath.Join(path, "ollama_runners") + if _, err := os.Stat(candidate); err == nil { + p = candidate + break + } + } + + return p +} + var ( - // Set via OLLAMA_LLM_LIBRARY in the environment - LLMLibrary string // Set via OLLAMA_MAX_LOADED_MODELS in the environment MaxRunners int // Set via OLLAMA_MAX_QUEUE in the environment MaxQueuedRequests int // Set via OLLAMA_NUM_PARALLEL in the environment NumParallel int - // Set via OLLAMA_RUNNERS_DIR in the environment - RunnersDir string - // Set via OLLAMA_TMPDIR in the environment - TmpDir string - - // Set via CUDA_VISIBLE_DEVICES in the environment - CudaVisibleDevices string - // Set via HIP_VISIBLE_DEVICES in the environment - HipVisibleDevices string - // Set via ROCR_VISIBLE_DEVICES in the environment - RocrVisibleDevices string - // Set via GPU_DEVICE_ORDINAL in the environment - GpuDeviceOrdinal string - // Set via HSA_OVERRIDE_GFX_VERSION in the environment - HsaOverrideGfxVersion string ) type EnvVar struct { @@ -187,7 +234,7 @@ func AsMap() map[string]EnvVar { "OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"}, "OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"}, "OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"}, - "OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"}, + "OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"}, "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"}, "OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"}, "OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"}, @@ -195,16 +242,16 @@ func AsMap() map[string]EnvVar { "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"}, "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"}, "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"}, - "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"}, + "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"}, "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"}, - "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"}, + "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"}, } if runtime.GOOS != "darwin" { - ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"} - ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"} - ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"} - ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"} - ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"} + ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"} + ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"} + ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"} + ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"} + ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"} ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"} } return ret @@ -233,46 +280,6 @@ func init() { } func LoadConfig() { - RunnersDir = getenv("OLLAMA_RUNNERS_DIR") - if runtime.GOOS == "windows" && RunnersDir == "" { - // On Windows we do not carry the payloads inside the main executable - appExe, err := os.Executable() - if err != nil { - slog.Error("failed to lookup executable path", "error", err) - } - - cwd, err := os.Getwd() - if err != nil { - slog.Error("failed to lookup working directory", "error", err) - } - - var paths []string - for _, root := range []string{filepath.Dir(appExe), cwd} { - paths = append(paths, - root, - filepath.Join(root, "windows-"+runtime.GOARCH), - filepath.Join(root, "dist", "windows-"+runtime.GOARCH), - ) - } - - // Try a few variations to improve developer experience when building from source in the local tree - for _, p := range paths { - candidate := filepath.Join(p, "ollama_runners") - _, err := os.Stat(candidate) - if err == nil { - RunnersDir = candidate - break - } - } - if RunnersDir == "" { - slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") - } - } - - TmpDir = getenv("OLLAMA_TMPDIR") - - LLMLibrary = getenv("OLLAMA_LLM_LIBRARY") - if onp := getenv("OLLAMA_NUM_PARALLEL"); onp != "" { val, err := strconv.Atoi(onp) if err != nil { @@ -300,10 +307,4 @@ func LoadConfig() { MaxQueuedRequests = p } } - - CudaVisibleDevices = getenv("CUDA_VISIBLE_DEVICES") - HipVisibleDevices = getenv("HIP_VISIBLE_DEVICES") - RocrVisibleDevices = getenv("ROCR_VISIBLE_DEVICES") - GpuDeviceOrdinal = getenv("GPU_DEVICE_ORDINAL") - HsaOverrideGfxVersion = getenv("HSA_OVERRIDE_GFX_VERSION") } diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go index 15b6fc61..33dd03ab 100644 --- a/gpu/amd_linux.go +++ b/gpu/amd_linux.go @@ -60,9 +60,9 @@ func AMDGetGPUInfo() []RocmGPUInfo { // Determine if the user has already pre-selected which GPUs to look at, then ignore the others var visibleDevices []string - hipVD := envconfig.HipVisibleDevices // zero based index only - rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID - gpuDO := envconfig.GpuDeviceOrdinal // zero based index + hipVD := envconfig.HipVisibleDevices() // zero based index only + rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID + gpuDO := envconfig.GpuDeviceOrdinal() // zero based index switch { // TODO is this priorty order right? case hipVD != "": @@ -75,7 +75,7 @@ func AMDGetGPUInfo() []RocmGPUInfo { visibleDevices = strings.Split(gpuDO, ",") } - gfxOverride := envconfig.HsaOverrideGfxVersion + gfxOverride := envconfig.HsaOverrideGfxVersion() var supported []string libDir := "" diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index 20aed447..a170dfdc 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -53,7 +53,7 @@ func AMDGetGPUInfo() []RocmGPUInfo { } var supported []string - gfxOverride := envconfig.HsaOverrideGfxVersion + gfxOverride := envconfig.HsaOverrideGfxVersion() if gfxOverride == "" { supported, err = GetSupportedGFX(libDir) if err != nil { diff --git a/gpu/assets.go b/gpu/assets.go index 073d2e81..39ff7c21 100644 --- a/gpu/assets.go +++ b/gpu/assets.go @@ -26,7 +26,7 @@ func PayloadsDir() (string, error) { defer lock.Unlock() var err error if payloadsDir == "" { - runnersDir := envconfig.RunnersDir + runnersDir := envconfig.RunnersDir() if runnersDir != "" { payloadsDir = runnersDir @@ -35,7 +35,7 @@ func PayloadsDir() (string, error) { // The remainder only applies on non-windows where we still carry payloads in the main executable cleanupTmpDirs() - tmpDir := envconfig.TmpDir + tmpDir := envconfig.TmpDir() if tmpDir == "" { tmpDir, err = os.MkdirTemp("", "ollama") if err != nil { @@ -105,7 +105,7 @@ func cleanupTmpDirs() { func Cleanup() { lock.Lock() defer lock.Unlock() - runnersDir := envconfig.RunnersDir + runnersDir := envconfig.RunnersDir() if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" { // We want to fully clean up the tmpdir parent of the payloads dir tmpDir := filepath.Clean(filepath.Join(payloadsDir, "..")) diff --git a/gpu/gpu.go b/gpu/gpu.go index c3059542..acab1c8d 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -230,8 +230,8 @@ func GetGPUInfo() GpuInfoList { // On windows we bundle the nvidia library one level above the runner dir depPath := "" - if runtime.GOOS == "windows" && envconfig.RunnersDir != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda") + if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { + depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda") } // Load ALL libraries @@ -306,8 +306,8 @@ func GetGPUInfo() GpuInfoList { oHandles = initOneAPIHandles() // On windows we bundle the oneapi library one level above the runner dir depPath = "" - if runtime.GOOS == "windows" && envconfig.RunnersDir != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi") + if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { + depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi") } for d := range oHandles.oneapi.num_drivers { diff --git a/llm/server.go b/llm/server.go index 84d9e93a..0741d386 100644 --- a/llm/server.go +++ b/llm/server.go @@ -163,7 +163,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } else { servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant } - demandLib := envconfig.LLMLibrary + demandLib := envconfig.LLMLibrary() if demandLib != "" { serverPath := availableServers[demandLib] if serverPath == "" {