From 35b89b2eaba4ac6fc4ae1ba4bf2ec6c8bafe9529 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Wed, 3 Jul 2024 16:00:54 -0700
Subject: [PATCH] rfc: dynamic environ lookup

---
 app/lifecycle/logging.go |  2 +-
 envconfig/config.go      | 28 ++++++++++++++++------------
 envconfig/config_test.go | 13 ++++++-------
 gpu/gpu.go               |  2 +-
 llm/memory_test.go       |  4 ++--
 llm/server.go            |  4 ++--
 server/routes.go         |  2 +-
 7 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go
index a8f1f7cd..3672aad5 100644
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -14,7 +14,7 @@ import (
 
 func InitLogging() {
 	level := slog.LevelInfo
-	if envconfig.Debug {
+	if envconfig.Debug() {
 		level = slog.LevelDebug
 	}
 
diff --git a/envconfig/config.go b/envconfig/config.go
index 0abc6968..426507be 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -26,11 +26,24 @@ func (o OllamaHost) String() string {
 
 var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
 
+// Debug returns true if the OLLAMA_DEBUG environment variable is set to a truthy value.
+func Debug() bool {
+	if s := clean("OLLAMA_DEBUG"); s != "" {
+		b, err := strconv.ParseBool(s)
+		if err != nil {
+			// non-empty value is truthy
+			return true
+		}
+
+		return b
+	}
+
+	return false
+}
+
 var (
 	// Set via OLLAMA_ORIGINS in the environment
 	AllowOrigins []string
-	// Set via OLLAMA_DEBUG in the environment
-	Debug bool
 	// Experimental flash attention
 	FlashAttention bool
 	// Set via OLLAMA_HOST in the environment
@@ -80,7 +93,7 @@ type EnvVar struct {
 
 func AsMap() map[string]EnvVar {
 	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":           {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_DEBUG":           {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
 		"OLLAMA_HOST":            {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		"OLLAMA_KEEP_ALIVE":      {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
@@ -137,15 +150,6 @@ func init() {
 }
 
 func LoadConfig() {
-	if debug := clean("OLLAMA_DEBUG"); debug != "" {
-		d, err := strconv.ParseBool(debug)
-		if err == nil {
-			Debug = d
-		} else {
-			Debug = true
-		}
-	}
-
 	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
 		d, err := strconv.ParseBool(fa)
 		if err == nil {
diff --git a/envconfig/config_test.go b/envconfig/config_test.go
index a5d73fd7..f083bb03 100644
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -12,16 +12,15 @@ import (
 )
 
 func TestConfig(t *testing.T) {
-	Debug = false // Reset whatever was loaded in init()
 	t.Setenv("OLLAMA_DEBUG", "")
-	LoadConfig()
-	require.False(t, Debug)
+	require.False(t, Debug())
+
 	t.Setenv("OLLAMA_DEBUG", "false")
-	LoadConfig()
-	require.False(t, Debug)
+	require.False(t, Debug())
+
 	t.Setenv("OLLAMA_DEBUG", "1")
-	LoadConfig()
-	require.True(t, Debug)
+	require.True(t, Debug())
+
 	t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
 	LoadConfig()
 	require.True(t, FlashAttention)
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 6e25cb46..1815668f 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -611,7 +611,7 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 }
 
 func getVerboseState() C.uint16_t {
-	if envconfig.Debug {
+	if envconfig.Debug() {
 		return C.uint16_t(1)
 	}
 	return C.uint16_t(0)
diff --git a/llm/memory_test.go b/llm/memory_test.go
index f972f927..06ae7438 100644
--- a/llm/memory_test.go
+++ b/llm/memory_test.go
@@ -8,14 +8,14 @@ import (
 	"testing"
 
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/gpu"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
 func TestEstimateGPULayers(t *testing.T) {
-	envconfig.Debug = true
+	t.Setenv("OLLAMA_DEBUG", "1")
+
 	modelName := "dummy"
 	f, err := os.CreateTemp(t.TempDir(), modelName)
 	require.NoError(t, err)
diff --git a/llm/server.go b/llm/server.go
index 08463ef0..eb966650 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -195,7 +195,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
 	}
 
-	if envconfig.Debug {
+	if envconfig.Debug() {
 		params = append(params, "--verbose")
 	}
 
@@ -381,7 +381,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}
 
 	slog.Info("starting llama server", "cmd", s.cmd.String())
-	if envconfig.Debug {
+	if envconfig.Debug() {
 		filteredEnv := []string{}
 		for _, ev := range s.cmd.Env {
 			if strings.HasPrefix(ev, "CUDA_") ||
diff --git a/server/routes.go b/server/routes.go
index 0d7ca003..c049421b 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1093,7 +1093,7 @@ func (s *Server) GenerateRoutes() http.Handler {
 
 func Serve(ln net.Listener) error {
 	level := slog.LevelInfo
-	if envconfig.Debug {
+	if envconfig.Debug() {
 		level = slog.LevelDebug
 	}
 