diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go index 4be90648..df2597a8 100644 --- a/app/lifecycle/logging.go +++ b/app/lifecycle/logging.go @@ -6,7 +6,7 @@ import ( "os" "path/filepath" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" ) func InitLogging() { diff --git a/cmd/cmd.go b/cmd/cmd.go index 5d919d9a..36d4af08 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -34,6 +34,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/auth" + "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" "github.com/ollama/ollama/parser" "github.com/ollama/ollama/progress" @@ -1079,12 +1080,7 @@ func versionHandler(cmd *cobra.Command, _ []string) { } } -type EnvironmentVar struct { - Name string - Description string -} - -func appendEnvDocs(cmd *cobra.Command, envs []EnvironmentVar) { +func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) { if len(envs) == 0 { return } @@ -1093,7 +1089,7 @@ func appendEnvDocs(cmd *cobra.Command, envs []EnvironmentVar) { Environment Variables: ` for _, e := range envs { - envUsage += fmt.Sprintf(" %-16s %s\n", e.Name, e.Description) + envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description) } cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage) @@ -1172,15 +1168,6 @@ func NewCLI() *cobra.Command { Args: cobra.ExactArgs(0), RunE: RunServer, } - serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + ` -Environment Variables: - - OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434") - OLLAMA_ORIGINS A comma separated list of allowed origins - OLLAMA_MODELS The path to the models directory (default "~/.ollama/models") - OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default "5m") - OLLAMA_DEBUG Set to 1 to enable additional debug logging -`) pullCmd := &cobra.Command{ Use: "pull MODEL", @@ -1233,9 +1220,9 @@ Environment Variables: RunE: DeleteHandler, } - ollamaHostEnv := EnvironmentVar{"OLLAMA_HOST", "The host:port or base 
URL of the Ollama server (e.g. http://localhost:11434)"} - ollamaNoHistoryEnv := EnvironmentVar{"OLLAMA_NOHISTORY", "Disable readline history"} - envs := []EnvironmentVar{ollamaHostEnv} + envVars := envconfig.AsMap() + + envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]} for _, cmd := range []*cobra.Command{ createCmd, @@ -1247,10 +1234,24 @@ Environment Variables: psCmd, copyCmd, deleteCmd, + serveCmd, } { switch cmd { case runCmd: - appendEnvDocs(cmd, []EnvironmentVar{ollamaHostEnv, ollamaNoHistoryEnv}) + appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]}) + case serveCmd: + appendEnvDocs(cmd, []envconfig.EnvVar{ + envVars["OLLAMA_DEBUG"], + envVars["OLLAMA_HOST"], + envVars["OLLAMA_KEEP_ALIVE"], + envVars["OLLAMA_MAX_LOADED_MODELS"], + envVars["OLLAMA_MAX_QUEUE"], + envVars["OLLAMA_MODELS"], + envVars["OLLAMA_NUM_PARALLEL"], + envVars["OLLAMA_NOPRUNE"], + envVars["OLLAMA_ORIGINS"], + envVars["OLLAMA_TMPDIR"], + }) default: appendEnvDocs(cmd, envs) } diff --git a/cmd/interactive.go b/cmd/interactive.go index 0a31efb5..c055df0e 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -15,6 +15,7 @@ import ( "golang.org/x/exp/slices" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/progress" "github.com/ollama/ollama/readline" "github.com/ollama/ollama/types/errtypes" @@ -183,7 +184,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { return err } - if os.Getenv("OLLAMA_NOHISTORY") != "" { + if envconfig.NoHistory { scanner.HistoryDisable() } diff --git a/server/envconfig/config.go b/envconfig/config.go similarity index 66% rename from server/envconfig/config.go rename to envconfig/config.go index ae7d89b2..2304c4f0 100644 --- a/server/envconfig/config.go +++ b/envconfig/config.go @@ -15,6 +15,10 @@ var ( AllowOrigins []string // Set via OLLAMA_DEBUG in the environment Debug bool + // Experimental flash attention + FlashAttention bool + // Set via 
OLLAMA_KEEP_ALIVE in the environment + KeepAlive string // Set via OLLAMA_LLM_LIBRARY in the environment LLMLibrary string // Set via OLLAMA_MAX_LOADED_MODELS in the environment @@ -23,6 +27,8 @@ var ( MaxQueuedRequests int // Set via OLLAMA_MAX_VRAM in the environment MaxVRAM uint64 + // Set via OLLAMA_NOHISTORY in the environment + NoHistory bool // Set via OLLAMA_NOPRUNE in the environment NoPrune bool // Set via OLLAMA_NUM_PARALLEL in the environment @@ -31,26 +37,42 @@ var ( RunnersDir string // Set via OLLAMA_TMPDIR in the environment TmpDir string - // Experimental flash attention - FlashAttention bool ) -func AsMap() map[string]string { - return map[string]string{ - "OLLAMA_ORIGINS": fmt.Sprintf("%v", AllowOrigins), - "OLLAMA_DEBUG": fmt.Sprintf("%v", Debug), - "OLLAMA_LLM_LIBRARY": fmt.Sprintf("%v", LLMLibrary), - "OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners), - "OLLAMA_MAX_QUEUE": fmt.Sprintf("%v", MaxQueuedRequests), - "OLLAMA_MAX_VRAM": fmt.Sprintf("%v", MaxVRAM), - "OLLAMA_NOPRUNE": fmt.Sprintf("%v", NoPrune), - "OLLAMA_NUM_PARALLEL": fmt.Sprintf("%v", NumParallel), - "OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir), - "OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir), - "OLLAMA_FLASH_ATTENTION": fmt.Sprintf("%v", FlashAttention), +type EnvVar struct { + Name string + Value any + Description string +} + +func AsMap() map[string]EnvVar { + return map[string]EnvVar{ + "OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. 
OLLAMA_DEBUG=1)"}, +	"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enable flash attention"}, +	"OLLAMA_HOST": {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"}, +	"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"}, +	"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, ""}, +	"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"}, +	"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"}, +	"OLLAMA_MAX_VRAM": {"OLLAMA_MAX_VRAM", MaxVRAM, ""}, +	"OLLAMA_MODELS": {"OLLAMA_MODELS", "", "The path to the models directory"}, +	"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"}, +	"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"}, +	"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"}, +	"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"}, +	"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, ""}, +	"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"}, } } +func Values() map[string]string { +	vals := make(map[string]string) +	for k, v := range AsMap() { +		vals[k] = fmt.Sprintf("%v", v.Value) +	} +	return vals +} + var defaultAllowOrigins = []string{ "localhost", "127.0.0.1", @@ -147,6 +169,10 @@ func LoadConfig() { } } + if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" { + NoHistory = true + } + if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" { NoPrune = true } @@ -181,4 +207,6 @@ func LoadConfig() { MaxQueuedRequests = p } } + + KeepAlive = clean("OLLAMA_KEEP_ALIVE") } diff --git a/server/envconfig/config_test.go b/envconfig/config_test.go similarity index 100% rename from server/envconfig/config_test.go rename to 
envconfig/config_test.go diff --git a/gpu/assets.go b/gpu/assets.go index 911a6977..e3fbe47c 100644 --- a/gpu/assets.go +++ b/gpu/assets.go @@ -13,7 +13,7 @@ import ( "syscall" "time" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" ) var ( diff --git a/gpu/gpu.go b/gpu/gpu.go index 781e23df..74160b60 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -21,7 +21,7 @@ import ( "unsafe" "github.com/ollama/ollama/format" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" ) type handles struct { diff --git a/llm/memory.go b/llm/memory.go index acc2dd0b..ff64baf1 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -7,7 +7,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" ) // This algorithm looks for a complete fit to determine if we need to unload other models diff --git a/llm/server.go b/llm/server.go index 384d31ca..36fc727c 100644 --- a/llm/server.go +++ b/llm/server.go @@ -26,7 +26,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" ) type LlamaServer interface { diff --git a/server/images.go b/server/images.go index 520c899b..cf6edf95 100644 --- a/server/images.go +++ b/server/images.go @@ -28,7 +28,7 @@ import ( "github.com/ollama/ollama/format" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/parser" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" diff --git a/server/routes.go b/server/routes.go index 4b3239e1..4ac284a4 100644 --- a/server/routes.go +++ b/server/routes.go @@ -26,11 +26,11 @@ import ( "golang.org/x/exp/slices" "github.com/ollama/ollama/api" + 
"github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/openai" "github.com/ollama/ollama/parser" - "github.com/ollama/ollama/server/envconfig" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" "github.com/ollama/ollama/version" @@ -315,10 +315,10 @@ func (s *Server) GenerateHandler(c *gin.Context) { } func getDefaultSessionDuration() time.Duration { - if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists { - v, err := strconv.Atoi(t) + if envconfig.KeepAlive != "" { + v, err := strconv.Atoi(envconfig.KeepAlive) if err != nil { - d, err := time.ParseDuration(t) + d, err := time.ParseDuration(envconfig.KeepAlive) if err != nil { return defaultSessionDuration } @@ -1025,7 +1025,7 @@ func Serve(ln net.Listener) error { level = slog.LevelDebug } - slog.Info("server config", "env", envconfig.AsMap()) + slog.Info("server config", "env", envconfig.Values()) handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ Level: level, AddSource: true, diff --git a/server/routes_test.go b/server/routes_test.go index a48819fe..37671d0c 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -209,14 +209,14 @@ func Test_Routes(t *testing.T) { }, } + t.Setenv("OLLAMA_MODELS", t.TempDir()) + s := &Server{} router := s.GenerateRoutes() httpSrv := httptest.NewServer(router) t.Cleanup(httpSrv.Close) - t.Setenv("OLLAMA_MODELS", t.TempDir()) - for _, tc := range testCases { t.Run(tc.Name, func(t *testing.T) { u := httpSrv.URL + tc.Path diff --git a/server/sched.go b/server/sched.go index 8b97e354..8c72177f 100644 --- a/server/sched.go +++ b/server/sched.go @@ -16,7 +16,7 @@ import ( "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" "golang.org/x/exp/slices" ) diff --git a/server/sched_test.go b/server/sched_test.go index addc1ad8..3ee1b989 
100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -15,7 +15,7 @@ import ( "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" - "github.com/ollama/ollama/server/envconfig" + "github.com/ollama/ollama/envconfig" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" )