Move envconfig and consolidate env vars (#4608)

Patrick Devine 2024-05-24 14:57:15 -07:00 committed by GitHub
parent afd2b058b4
commit 4cc3be3035
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 81 additions and 51 deletions

View file

@@ -6,7 +6,7 @@ import (
 	"os"
 	"path/filepath"
 
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 func InitLogging() {

View file

@@ -34,6 +34,7 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/auth"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
@@ -1079,12 +1080,7 @@ func versionHandler(cmd *cobra.Command, _ []string) {
 	}
 }
 
-type EnvironmentVar struct {
-	Name        string
-	Description string
-}
-
-func appendEnvDocs(cmd *cobra.Command, envs []EnvironmentVar) {
+func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
 	if len(envs) == 0 {
 		return
 	}
@@ -1093,7 +1089,7 @@ func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
 Environment Variables:
 `
 	for _, e := range envs {
-		envUsage += fmt.Sprintf(" %-16s %s\n", e.Name, e.Description)
+		envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
 	}
 
 	cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
@@ -1172,15 +1168,6 @@ func NewCLI() *cobra.Command {
 		Args:    cobra.ExactArgs(0),
 		RunE:    RunServer,
 	}
-	serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
-Environment Variables:
-
-      OLLAMA_HOST         The host:port to bind to (default "127.0.0.1:11434")
-      OLLAMA_ORIGINS      A comma separated list of allowed origins
-      OLLAMA_MODELS       The path to the models directory (default "~/.ollama/models")
-      OLLAMA_KEEP_ALIVE   The duration that models stay loaded in memory (default "5m")
-      OLLAMA_DEBUG        Set to 1 to enable additional debug logging
-`)
 
 	pullCmd := &cobra.Command{
 		Use:     "pull MODEL",
@@ -1233,9 +1220,9 @@ Environment Variables:
 		RunE:    DeleteHandler,
 	}
 
-	ollamaHostEnv := EnvironmentVar{"OLLAMA_HOST", "The host:port or base URL of the Ollama server (e.g. http://localhost:11434)"}
-	ollamaNoHistoryEnv := EnvironmentVar{"OLLAMA_NOHISTORY", "Disable readline history"}
-	envs := []EnvironmentVar{ollamaHostEnv}
+	envVars := envconfig.AsMap()
+
+	envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
 
 	for _, cmd := range []*cobra.Command{
 		createCmd,
@@ -1247,10 +1234,24 @@ Environment Variables:
 		psCmd,
 		copyCmd,
 		deleteCmd,
+		serveCmd,
 	} {
 		switch cmd {
 		case runCmd:
-			appendEnvDocs(cmd, []EnvironmentVar{ollamaHostEnv, ollamaNoHistoryEnv})
+			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
+		case serveCmd:
+			appendEnvDocs(cmd, []envconfig.EnvVar{
+				envVars["OLLAMA_DEBUG"],
+				envVars["OLLAMA_HOST"],
+				envVars["OLLAMA_KEEP_ALIVE"],
+				envVars["OLLAMA_MAX_LOADED_MODELS"],
+				envVars["OLLAMA_MAX_QUEUE"],
+				envVars["OLLAMA_MODELS"],
+				envVars["OLLAMA_NUM_PARALLEL"],
+				envVars["OLLAMA_NOPRUNE"],
+				envVars["OLLAMA_ORIGINS"],
+				envVars["OLLAMA_TMPDIR"],
+			})
 		default:
 			appendEnvDocs(cmd, envs)
 		}
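
Taken together, appendEnvDocs now renders envconfig.EnvVar entries instead of the removed local struct. A minimal, self-contained sketch of that rendering (the struct fields are copied from this diff; the sample entries and main function are illustrative, not part of the commit):

package main

import "fmt"

// EnvVar mirrors the struct this commit introduces in envconfig.
type EnvVar struct {
	Name        string
	Value       any
	Description string
}

func main() {
	// Illustrative entries; the real values come from envconfig.AsMap().
	envs := []EnvVar{
		{"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
		{"OLLAMA_MAX_LOADED_MODELS", 1, "Maximum number of loaded models (default 1)"},
	}

	usage := "\nEnvironment Variables:\n"
	for _, e := range envs {
		// %-24s pads names to 24 columns; the width was widened from 16
		// so longer names such as OLLAMA_MAX_LOADED_MODELS stay aligned.
		usage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
	}
	fmt.Print(usage)
}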

View file

@@ -15,6 +15,7 @@ import (
 	"golang.org/x/exp/slices"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/readline"
 	"github.com/ollama/ollama/types/errtypes"
@@ -183,7 +184,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		return err
 	}
 
-	if os.Getenv("OLLAMA_NOHISTORY") != "" {
+	if envconfig.NoHistory {
 		scanner.HistoryDisable()
 	}

View file

@@ -15,6 +15,10 @@ var (
 	AllowOrigins []string
 	// Set via OLLAMA_DEBUG in the environment
 	Debug bool
+	// Experimental flash attention
+	FlashAttention bool
+	// Set via OLLAMA_KEEP_ALIVE in the environment
+	KeepAlive string
 	// Set via OLLAMA_LLM_LIBRARY in the environment
 	LLMLibrary string
 	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
@@ -23,6 +27,8 @@ var (
 	MaxQueuedRequests int
 	// Set via OLLAMA_MAX_VRAM in the environment
 	MaxVRAM uint64
+	// Set via OLLAMA_NOHISTORY in the environment
+	NoHistory bool
 	// Set via OLLAMA_NOPRUNE in the environment
 	NoPrune bool
 	// Set via OLLAMA_NUM_PARALLEL in the environment
@@ -31,26 +37,42 @@ var (
 	RunnersDir string
 	// Set via OLLAMA_TMPDIR in the environment
 	TmpDir string
-	// Experimental flash attention
-	FlashAttention bool
 )
 
-func AsMap() map[string]string {
-	return map[string]string{
-		"OLLAMA_ORIGINS":           fmt.Sprintf("%v", AllowOrigins),
-		"OLLAMA_DEBUG":             fmt.Sprintf("%v", Debug),
-		"OLLAMA_LLM_LIBRARY":       fmt.Sprintf("%v", LLMLibrary),
-		"OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners),
-		"OLLAMA_MAX_QUEUE":         fmt.Sprintf("%v", MaxQueuedRequests),
-		"OLLAMA_MAX_VRAM":          fmt.Sprintf("%v", MaxVRAM),
-		"OLLAMA_NOPRUNE":           fmt.Sprintf("%v", NoPrune),
-		"OLLAMA_NUM_PARALLEL":      fmt.Sprintf("%v", NumParallel),
-		"OLLAMA_RUNNERS_DIR":       fmt.Sprintf("%v", RunnersDir),
-		"OLLAMA_TMPDIR":            fmt.Sprintf("%v", TmpDir),
-		"OLLAMA_FLASH_ATTENTION":   fmt.Sprintf("%v", FlashAttention),
+type EnvVar struct {
+	Name        string
+	Value       any
+	Description string
+}
+
+func AsMap() map[string]EnvVar {
+	return map[string]EnvVar{
+		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enable flash attention"},
+		"OLLAMA_HOST":              {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
+		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
+		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, ""},
+		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
+		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
+		"OLLAMA_MAX_VRAM":          {"OLLAMA_MAX_VRAM", MaxVRAM, ""},
+		"OLLAMA_MODELS":            {"OLLAMA_MODELS", "", "The path to the models directory"},
+		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
+		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
+		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
+		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
+		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, ""},
+		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
 	}
 }
+
+func Values() map[string]string {
+	vals := make(map[string]string)
+	for k, v := range AsMap() {
+		vals[k] = fmt.Sprintf("%v", v.Value)
+	}
+	return vals
+}
 
 var defaultAllowOrigins = []string{
 	"localhost",
 	"127.0.0.1",
@@ -147,6 +169,10 @@ func LoadConfig() {
 		}
 	}
 
+	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
+		NoHistory = true
+	}
+
 	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
 		NoPrune = true
 	}
@@ -181,4 +207,6 @@ func LoadConfig() {
 			MaxQueuedRequests = p
 		}
 	}
+
+	KeepAlive = clean("OLLAMA_KEEP_ALIVE")
 }
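
Values() exists so the startup log can print plain strings rather than whole EnvVar structs. A cut-down sketch of the AsMap/Values pair (two sample entries stand in for the full map; names outside the diff are illustrative):

package main

import "fmt"

type EnvVar struct {
	Name        string
	Value       any
	Description string
}

// asMap stands in for envconfig.AsMap with just two entries.
func asMap() map[string]EnvVar {
	return map[string]EnvVar{
		"OLLAMA_DEBUG":      {"OLLAMA_DEBUG", false, "Show additional debug information"},
		"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", "5m", "The duration that models stay loaded in memory"},
	}
}

// values mirrors envconfig.Values: each Value flattened to its string form,
// which is what Serve now logs as "server config".
func values() map[string]string {
	vals := make(map[string]string)
	for k, v := range asMap() {
		vals[k] = fmt.Sprintf("%v", v.Value)
	}
	return vals
}

func main() {
	fmt.Println(values()) // map[OLLAMA_DEBUG:false OLLAMA_KEEP_ALIVE:5m]
}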

View file

@@ -13,7 +13,7 @@ import (
 	"syscall"
 	"time"
 
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 var (
var (

View file

@@ -21,7 +21,7 @@ import (
 	"unsafe"
 
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 type handles struct {

View file

@@ -7,7 +7,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 // This algorithm looks for a complete fit to determine if we need to unload other models

View file

@@ -26,7 +26,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 type LlamaServer interface {

View file

@@ -28,7 +28,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"

View file

@@ -26,11 +26,11 @@ import (
 	"golang.org/x/exp/slices"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/server/envconfig"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
@@ -315,10 +315,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 }
 
 func getDefaultSessionDuration() time.Duration {
-	if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
-		v, err := strconv.Atoi(t)
+	if envconfig.KeepAlive != "" {
+		v, err := strconv.Atoi(envconfig.KeepAlive)
 		if err != nil {
-			d, err := time.ParseDuration(t)
+			d, err := time.ParseDuration(envconfig.KeepAlive)
 			if err != nil {
 				return defaultSessionDuration
 			}
@@ -1025,7 +1025,7 @@ func Serve(ln net.Listener) error {
 		level = slog.LevelDebug
 	}
 
-	slog.Info("server config", "env", envconfig.AsMap())
+	slog.Info("server config", "env", envconfig.Values())
 	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
 		Level:     level,
 		AddSource: true,
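
One behavior worth noting in getDefaultSessionDuration: a bare integer parses via strconv.Atoi and, in the surrounding code not shown in this hunk, is treated as seconds; anything else is retried as a Go duration string, and unparseable input falls back to the default. A simplified sketch of that parsing order (the function name and the omission of branches not visible above are this sketch's own):

package main

import (
	"fmt"
	"strconv"
	"time"
)

const defaultSessionDuration = 5 * time.Minute

// sessionDuration mirrors the parsing order in getDefaultSessionDuration:
// integers are seconds, otherwise try a duration string, otherwise default.
func sessionDuration(keepAlive string) time.Duration {
	if keepAlive == "" {
		return defaultSessionDuration
	}
	if v, err := strconv.Atoi(keepAlive); err == nil {
		return time.Duration(v) * time.Second
	}
	if d, err := time.ParseDuration(keepAlive); err == nil {
		return d
	}
	return defaultSessionDuration
}

func main() {
	fmt.Println(sessionDuration("300")) // 5m0s: bare integers are seconds
	fmt.Println(sessionDuration("10m")) // 10m0s: duration strings pass through
	fmt.Println(sessionDuration("bad")) // 5m0s: unparseable falls back
}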

View file

@@ -209,14 +209,14 @@ func Test_Routes(t *testing.T) {
 		},
 	}
 
+	t.Setenv("OLLAMA_MODELS", t.TempDir())
+
 	s := &Server{}
 	router := s.GenerateRoutes()
 
 	httpSrv := httptest.NewServer(router)
 	t.Cleanup(httpSrv.Close)
 
-	t.Setenv("OLLAMA_MODELS", t.TempDir())
-
 	for _, tc := range testCases {
 		t.Run(tc.Name, func(t *testing.T) {
 			u := httpSrv.URL + tc.Path

View file

@@ -16,7 +16,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 	"golang.org/x/exp/slices"
 )

View file

@@ -15,7 +15,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )