Move envconfig and consolidate env vars (#4608)
This commit is contained in:
parent
afd2b058b4
commit
4cc3be3035
14 changed files with 81 additions and 51 deletions
|
@ -6,7 +6,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
func InitLogging() {
|
func InitLogging() {
|
||||||
|
|
41
cmd/cmd.go
41
cmd/cmd.go
|
@ -34,6 +34,7 @@ import (
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/auth"
|
"github.com/ollama/ollama/auth"
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/progress"
|
"github.com/ollama/ollama/progress"
|
||||||
|
@ -1079,12 +1080,7 @@ func versionHandler(cmd *cobra.Command, _ []string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type EnvironmentVar struct {
|
func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
|
||||||
Name string
|
|
||||||
Description string
|
|
||||||
}
|
|
||||||
|
|
||||||
func appendEnvDocs(cmd *cobra.Command, envs []EnvironmentVar) {
|
|
||||||
if len(envs) == 0 {
|
if len(envs) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -1093,7 +1089,7 @@ func appendEnvDocs(cmd *cobra.Command, envs []EnvironmentVar) {
|
||||||
Environment Variables:
|
Environment Variables:
|
||||||
`
|
`
|
||||||
for _, e := range envs {
|
for _, e := range envs {
|
||||||
envUsage += fmt.Sprintf(" %-16s %s\n", e.Name, e.Description)
|
envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
|
cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
|
||||||
|
@ -1172,15 +1168,6 @@ func NewCLI() *cobra.Command {
|
||||||
Args: cobra.ExactArgs(0),
|
Args: cobra.ExactArgs(0),
|
||||||
RunE: RunServer,
|
RunE: RunServer,
|
||||||
}
|
}
|
||||||
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
|
||||||
Environment Variables:
|
|
||||||
|
|
||||||
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
|
||||||
OLLAMA_ORIGINS A comma separated list of allowed origins
|
|
||||||
OLLAMA_MODELS The path to the models directory (default "~/.ollama/models")
|
|
||||||
OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default "5m")
|
|
||||||
OLLAMA_DEBUG Set to 1 to enable additional debug logging
|
|
||||||
`)
|
|
||||||
|
|
||||||
pullCmd := &cobra.Command{
|
pullCmd := &cobra.Command{
|
||||||
Use: "pull MODEL",
|
Use: "pull MODEL",
|
||||||
|
@ -1233,9 +1220,9 @@ Environment Variables:
|
||||||
RunE: DeleteHandler,
|
RunE: DeleteHandler,
|
||||||
}
|
}
|
||||||
|
|
||||||
ollamaHostEnv := EnvironmentVar{"OLLAMA_HOST", "The host:port or base URL of the Ollama server (e.g. http://localhost:11434)"}
|
envVars := envconfig.AsMap()
|
||||||
ollamaNoHistoryEnv := EnvironmentVar{"OLLAMA_NOHISTORY", "Disable readline history"}
|
|
||||||
envs := []EnvironmentVar{ollamaHostEnv}
|
envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
|
||||||
|
|
||||||
for _, cmd := range []*cobra.Command{
|
for _, cmd := range []*cobra.Command{
|
||||||
createCmd,
|
createCmd,
|
||||||
|
@ -1247,10 +1234,24 @@ Environment Variables:
|
||||||
psCmd,
|
psCmd,
|
||||||
copyCmd,
|
copyCmd,
|
||||||
deleteCmd,
|
deleteCmd,
|
||||||
|
serveCmd,
|
||||||
} {
|
} {
|
||||||
switch cmd {
|
switch cmd {
|
||||||
case runCmd:
|
case runCmd:
|
||||||
appendEnvDocs(cmd, []EnvironmentVar{ollamaHostEnv, ollamaNoHistoryEnv})
|
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
|
||||||
|
case serveCmd:
|
||||||
|
appendEnvDocs(cmd, []envconfig.EnvVar{
|
||||||
|
envVars["OLLAMA_DEBUG"],
|
||||||
|
envVars["OLLAMA_HOST"],
|
||||||
|
envVars["OLLAMA_KEEP_ALIVE"],
|
||||||
|
envVars["OLLAMA_MAX_LOADED_MODELS"],
|
||||||
|
envVars["OLLAMA_MAX_QUEUE"],
|
||||||
|
envVars["OLLAMA_MODELS"],
|
||||||
|
envVars["OLLAMA_NUM_PARALLEL"],
|
||||||
|
envVars["OLLAMA_NOPRUNE"],
|
||||||
|
envVars["OLLAMA_ORIGINS"],
|
||||||
|
envVars["OLLAMA_TMPDIR"],
|
||||||
|
})
|
||||||
default:
|
default:
|
||||||
appendEnvDocs(cmd, envs)
|
appendEnvDocs(cmd, envs)
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/progress"
|
"github.com/ollama/ollama/progress"
|
||||||
"github.com/ollama/ollama/readline"
|
"github.com/ollama/ollama/readline"
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
|
@ -183,7 +184,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if os.Getenv("OLLAMA_NOHISTORY") != "" {
|
if envconfig.NoHistory {
|
||||||
scanner.HistoryDisable()
|
scanner.HistoryDisable()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,10 @@ var (
|
||||||
AllowOrigins []string
|
AllowOrigins []string
|
||||||
// Set via OLLAMA_DEBUG in the environment
|
// Set via OLLAMA_DEBUG in the environment
|
||||||
Debug bool
|
Debug bool
|
||||||
|
// Experimental flash attention
|
||||||
|
FlashAttention bool
|
||||||
|
// Set via OLLAMA_KEEP_ALIVE in the environment
|
||||||
|
KeepAlive string
|
||||||
// Set via OLLAMA_LLM_LIBRARY in the environment
|
// Set via OLLAMA_LLM_LIBRARY in the environment
|
||||||
LLMLibrary string
|
LLMLibrary string
|
||||||
// Set via OLLAMA_MAX_LOADED_MODELS in the environment
|
// Set via OLLAMA_MAX_LOADED_MODELS in the environment
|
||||||
|
@ -23,6 +27,8 @@ var (
|
||||||
MaxQueuedRequests int
|
MaxQueuedRequests int
|
||||||
// Set via OLLAMA_MAX_VRAM in the environment
|
// Set via OLLAMA_MAX_VRAM in the environment
|
||||||
MaxVRAM uint64
|
MaxVRAM uint64
|
||||||
|
// Set via OLLAMA_NOHISTORY in the environment
|
||||||
|
NoHistory bool
|
||||||
// Set via OLLAMA_NOPRUNE in the environment
|
// Set via OLLAMA_NOPRUNE in the environment
|
||||||
NoPrune bool
|
NoPrune bool
|
||||||
// Set via OLLAMA_NUM_PARALLEL in the environment
|
// Set via OLLAMA_NUM_PARALLEL in the environment
|
||||||
|
@ -31,26 +37,42 @@ var (
|
||||||
RunnersDir string
|
RunnersDir string
|
||||||
// Set via OLLAMA_TMPDIR in the environment
|
// Set via OLLAMA_TMPDIR in the environment
|
||||||
TmpDir string
|
TmpDir string
|
||||||
// Experimental flash attention
|
|
||||||
FlashAttention bool
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func AsMap() map[string]string {
|
type EnvVar struct {
|
||||||
return map[string]string{
|
Name string
|
||||||
"OLLAMA_ORIGINS": fmt.Sprintf("%v", AllowOrigins),
|
Value any
|
||||||
"OLLAMA_DEBUG": fmt.Sprintf("%v", Debug),
|
Description string
|
||||||
"OLLAMA_LLM_LIBRARY": fmt.Sprintf("%v", LLMLibrary),
|
}
|
||||||
"OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners),
|
|
||||||
"OLLAMA_MAX_QUEUE": fmt.Sprintf("%v", MaxQueuedRequests),
|
func AsMap() map[string]EnvVar {
|
||||||
"OLLAMA_MAX_VRAM": fmt.Sprintf("%v", MaxVRAM),
|
return map[string]EnvVar{
|
||||||
"OLLAMA_NOPRUNE": fmt.Sprintf("%v", NoPrune),
|
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||||
"OLLAMA_NUM_PARALLEL": fmt.Sprintf("%v", NumParallel),
|
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
|
||||||
"OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir),
|
"OLLAMA_HOST": {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||||
"OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir),
|
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
|
||||||
"OLLAMA_FLASH_ATTENTION": fmt.Sprintf("%v", FlashAttention),
|
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, ""},
|
||||||
|
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
|
||||||
|
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
|
||||||
|
"OLLAMA_MAX_VRAM": {"OLLAMA_MAX_VRAM", MaxVRAM, ""},
|
||||||
|
"OLLAMA_MODELS": {"OLLAMA_MODELS", "", "The path to the models directory"},
|
||||||
|
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
|
||||||
|
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
|
||||||
|
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
|
||||||
|
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
|
||||||
|
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, ""},
|
||||||
|
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Values() map[string]string {
|
||||||
|
vals := make(map[string]string)
|
||||||
|
for k, v := range AsMap() {
|
||||||
|
vals[k] = fmt.Sprintf("%v", v.Value)
|
||||||
|
}
|
||||||
|
return vals
|
||||||
|
}
|
||||||
|
|
||||||
var defaultAllowOrigins = []string{
|
var defaultAllowOrigins = []string{
|
||||||
"localhost",
|
"localhost",
|
||||||
"127.0.0.1",
|
"127.0.0.1",
|
||||||
|
@ -147,6 +169,10 @@ func LoadConfig() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
|
||||||
|
NoHistory = true
|
||||||
|
}
|
||||||
|
|
||||||
if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
|
if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
|
||||||
NoPrune = true
|
NoPrune = true
|
||||||
}
|
}
|
||||||
|
@ -181,4 +207,6 @@ func LoadConfig() {
|
||||||
MaxQueuedRequests = p
|
MaxQueuedRequests = p
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
KeepAlive = clean("OLLAMA_KEEP_ALIVE")
|
||||||
}
|
}
|
|
@ -13,7 +13,7 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
@ -21,7 +21,7 @@ import (
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type handles struct {
|
type handles struct {
|
||||||
|
|
|
@ -7,7 +7,7 @@ import (
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/gpu"
|
"github.com/ollama/ollama/gpu"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
// This algorithm looks for a complete fit to determine if we need to unload other models
|
// This algorithm looks for a complete fit to determine if we need to unload other models
|
||||||
|
|
|
@ -26,7 +26,7 @@ import (
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/gpu"
|
"github.com/ollama/ollama/gpu"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type LlamaServer interface {
|
type LlamaServer interface {
|
||||||
|
|
|
@ -28,7 +28,7 @@ import (
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
|
|
|
@ -26,11 +26,11 @@ import (
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/gpu"
|
"github.com/ollama/ollama/gpu"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/openai"
|
"github.com/ollama/ollama/openai"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
|
@ -315,10 +315,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDefaultSessionDuration() time.Duration {
|
func getDefaultSessionDuration() time.Duration {
|
||||||
if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
|
if envconfig.KeepAlive != "" {
|
||||||
v, err := strconv.Atoi(t)
|
v, err := strconv.Atoi(envconfig.KeepAlive)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
d, err := time.ParseDuration(t)
|
d, err := time.ParseDuration(envconfig.KeepAlive)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return defaultSessionDuration
|
return defaultSessionDuration
|
||||||
}
|
}
|
||||||
|
@ -1025,7 +1025,7 @@ func Serve(ln net.Listener) error {
|
||||||
level = slog.LevelDebug
|
level = slog.LevelDebug
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("server config", "env", envconfig.AsMap())
|
slog.Info("server config", "env", envconfig.Values())
|
||||||
handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||||
Level: level,
|
Level: level,
|
||||||
AddSource: true,
|
AddSource: true,
|
||||||
|
|
|
@ -209,14 +209,14 @@ func Test_Routes(t *testing.T) {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t.Setenv("OLLAMA_MODELS", t.TempDir())
|
||||||
|
|
||||||
s := &Server{}
|
s := &Server{}
|
||||||
router := s.GenerateRoutes()
|
router := s.GenerateRoutes()
|
||||||
|
|
||||||
httpSrv := httptest.NewServer(router)
|
httpSrv := httptest.NewServer(router)
|
||||||
t.Cleanup(httpSrv.Close)
|
t.Cleanup(httpSrv.Close)
|
||||||
|
|
||||||
t.Setenv("OLLAMA_MODELS", t.TempDir())
|
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.Name, func(t *testing.T) {
|
t.Run(tc.Name, func(t *testing.T) {
|
||||||
u := httpSrv.URL + tc.Path
|
u := httpSrv.URL + tc.Path
|
||||||
|
|
|
@ -16,7 +16,7 @@ import (
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/gpu"
|
"github.com/ollama/ollama/gpu"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ import (
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/gpu"
|
"github.com/ollama/ollama/gpu"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/server/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue