Add back memory escape valve
If we get our predictions wrong, this can be used to set a lower memory limit as a workaround. The recent multi-GPU refactoring accidentally removed it, so this adds it back.
This commit is contained in:
parent
2ac3dd6853
commit
5445aaa94e
1 changed file with 13 additions and 0 deletions
|
@@ -3,6 +3,8 @@ package llm
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
|
@@ -49,6 +51,17 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||||
for _, info := range gpus {
|
for _, info := range gpus {
|
||||||
memoryAvailable += info.FreeMemory
|
memoryAvailable += info.FreeMemory
|
||||||
}
|
}
|
||||||
|
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
|
||||||
|
if userLimit != "" {
|
||||||
|
avail, err := strconv.ParseUint(userLimit, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
|
||||||
|
} else {
|
||||||
|
slog.Info("user override memory limit", "OLLAMA_MAX_VRAM", avail, "actual", memoryAvailable)
|
||||||
|
memoryAvailable = avail
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable))
|
slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable))
|
||||||
|
|
||||||
// TODO - this is probably wrong, first GPU vs secondaries will have different overheads
|
// TODO - this is probably wrong, first GPU vs secondaries will have different overheads
|
||||||
|
|
Loading…
Reference in a new issue