From ef98803d63a4e4c56853688343f011256ced130d Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Sat, 13 Jul 2024 09:20:05 -0700
Subject: [PATCH] llm: looser checks for minimum memory (#5677)

---
 llm/server.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm/server.go b/llm/server.go
index 8f37aa23..ffed9fc0 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -127,7 +127,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	// On linux, over-allocating CPU memory will almost always result in an error
 	if runtime.GOOS == "linux" {
 		systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
-		available := min(systemTotalMemory, systemFreeMemory+systemSwapFreeMemory)
+		available := systemFreeMemory + systemSwapFreeMemory
 		if systemMemoryRequired > available {
 			slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
 			return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))