llm: looser checks for minimum memory (#5677)

This commit is contained in:
Jeffrey Morgan 2024-07-13 09:20:05 -07:00 committed by GitHub
parent 02fea420e5
commit ef98803d63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -127,7 +127,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
// On linux, over-allocating CPU memory will almost always result in an error // On linux, over-allocating CPU memory will almost always result in an error
if runtime.GOOS == "linux" { if runtime.GOOS == "linux" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := min(systemTotalMemory, systemFreeMemory+systemSwapFreeMemory) available := systemFreeMemory + systemSwapFreeMemory
if systemMemoryRequired > available { if systemMemoryRequired > available {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory)) slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available)) return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))