From 1eb382da5a52e882497552256e7494a90c095467 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Fri, 10 May 2024 12:13:28 -0700
Subject: [PATCH] add phi2 mem

---
 llm/ggml.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llm/ggml.go b/llm/ggml.go
index 1c21bde0..40089be2 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -329,7 +329,10 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 			4*batch*(1+4*embedding+context+context*heads),
 		)
 
-		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
+		partialOffload = max(
+			4*batch*(2*embedding+vocab)+embedding*vocab*105/128,
+			4*batch*(2+3*embedding+context+context*heads),
+		)
 	case "stablelm":
 		fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
 		partialOffload = max(