add phi2 mem

This commit is contained in:
Michael Yang 2024-05-10 12:13:28 -07:00
parent bb6fd02298
commit 1eb382da5a

View file

@@ -329,7 +329,10 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 4*batch*(1+4*embedding+context+context*heads),
 )
-partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
+partialOffload = max(
+4*batch*(2*embedding+vocab)+embedding*vocab*105/128,
+4*batch*(2+3*embedding+context+context*heads),
+)
 case "stablelm":
 fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
 partialOffload = max(