From 5a739ff4cb27f7804903adfb674f8a1e197ea86f Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 10 Jul 2024 13:18:04 -0700 Subject: [PATCH] chatglm graph --- llm/ggml.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/llm/ggml.go b/llm/ggml.go index cfead450..fddb5039 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -424,6 +424,32 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui 4*batch*(3*embedding+vocab)+embedding*vocab*105/128, 4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16, ) + case "chatglm": + fullOffload = 4 * batch * (embedding + vocab) + partialOffload = 4*batch*(embedding+vocab) + embedding*vocab*105/128 + if qkvBias, ok := layers["blk.0"]["attn_qkv.bias"]; ok { + fullOffload = max( + fullOffload, + 4*batch*(2+ + 2*embedding+ + context+ + context*heads+ + embeddingHeadsK*heads+ + qkvBias.Shape[0]), + ) + + partialOffload = max( + partialOffload, + 4*batch*(1+ + 2*embedding+ + embeddingHeadsK*heads+ + context+ + context*heads)+ + 4*embeddingHeadsK*context+ + 4*context*embeddingHeadsK+ + 4*qkvBias.Shape[0], + ) + } } return