Merge pull request #3712 from ollama/mxyng/mem
add stablelm graph calculation
This commit is contained in:
commit
8645076a71
2 changed files with 6 additions and 1 deletion
|
@ -381,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||||
)
|
)
|
||||||
|
|
||||||
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
|
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
|
||||||
|
case "stablelm":
|
||||||
|
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
|
||||||
|
partialOffload = max(
|
||||||
|
4*batch*(vocab+2*embedding),
|
||||||
|
fullOffload,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
|
@ -112,7 +112,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||||
var memoryLayerOutput uint64
|
var memoryLayerOutput uint64
|
||||||
for k, v := range layers {
|
for k, v := range layers {
|
||||||
if !strings.HasPrefix(k, "blk.") {
|
if !strings.HasPrefix(k, "blk.") {
|
||||||
slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
|
|
||||||
memoryLayerOutput += v.size()
|
memoryLayerOutput += v.size()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue