diff --git a/llm/ggml.go b/llm/ggml.go
index 932efe62..f40f17e5 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -381,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 		)

 		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
+	case "stablelm":
+		fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
+		partialOffload = max(
+			4*batch*(vocab+2*embedding),
+			fullOffload,
+		)
 	}

 	return
diff --git a/llm/server.go b/llm/server.go
index a9bfdc47..3cab6f1d 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -112,7 +112,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var memoryLayerOutput uint64
 	for k, v := range layers {
 		if !strings.HasPrefix(k, "blk.") {
-			slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
 			memoryLayerOutput += v.size()
 		}
 	}