From f921e2696ed21e1f169ad26f69c21d0f2629840f Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 9 Jan 2024 09:45:42 -0800 Subject: [PATCH] typo --- llm/llm.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm/llm.go b/llm/llm.go index 023077aa..0ef834b4 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -61,7 +61,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options) requiredKv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.NumLayers()) * int64(ggml.NumEmbed()) * int64(ggml.NumHeadKv()) / int64(ggml.NumHead()) // this amount is the overhead + tensors in memory - // TODO: get this from the llama.cpp's graph calcluations instead of + // TODO: get this from the llama.cpp's graph calculations instead of // estimating it's 1/6 * kv_cache_size * num_gqa requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6