better estimate scratch buffer size
This commit is contained in:
parent
18ddf6d57d
commit
58ce2d8273
1 changed file with 2 additions and 2 deletions
|
@@ -62,8 +62,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||||
|
|
||||||
// this amount is the overhead + tensors in memory
|
// this amount is the overhead + tensors in memory
|
||||||
// TODO: get this from llama.cpp's graph calculations instead of
|
// TODO: get this from llama.cpp's graph calculations instead of
|
||||||
// guessing it's ~1/7th of the kv cache times gqa
|
// estimating it's 1/6 * kv_cache_size * num_gqa
|
||||||
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 7
|
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6
|
||||||
|
|
||||||
requiredTotal := requiredModel + requiredKv + requiredAlloc
|
requiredTotal := requiredModel + requiredKv + requiredAlloc
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue