Handle models with divergent layer sizes
The recent refactoring of the memory prediction assumed all layers are the same size, but for some models (like deepseek-coder-v2) this is not the case, so our predictions were significantly off.
This commit is contained in:
parent
b55958a587
commit
359b15a597
1 changed file with 6 additions and 0 deletions
|
@@ -1,6 +1,7 @@
|
||||||
package llm
|
package llm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||||
|
|
||||||
// For all the layers, find where they can fit on the GPU(s)
|
// For all the layers, find where they can fit on the GPU(s)
|
||||||
for i := range int(ggml.KV().BlockCount()) {
|
for i := range int(ggml.KV().BlockCount()) {
|
||||||
|
// Some models have inconsistent layer sizes
|
||||||
|
if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
|
||||||
|
layerSize = blk.size()
|
||||||
|
layerSize += kv / ggml.KV().BlockCount()
|
||||||
|
}
|
||||||
memoryWeights += layerSize
|
memoryWeights += layerSize
|
||||||
|
|
||||||
if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
|
if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
|
||||||
|
|
Loading…
Add table
Reference in a new issue