Merge pull request #5117 from dhiltgen/fix_prediction

Handle models with divergent layer sizes
This commit is contained in:
Daniel Hiltgen 2024-06-18 11:36:51 -07:00 committed by GitHub
commit 26d0bf9236
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1,6 +1,7 @@
package llm
import (
"fmt"
"log/slog"
"strconv"
"strings"
@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
// For all the layers, find where they can fit on the GPU(s)
for i := range int(ggml.KV().BlockCount()) {
// Some models have inconsistent layer sizes
if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
layerSize = blk.size()
layerSize += kv / ggml.KV().BlockCount()
}
memoryWeights += layerSize
if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {