only count output tensors
This commit is contained in:
parent
5f73c08729
commit
7bb7cb8a60
1 changed file with 18 additions and 9 deletions
|
@ -5,7 +5,6 @@ import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
|
@ -100,8 +99,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||||
return 0, 0
|
return 0, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
var layerCount int
|
|
||||||
layers := ggml.Tensors().Layers()
|
layers := ggml.Tensors().Layers()
|
||||||
|
|
||||||
|
var memoryLayerOutput uint64
|
||||||
|
for k, v := range layers {
|
||||||
|
if k == "output" || k == "output_norm" {
|
||||||
|
memoryLayerOutput += v.size()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if gpus[0].Library == "metal" && opts.UseMMap {
|
||||||
|
// memory is preallocated for output tensors
|
||||||
|
memoryRequiredTotal += memoryLayerOutput
|
||||||
|
memoryRequiredPartial += memoryLayerOutput
|
||||||
|
}
|
||||||
|
|
||||||
|
var layerCount int
|
||||||
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
|
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
|
||||||
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
|
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
|
||||||
|
|
||||||
|
@ -115,15 +128,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var memoryLayerOutput uint64
|
if gpus[0].Library != "metal" || !opts.UseMMap {
|
||||||
for k, v := range layers {
|
// memory was not preallocated for output tensors
|
||||||
if !strings.HasPrefix(k, "blk.") {
|
memoryRequiredTotal += memoryLayerOutput
|
||||||
memoryLayerOutput += v.size()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memoryRequiredTotal += memoryLayerOutput
|
|
||||||
|
|
||||||
if memoryAvailable > memoryRequiredTotal {
|
if memoryAvailable > memoryRequiredTotal {
|
||||||
layerCount = int(ggml.KV().BlockCount()) + 1
|
layerCount = int(ggml.KV().BlockCount()) + 1
|
||||||
memoryRequiredPartial = memoryRequiredTotal
|
memoryRequiredPartial = memoryRequiredTotal
|
||||||
|
|
Loading…
Reference in a new issue