fix the cpu estimatedTotal memory + get the expiry time for loading models (#4461)
parent 5fa36a0833
commit d1692fd3e0

2 changed files with 9 additions and 0 deletions
@@ -89,6 +89,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		cpuRunner = serverForCpu()
 		gpuCount = 0
+		_, _, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
 	} else {
 		if gpus[0].Library == "metal" {
 			memInfo, err := gpu.GetCPUMem()
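
This first hunk covers the "cpu estimatedTotal memory" part of the title: when the CPU runner is selected, estimatedTotal is now populated via EstimateGPULayers instead of being left at its zero value, so the total-memory estimate reported for the loaded model is no longer zero on CPU-only runs. Below is a minimal, self-contained sketch of that pattern in Go; the names gpuInfo, estimateLayers, and pickRunner are hypothetical stand-ins for illustration, not the actual ollama API.

// Sketch only: even on the CPU fallback path, compute the memory estimate so
// the caller-visible total is not left at zero. All names are hypothetical.
package main

import "fmt"

type gpuInfo struct{ library string }

// estimateLayers stands in for EstimateGPULayers: it returns the number of
// offloaded layers, the VRAM estimate, and the total memory estimate.
func estimateLayers(gpus []gpuInfo) (layers int, vram, total uint64) {
	return 0, 0, 4 << 30 // pretend the model needs ~4 GiB in total
}

func pickRunner(gpus []gpuInfo) (runner string, estimatedTotal uint64) {
	if len(gpus) == 0 {
		runner = "cpu"
		// Before the fix, estimatedTotal stayed 0 on this branch; the commit
		// computes the estimate here as well.
		_, _, estimatedTotal = estimateLayers(gpus)
		return
	}
	runner = gpus[0].library
	_, _, estimatedTotal = estimateLayers(gpus)
	return
}

func main() {
	runner, total := pickRunner(nil)
	fmt.Printf("runner=%s estimatedTotal=%d bytes\n", runner, total)
}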
@@ -1161,6 +1161,14 @@ func (s *Server) ProcessHandler(c *gin.Context) {
 			Details:   modelDetails,
 			ExpiresAt: v.expiresAt,
 		}
+		// The scheduler waits to set expiresAt, so if a model is loading it's
+		// possible that it will be set to the unix epoch. For those cases, just
+		// calculate the time w/ the sessionDuration instead.
+		var epoch time.Time
+		if v.expiresAt == epoch {
+			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
+		}
+
 		models = append(models, mr)
 	}
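
The second hunk covers the "expiry time for loading models" part: the scheduler has not yet set expiresAt while a model is still loading, so the field compares equal to Go's zero time.Time (what the in-code comment loosely calls the unix epoch; the zero value is what IsZero reports), and the handler falls back to time.Now().Add(v.sessionDuration). A small standalone illustration of that check, with an assumed five-minute session duration:

// Illustration of the zero-time fallback used above; variable names and the
// session duration are assumptions for the example, not values from ollama.
package main

import (
	"fmt"
	"time"
)

func main() {
	var expiresAt time.Time // never set by a scheduler, so it holds the zero value
	sessionDuration := 5 * time.Minute

	var epoch time.Time
	if expiresAt == epoch { // equivalent to expiresAt.IsZero()
		expiresAt = time.Now().Add(sessionDuration)
	}
	fmt.Println("expires at:", expiresAt.Format(time.RFC3339))
}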