Fix case for NumCtx

parent 3518aaef33
commit cff3f44f4a

1 changed file with 7 additions and 7 deletions
@@ -23,7 +23,7 @@ type LlmRequest struct {
 	ctx             context.Context //nolint:containedctx
 	model           *Model
 	opts            api.Options
-	origNumCTX      int // Track the initial ctx request
+	origNumCtx      int // Track the initial ctx request
 	sessionDuration time.Duration
 	successCh       chan *runnerRef
 	errCh           chan error
@@ -118,8 +118,8 @@ func (s *Scheduler) processPending(ctx context.Context) {
 		case pending := <-s.pendingReqCh:
 			// Block other requests until we get this pending request running
 			pending.schedAttempts++
-			if pending.origNumCTX == 0 {
-				pending.origNumCTX = pending.opts.NumCtx
+			if pending.origNumCtx == 0 {
+				pending.origNumCtx = pending.opts.NumCtx
 			}
 
 			if pending.ctx.Err() != nil {
@@ -135,7 +135,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			}
 			// Keep NumCtx and numParallel in sync
 			if numParallel > 1 {
-				pending.opts.NumCtx = pending.origNumCTX * numParallel
+				pending.opts.NumCtx = pending.origNumCtx * numParallel
 			}
 
 			for {
@@ -197,7 +197,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 					// simplifying assumption of defaultParallel when in CPU mode
 					if numParallel <= 0 {
 						numParallel = defaultParallel
-						pending.opts.NumCtx = pending.origNumCTX * numParallel
+						pending.opts.NumCtx = pending.origNumCtx * numParallel
 					}
 
 					if loadedCount == 0 {
@@ -691,7 +691,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numP
 
 		// First attempt to fit the model into a single GPU
 		for _, p := range numParallelToTry {
-			req.opts.NumCtx = req.origNumCTX * p
+			req.opts.NumCtx = req.origNumCtx * p
 			if !envconfig.SchedSpread {
 				for _, g := range sgl {
 					if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
@@ -709,7 +709,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numP
 
 		// Now try all the GPUs
 		for _, p := range numParallelToTry {
-			req.opts.NumCtx = req.origNumCTX * p
+			req.opts.NumCtx = req.origNumCtx * p
 			if ok, estimatedVRAM = llm.PredictServerFit(sgl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 				slog.Info("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", sgl[0].Library, "parallel", p, "required", format.HumanBytes2(estimatedVRAM))
 				*numParallel = p
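The renamed field exists because the scheduler multiplies the request's NumCtx by the number of parallel sequences, and it may redo that scaling on later scheduling attempts; keeping the originally requested context size in origNumCtx avoids compounding the multiplication. The following is a minimal sketch of that idea only, not the scheduler's actual code: the options, request, and scaleForParallel names are hypothetical stand-ins.

package main

import "fmt"

// options stands in for the per-request settings the scheduler mutates;
// numCtx is the context size eventually handed to the runner.
type options struct {
	numCtx int
}

// request keeps the caller's original context size separately so that
// repeated scheduling attempts do not compound the parallel scaling.
type request struct {
	opts       options
	origNumCtx int // the initial ctx request, before any parallel scaling
}

// scaleForParallel recomputes numCtx from the original request each time,
// instead of multiplying an already-scaled value again.
func (r *request) scaleForParallel(numParallel int) {
	if r.origNumCtx == 0 {
		r.origNumCtx = r.opts.numCtx // remember the caller's value once
	}
	if numParallel > 1 {
		r.opts.numCtx = r.origNumCtx * numParallel
	}
}

func main() {
	r := &request{opts: options{numCtx: 2048}}
	r.scaleForParallel(4)
	r.scaleForParallel(4) // idempotent: still 8192, not 32768
	fmt.Println(r.opts.numCtx, r.origNumCtx)
}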