From d7eb05b9361febead29a74e71ddffc2ebeff5302 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Tue, 12 Nov 2024 10:41:44 -0800
Subject: [PATCH] runner.go: Fix off-by-one for num predicted

---
 llama/runner/runner.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama/runner/runner.go b/llama/runner/runner.go
index b680f060..cff7d148 100644
--- a/llama/runner/runner.go
+++ b/llama/runner/runner.go
@@ -345,7 +345,7 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 		}
 
 		// if past the num predict limit
-		if seq.numPredict > 0 && seq.numPredicted > seq.numPredict {
+		if seq.numPredict > 0 && seq.numPredicted >= seq.numPredict {
 			s.removeSequence(seqIdx, "limit")
 			continue
 		}