runner.go: Truncate inputs that exceed context rather than shifting
Previous versions of the runner would truncate inputs to the context window before beginning processing. The main processing loop relied on this behavior if the context needed to be shifted later (due to token generation). If truncation did not occur then invariants would be broken, causing crashes or infinite loops. Later versions attempted to fix these bugs and make the logic less subtle so that all inputs could be handled. Truncation was removed to make things consistent. However, truncation is much faster than processing and shifting, so removing it caused performance problems when the input vastly exceeded the context size. This restores the input truncation as a performance optimization while keeping the more robust processing logic. Fixes #7762
This commit is contained in:
parent
c3ff916431
commit
c4b34f2a2a
1 changed file with 4 additions and 1 deletion
|
@@ -122,7 +122,10 @@ func (s *Server) NewSequence(prompt string, images []ImageData, params NewSequen
|
||||||
params.numKeep = min(params.numKeep, s.cache.numCtx-1)
|
params.numKeep = min(params.numKeep, s.cache.numCtx-1)
|
||||||
|
|
||||||
if len(inputs) > s.cache.numCtx {
|
if len(inputs) > s.cache.numCtx {
|
||||||
slog.Warn("input exceeds context length", "prompt", len(inputs), "limit", s.cache.numCtx)
|
slog.Warn("truncating input prompt", "limit", s.cache.numCtx, "prompt", len(inputs), "numKeep", params.numKeep)
|
||||||
|
newInputs := inputs[:params.numKeep]
|
||||||
|
newInputs = append(newInputs, inputs[len(inputs)-s.cache.numCtx+params.numKeep:]...)
|
||||||
|
inputs = newInputs
|
||||||
}
|
}
|
||||||
|
|
||||||
var sc *llama.SamplingContext
|
var sc *llama.SamplingContext
|
||||||
|
|
Loading…
Reference in a new issue