llm: remove ambiguous comment when putting upper limit on predictions to avoid infinite generation (#5535)

This commit is contained in:
Jeffrey Morgan 2024-07-07 14:32:05 -04:00 committed by GitHub
parent d8def1ff94
commit 53da2c6965
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -699,10 +699,9 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
} }
defer s.sem.Release(1) defer s.sem.Release(1)
// only allow maximum 10 "context shifts" to avoid infinite generation // put an upper limit on num_predict to avoid the model running on forever
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx { if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx req.Options.NumPredict = 10 * s.options.NumCtx
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
} }
request := map[string]any{ request := map[string]any{