From 993cf8bf55745a46ce756461008bbea3ad8e9cb1 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Thu, 25 Apr 2024 19:02:30 -0400
Subject: [PATCH] llm: limit generation to 10x context size to avoid run on generations (#3918)

* llm: limit generation to 10x context size to avoid run on generations

* add comment

* simplify condition statement
---
 api/types.go  | 6 ++++--
 llm/server.go | 7 +++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/api/types.go b/api/types.go
index 7fe2b4e4..9200949c 100644
--- a/api/types.go
+++ b/api/types.go
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
-		NumPredict:       -1,
-		NumKeep:          0,
+		NumPredict: -1,
+
+		// set a minimal num_keep to avoid issues on context shifts
+		NumKeep:          4,
 		Temperature:      0.8,
 		TopK:             40,
 		TopP:             0.9,
diff --git a/llm/server.go b/llm/server.go
index da725bc3..14d64c19 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		return err
 	}
 	defer s.sem.Release(1)
+
+	// only allow maximum 10 "context shifts" to avoid infinite generation
+	if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
+		req.Options.NumPredict = 10 * s.options.NumCtx
+		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
+	}
+
 	request := map[string]any{
 		"prompt": req.Prompt,
 		"stream": true,
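
For reference, the following is a minimal standalone sketch of the clamping rule this patch adds to llm/server.go. The function name clampNumPredict and the main function are illustrative only and are not part of ollama's code; the condition and the slog.Debug call mirror the diff above.

package main

import (
	"fmt"
	"log/slog"
)

// clampNumPredict applies the same rule the patch adds in Completion:
// when a request sets no limit (num_predict < 0) or a limit larger than
// ten times the context window, cap it at 10*numCtx so a single request
// cannot generate indefinitely through repeated context shifts.
func clampNumPredict(numPredict, numCtx int) int {
	if numPredict < 0 || numPredict > 10*numCtx {
		numPredict = 10 * numCtx
		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", numCtx, "num_predict", numPredict)
	}
	return numPredict
}

func main() {
	fmt.Println(clampNumPredict(-1, 2048))  // default num_predict of -1 is capped at 20480
	fmt.Println(clampNumPredict(128, 2048)) // an explicit limit below the cap is left unchanged
}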