From 993cf8bf55745a46ce756461008bbea3ad8e9cb1 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Thu, 25 Apr 2024 19:02:30 -0400
Subject: [PATCH] llm: limit generation to 10x context size to avoid run on generations (#3918)

* llm: limit generation to 10x context size to avoid run on generations

* add comment

* simplify condition statement
---
 api/types.go  | 6 ++++--
 llm/server.go | 7 +++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/api/types.go b/api/types.go
index 7fe2b4e4..9200949c 100644
--- a/api/types.go
+++ b/api/types.go
@@ -396,8 +396,10 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
-		NumPredict:       -1,
-		NumKeep:          0,
+		NumPredict: -1,
+
+		// set a minimal num_keep to avoid issues on context shifts
+		NumKeep:          4,
 		Temperature:      0.8,
 		TopK:             40,
 		TopP:             0.9,
diff --git a/llm/server.go b/llm/server.go
index da725bc3..14d64c19 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		return err
 	}
 	defer s.sem.Release(1)
+
+	// only allow maximum 10 "context shifts" to avoid infinite generation
+	if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
+		req.Options.NumPredict = 10 * s.options.NumCtx
+		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
+	}
+
 	request := map[string]any{
 		"prompt": req.Prompt,
 		"stream": true,
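
For reference, the following is a minimal standalone sketch of the clamping rule this patch adds to llm/server.go. The function name clampNumPredict and the main function are illustrative only and are not part of ollama's code; the condition and the slog.Debug call mirror the diff above.

package main

import (
	"fmt"
	"log/slog"
)

// clampNumPredict applies the same rule the patch adds in Completion:
// when a request sets no limit (num_predict < 0) or a limit larger than
// ten times the context window, cap it at 10*numCtx so a single request
// cannot generate indefinitely through repeated context shifts.
func clampNumPredict(numPredict, numCtx int) int {
	if numPredict < 0 || numPredict > 10*numCtx {
		numPredict = 10 * numCtx
		slog.Debug("setting token limit to 10x num_ctx", "num_ctx", numCtx, "num_predict", numPredict)
	}
	return numPredict
}

func main() {
	fmt.Println(clampNumPredict(-1, 2048))  // default num_predict of -1 is capped at 20480
	fmt.Println(clampNumPredict(128, 2048)) // an explicit limit below the cap is left unchanged
}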