From 73e2c8f68fe075ea159a20bbf778c0cf801316ad Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 9 Jul 2024 15:28:25 -0700
Subject: [PATCH] Fix context exhaustion integration test for small gpus

On the smaller GPUs, the initial model load of llama2 took over 30s (the
default timeout for the DoGenerate helper)
---
 integration/context_test.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/integration/context_test.go b/integration/context_test.go
index 46fac5ea..f1342e16 100644
--- a/integration/context_test.go
+++ b/integration/context_test.go
@@ -12,7 +12,7 @@ import (
 
 func TestContextExhaustion(t *testing.T) {
 	// Longer needed for small footprint GPUs
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute)
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) {
 			"num_ctx": 128,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("PullIfMissing failed: %v", err)
+	}
+	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived"}, 120*time.Second, 10*time.Second)
 }