From 73e2c8f68fe075ea159a20bbf778c0cf801316ad Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 9 Jul 2024 15:28:25 -0700
Subject: [PATCH] Fix context exhaustion integration test for small gpus

On the smaller GPUs, the initial model load of llama2 took over 30s (the
default timeout for the DoGenerate helper)
---
 integration/context_test.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/integration/context_test.go b/integration/context_test.go
index 46fac5ea..f1342e16 100644
--- a/integration/context_test.go
+++ b/integration/context_test.go
@@ -12,7 +12,7 @@ import (
 
 func TestContextExhaustion(t *testing.T) {
 	// Longer needed for small footprint GPUs
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute)
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) {
 			"num_ctx": 128,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("PullIfMissing failed: %v", err)
+	}
+	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived"}, 120*time.Second, 10*time.Second)
 }