diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go index 43b15c6c..dfa5eae0 100644 --- a/integration/max_queue_test.go +++ b/integration/max_queue_test.go @@ -19,6 +19,11 @@ import ( ) func TestMaxQueue(t *testing.T) { + if os.Getenv("OLLAMA_TEST_EXISTING") != "" { + t.Skip("Max Queue test requires spawing a local server so we can adjust the queue size") + return + } + // Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits threadCount := 32 @@ -109,9 +114,9 @@ func TestMaxQueue(t *testing.T) { slog.Info("generate done, waiting for embeds") embedwg.Wait() + slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount) require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?") require.True(t, busyCount > 0, "no requests hit busy error but some should have") require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout") - slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount) }