From bb1c8b6f1a0d860deefdc07f7415bb0b3416ce7f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 2 Jun 2026 17:00:04 -0700 Subject: [PATCH] test(honcho): de-flake prewarm smoke test's thread wait (#37614) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TestDialecticLifecycleSmoke._await_thread did a single join(timeout=3.0) and then proceeded regardless of whether the background dialectic thread had finished. On a loaded CI runner (6 parallel test slices) the prewarm thread's completion can slip past that 3s window, so the join times out silently and the test reads _prefetch_result before the worker has written it — the intermittent 'session-start prewarm must land in _prefetch_result' failure. Join in a loop up to a 30s ceiling and assert the thread is actually dead, so a genuine hang surfaces as a clear failure instead of a timing race. Reproduced the old failure deterministically before the change (5/5 runs fail with a 3.5s prewarm delay) and confirmed the fix after it (0/8 runs fail). --- tests/honcho_plugin/test_session.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index cf47f3a38..e8dadf2f5 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -1,5 +1,7 @@ """Tests for plugins/memory/honcho/session.py — HonchoSession and helpers.""" +import time + from datetime import datetime from types import SimpleNamespace from unittest.mock import MagicMock @@ -1538,8 +1540,27 @@ class TestDialecticLifecycleSmoke: return provider, mock_manager, cfg def _await_thread(self, provider): - if provider._prefetch_thread: - provider._prefetch_thread.join(timeout=3.0) + """Block until the in-flight prefetch/prewarm thread has fully finished. 
+ + The earlier version did a single ``join(timeout=3.0)`` and then + proceeded regardless of whether the thread had actually finished. On a + loaded CI runner (6 parallel test slices), the background dialectic + thread's completion can slip past that 3s window, so the join times out + silently and the test reads ``_prefetch_result`` before the worker wrote + it — a flaky ``session-start prewarm must land`` failure. We instead join + in a loop up to a generous ceiling and assert the thread is dead, so a + genuine hang surfaces as a clear, non-flaky failure instead of a race. + """ + thread = provider._prefetch_thread + if thread is None: + return + deadline = time.monotonic() + 30.0 + while thread.is_alive() and time.monotonic() < deadline: + thread.join(timeout=1.0) + assert not thread.is_alive(), ( + "prefetch/prewarm thread did not finish within 30s — " + "this is a real hang, not a timing flake" + ) def test_full_multi_turn_session(self): """Walks init → turns 1..8 → session end. Asserts at every step that