test(honcho): de-flake prewarm smoke test's thread wait (#37614)

TestDialecticLifecycleSmoke._await_thread did a single join(timeout=3.0) and then proceeded regardless of whether the background dialectic thread had finished. On a loaded CI runner (6 parallel test slices) the prewarm thread's completion can slip past that 3s window, so the join times out silently and the test reads _prefetch_result before the worker wrote it — the intermittent 'session-start prewarm must land in _prefetch_result' failure. Join in a loop up to a 30s ceiling and assert the thread is actually dead, so a genuine hang surfaces as a clear failure instead of a timing race. Before the change, the old failure reproduced deterministically (5/5 runs failed with a 3.5s prewarm delay); after the change, the same setup no longer fails (0/8 runs failed).
2026-06-02 17:00:04 -07:00
parent 082025abcd
commit bb1c8b6f1a
1 changed files with 23 additions and 2 deletions
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@ -1,5 +1,7 @@
 """Tests for plugins/memory/honcho/session.py — HonchoSession and helpers."""
 import time
 from datetime import datetime
 from types import SimpleNamespace
 from unittest.mock import MagicMock
@ -1538,8 +1540,27 @@ class TestDialecticLifecycleSmoke:
            return provider, mock_manager, cfg
    def _await_thread(self, provider):
-        if provider._prefetch_thread:
+        """Block until the in-flight prefetch/prewarm thread has fully finished.
-            provider._prefetch_thread.join(timeout=3.0)
+
        The earlier version did a single ``join(timeout=3.0)`` and then
        proceeded regardless of whether the thread had actually finished. On a
        loaded CI runner (6 parallel test slices), the background dialectic
        thread's completion can slip past that 3s window, so the join times out
        silently and the test reads ``_prefetch_result`` before the worker wrote
        it — a flaky ``session-start prewarm must land`` failure. We instead join
        in a loop up to a generous ceiling and assert the thread is dead, so a
        genuine hang surfaces as a clear, non-flaky failure instead of a race.
        """
        thread = provider._prefetch_thread
        if thread is None:
            return
        deadline = time.monotonic() + 30.0
        while thread.is_alive() and time.monotonic() < deadline:
            thread.join(timeout=1.0)
        assert not thread.is_alive(), (
            "prefetch/prewarm thread did not finish within 30s — "
            "this is a real hang, not a timing flake"
        )
    def test_full_multi_turn_session(self):
        """Walks init → turns 1..8 → session end. Asserts at every step that