fix(dashboard): reap orphaned embedded-chat sessions to stop slash_worker leak
Since #38591 made the dashboard's embedded chat unconditional, every browser refresh of /chat issues a fresh session.create over /api/ws (a new sid plus a fresh _SlashWorker via _deferred_build), but the old tab's WS disconnect only DETACHES the transport (ws.py) — it never closes the old session or its slash_worker. The dashboard's in-process gateway is long-lived, so the detached _SlashWorker subprocess's stdin pipe stays open forever and the worker never reaches EOF: one leaked python process per refresh.

Fix at the session-lifecycle layer, not PTY signal timing: it was verified that a process whose owning gateway dies is always reaped via stdin-EOF, so the leak is specifically the long-lived dashboard process keeping detached sessions parked.

On WS disconnect, schedule a grace-delayed reap of any session left orphaned (transport detached to stdio, not mid-turn). A quick reconnect / session.resume / prompt.submit rebinds a live transport and cancels the reap, preserving the intentional detach-for-reconnect window.

- server.py: extract _teardown_session() (shared with session.close), add _ws_session_is_orphaned() + _schedule_ws_orphan_reap(), gated by HERMES_TUI_WS_ORPHAN_REAP_GRACE_S (default 20s, 0 disables).
- ws.py: schedule the reap for each detached session on disconnect.
- tests: reap-closes-worker, spares-reattached/mid-turn/finalized, disabled-when-grace-zero.
This commit is contained in:
@@ -876,6 +876,73 @@ def test_session_close_commits_memory_and_fires_finalize_hook(monkeypatch):
|
||||
server._sessions.pop("sid", None)
|
||||
|
||||
|
||||
def test_ws_orphan_reap_closes_worker_when_session_stays_detached(monkeypatch):
    """Tearing down an orphaned (stdio-detached, idle) session closes its worker.

    Guards against the leak that followed #38591: each dashboard refresh
    created a new session and _SlashWorker while the previous one was never
    reaped, leaving one python subprocess behind per refresh.
    """
    close_calls = []

    class _RecordingWorker:
        def close(self):
            close_calls.append(True)

    sid = "orphan-sid"
    server._sessions[sid] = _session(
        transport=server._stdio_transport,
        slash_worker=_RecordingWorker(),
        running=False,
    )
    # The grace constant is patched to a small non-zero value, but no timer is
    # started here: the orphan check and teardown are called directly below.
    monkeypatch.setattr(server, "_WS_ORPHAN_REAP_GRACE_S", 0.01)
    try:
        # Same steps the delayed reap is expected to perform: confirm the
        # session is orphaned, drop it from the registry, then tear it down.
        assert server._ws_session_is_orphaned(server._sessions[sid]) is True
        orphan = server._sessions.pop(sid)
        server._teardown_session(orphan)
        assert close_calls
    finally:
        # Leave the shared registry clean even if an assertion failed early.
        server._sessions.pop(sid, None)
|
||||
|
||||
|
||||
def test_ws_orphan_reap_spares_reattached_session(monkeypatch):
    """Sessions that are reattached, mid-turn, or finalized are not orphaned."""

    class _BoundTransport:
        def write(self, *args, **kwargs):
            return True

    stdio = server._stdio_transport

    # Case 1: a live (non-stdio) transport has been rebound to the session.
    rebound = _session(transport=_BoundTransport(), running=False)
    assert server._ws_session_is_orphaned(rebound) is False

    # Case 2: detached to stdio, but a turn is still running.
    busy = _session(transport=stdio, running=True)
    assert server._ws_session_is_orphaned(busy) is False

    # Case 3: detached and idle, but already finalized (idempotency).
    finalized = _session(transport=stdio, running=False, _finalized=True)
    assert server._ws_session_is_orphaned(finalized) is False
|
||||
|
||||
|
||||
def test_ws_orphan_reap_disabled_when_grace_zero(monkeypatch):
    """With the grace period set to 0, scheduling a reap creates no Timer."""
    constructed = []

    class _SpyTimer:
        """Stand-in for threading.Timer that records every construction."""

        def __init__(self, *args, **kwargs):
            constructed.append((args, kwargs))

        def start(self):
            return None

    monkeypatch.setattr(server, "_WS_ORPHAN_REAP_GRACE_S", 0.0)
    monkeypatch.setattr(server.threading, "Timer", _SpyTimer)

    server._schedule_ws_orphan_reap("any-sid")

    # No Timer may have been instantiated when the reaper is disabled.
    assert not constructed
|
||||
|
||||
|
||||
def test_init_session_fires_reset_hook(monkeypatch):
|
||||
hooks = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user