perf(tui): stop slow/dead MCP servers from freezing TUI startup

The 'summoning hermes…' phase blocked on gateway.ready, which ran MCP tool discovery inline. Any configured-but-unreachable MCP server burned its full connect-retry backoff (1+2+4s ≈ 7s) before the composer appeared — startup went from instant to ~7.5s of dead air for anyone with a down stdio/http server in mcp_servers. Move discovery into a background daemon thread so gateway.ready fires immediately; tools register into the shared registry as servers connect, and the agent isn't built until the first prompt. Measured spawn→ready: ~7500ms → ~115ms (dead twozero_td server in config). Also drop rich.console + prompt_toolkit off banner.py's import path (lazy-imported inside cprint/build_welcome_banner). tui_gateway.server imports banner only to reach the lightweight prefetch_update_check helper; the eager rich/pt imports added ~45ms before gateway.ready for no benefit. tui_gateway.server import: ~115ms → ~69ms.
2026-05-30 14:01:32 +05:30
parent bfc4a26032
commit cbf851ae1d
4 changed files with 192 additions and 28 deletions
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@ -12,14 +12,16 @@ import threading
 import time
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional
-from rich.console import Console
+# rich and prompt_toolkit are imported lazily (inside the functions that use
-from rich.panel import Panel
+# them) rather than at module level.  Importing this module is on the TUI
-from rich.table import Table
+# gateway's critical startup path purely to reach the lightweight update-check
-
+# helpers (``prefetch_update_check``); pulling rich.console + prompt_toolkit
-from prompt_toolkit import print_formatted_text as _pt_print
+# eagerly added ~50ms of wasted imports before ``gateway.ready`` could fire.
-from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
+# Keep the type-only reference available to checkers without the runtime cost.
 if TYPE_CHECKING:
    from rich.console import Console
 logger = logging.getLogger(__name__)
@ -36,6 +38,8 @@ _RST = "\033[0m"
 def cprint(text: str):
    """Print ANSI-colored text through prompt_toolkit's renderer."""
    from prompt_toolkit import print_formatted_text as _pt_print
    from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
    _pt_print(_PT_ANSI(text))
@ -471,7 +475,7 @@ def _display_toolset_name(toolset_name: str) -> str:
    )
-def build_welcome_banner(console: Console, model: str, cwd: str,
+def build_welcome_banner(console: "Console", model: str, cwd: str,
                         tools: List[dict] = None,
                         enabled_toolsets: List[str] = None,
                         session_id: str = None,
@ -490,6 +494,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
        context_length: Model's context window size in tokens.
    """
    from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
    from rich.panel import Panel
    from rich.table import Table
    if get_toolset_for_tool is None:
        from model_tools import get_toolset_for_tool
--- a/tests/tui_gateway/test_wait_for_mcp_discovery.py
+++ b/tests/tui_gateway/test_wait_for_mcp_discovery.py
@ -0,0 +1,78 @@
 """Tests for tui_gateway.entry.wait_for_mcp_discovery (PR #35245).
 MCP tool discovery runs in a background daemon thread so a slow/dead server
 can't freeze ``gateway.ready``.  The agent snapshots its tool list once at
 build time and never re-reads it, so ``_make_agent`` briefly joins the
 discovery thread before building — bounded, so a dead server can't re-introduce
 the startup hang, and a no-op once discovery has finished.
 """
 import threading
 import time
 import tui_gateway.entry as entry
 def _restore_thread_slot(saved):
    entry._mcp_discovery_thread = saved
 def test_no_thread_is_noop():
    """When no discovery thread was started (the common no-MCP case), the
    helper returns immediately and never blocks."""
    saved = entry._mcp_discovery_thread
    try:
        entry._mcp_discovery_thread = None
        start = time.monotonic()
        entry.wait_for_mcp_discovery(timeout=5.0)
        assert time.monotonic() - start < 0.1
    finally:
        _restore_thread_slot(saved)
 def test_already_finished_thread_is_noop():
    """A thread that has already finished is not joined-on (dead thread)."""
    saved = entry._mcp_discovery_thread
    try:
        t = threading.Thread(target=lambda: None, daemon=True)
        t.start()
        t.join()  # ensure it's finished
        entry._mcp_discovery_thread = t
        start = time.monotonic()
        entry.wait_for_mcp_discovery(timeout=5.0)
        assert time.monotonic() - start < 0.1
    finally:
        _restore_thread_slot(saved)
 def test_fast_thread_is_joined():
    """A reachable-but-still-connecting (fast) server lands before the agent
    snapshots tools — the helper waits for it to finish."""
    saved = entry._mcp_discovery_thread
    try:
        t = threading.Thread(target=lambda: time.sleep(0.05), daemon=True)
        t.start()
        entry._mcp_discovery_thread = t
        entry.wait_for_mcp_discovery(timeout=1.0)
        assert not t.is_alive()  # joined to completion
    finally:
        _restore_thread_slot(saved)
 def test_hung_thread_is_bounded_by_timeout():
    """A slow/dead server must NOT re-introduce the startup hang — the join is
    bounded by the timeout and returns even though the thread is still alive."""
    saved = entry._mcp_discovery_thread
    stop = threading.Event()
    try:
        t = threading.Thread(target=stop.wait, daemon=True)  # blocks until set
        t.start()
        entry._mcp_discovery_thread = t
        start = time.monotonic()
        entry.wait_for_mcp_discovery(timeout=0.3)
        elapsed = time.monotonic() - start
        assert 0.25 <= elapsed < 1.0  # bounded near the timeout, not forever
        assert t.is_alive()  # thread still running; we did not block on it
    finally:
        stop.set()
        _restore_thread_slot(saved)
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@ -12,6 +12,7 @@ if _src_root and _src_root not in sys.path:
 sys.path = [p for p in sys.path if p not in {"", "."}]
 import json
 import logging
 import signal
 import time
 import traceback
@ -20,6 +21,13 @@ from tui_gateway import server
 from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
 from tui_gateway.transport import TeeTransport
 logger = logging.getLogger(__name__)
 # Handle for the background MCP tool-discovery thread (see main()).  The first
 # agent build briefly joins this so already-spawning fast servers land before
 # the agent snapshots its tool list (see wait_for_mcp_discovery).
 _mcp_discovery_thread = None
 def _install_sidecar_publisher() -> None:
    """Mirror every dispatcher emit to the dashboard sidebar via WS.
@ -184,37 +192,76 @@ def _log_exit(reason: str) -> None:
    print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True)
 def wait_for_mcp_discovery(timeout: float = 0.75) -> None:
    """Briefly block until background MCP discovery finishes, up to ``timeout``.
    MCP discovery runs in a daemon thread spawned at startup (see main()) so a
    slow/dead server can't freeze ``gateway.ready``.  But the agent snapshots
    its tool list ONCE at build time and never re-reads it, so a reachable-but-
    slow server that finishes connecting *after* the first prompt would be
    invisible for the whole session.  Joining with a short bounded timeout
    before the first agent build lets already-spawning fast servers land
    without re-introducing the startup hang: a dead server simply isn't waited
    on beyond ``timeout``.  No-op when no discovery thread was started.
    """
    thread = _mcp_discovery_thread
    if thread is None or not thread.is_alive():
        return
    thread.join(timeout=timeout)
 def main():
    _install_sidecar_publisher()
-    # MCP tool discovery — inline is safe here: TUI entry is a plain
+    # MCP tool discovery — runs in a background daemon thread so a slow or
-    # sync loop with no asyncio event loop to block.  Previously ran as
+    # unreachable MCP server can't freeze TUI startup.  Previously this ran
-    # a model_tools.py module-level side effect; moved to explicit
+    # inline before ``gateway.ready``, which meant any configured-but-down
-    # startup calls to avoid freezing the gateway's loop on lazy import
+    # server stalled the whole shell on "summoning hermes…" for the full
-    # (#16856).
+    # connect-retry backoff (e.g. a dead stdio/http server burns 1+2+4s of
    # retries → ~7s of dead air before the composer appears).  Discovery is
    # idempotent and registers tools into the shared registry as servers
    # connect.  The agent isn't built until the first prompt, at which point
    # ``_make_agent`` briefly joins this thread (``wait_for_mcp_discovery``,
    # bounded) so already-spawning fast servers land in the tool snapshot —
    # a dead server is simply not waited on past the bound.  ``/reload-mcp``
    # rebuilds the snapshot for servers that connect later in the session.
    #
    # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
    # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
-    # ~200ms on macOS), which runs on the TUI's critical path before
+    # ~200ms on macOS).  The overwhelming majority of users have no
-    # ``gateway.ready`` can be emitted.  The overwhelming majority of users
+    # ``mcp_servers`` configured, in which case every byte of that import is
-    # have no ``mcp_servers`` configured, in which case every byte of that
+    # wasted.  Check the config first (cheap) and only spawn the discovery
-    # import is wasted.  Check the config first (cheap — it's already been
+    # thread when there's actually MCP work to do, so the import cost stays
-    # loaded once by ``_config_mtime`` elsewhere) and only pay the import
+    # off the path entirely for the common case.
    # cost when there's actually MCP work to do.
    try:
        from hermes_cli.config import read_raw_config
        _mcp_servers = (read_raw_config() or {}).get("mcp_servers")
        _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
    except Exception:
-        # Be conservative: if we can't decide, fall back to the old
+        # Be conservative: if we can't decide, fall back to attempting
-        # behaviour and let the discovery path handle its own errors.
+        # discovery (still backgrounded, so it can't block startup).
        _has_mcp_servers = True
    if _has_mcp_servers:
-        try:
+        def _discover_mcp_background() -> None:
-            from tools.mcp_tool import discover_mcp_tools
+            try:
-            discover_mcp_tools()
+                from tools.mcp_tool import discover_mcp_tools
-        except Exception:
+                discover_mcp_tools()
-            pass
+            except Exception:
                logger.warning(
                    "Background MCP tool discovery failed", exc_info=True
                )
        import threading as _mcp_threading
        _mcp_thread = _mcp_threading.Thread(
            target=_discover_mcp_background,
            name="tui-mcp-discovery",
            daemon=True,
        )
        _mcp_thread.start()
        # Publish the handle so the first agent build can briefly wait for
        # already-spawning fast servers to land (see wait_for_mcp_discovery).
        global _mcp_discovery_thread
        _mcp_discovery_thread = _mcp_thread
    if not write_json({
        "jsonrpc": "2.0",
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -2005,6 +2005,19 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
    from run_agent import AIAgent
    from hermes_cli.runtime_provider import resolve_runtime_provider
    # MCP tool discovery runs in a background daemon thread at startup so a
    # dead server can't freeze the shell (see tui_gateway/entry.py).  The agent
    # snapshots its tool list once here and never re-reads it, so briefly wait
    # for in-flight discovery to land before building — bounded, so a slow/dead
    # server still can't block.  No-op once discovery has finished (every build
    # after the first during a slow startup).
    try:
        from tui_gateway.entry import wait_for_mcp_discovery
        wait_for_mcp_discovery()
    except Exception:
        pass
    cfg = _load_cfg()
    agent_cfg = cfg.get("agent") or {}
    system_prompt = (agent_cfg.get("system_prompt", "") or "").strip()
@ -4690,8 +4703,28 @@ def _(rid, params: dict) -> dict:
        discover_mcp_tools()
        if session:
            agent = session["agent"]
-            if hasattr(agent, "refresh_tools"):
+            # Rebuild the cached agent's tool snapshot so the current session
-                agent.refresh_tools()
+            # picks up added/removed MCP tools without `/new` (which discards
            # history).  The agent snapshots tools once at build and never
            # re-reads the registry, so an explicit rebuild is required here.
            # The user already consented to the prompt-cache invalidation via
            # the confirm gate above.  Mirrors gateway/run.py::_execute_mcp_reload.
            try:
                from model_tools import get_tool_definitions
                new_defs = get_tool_definitions(
                    enabled_toolsets=_load_enabled_toolsets(),
                    quiet_mode=True,
                )
                agent.tools = new_defs
                agent.valid_tool_names = (
                    {t["function"]["name"] for t in new_defs} if new_defs else set()
                )
            except Exception as _exc:
                logger.warning(
                    "Failed to refresh cached agent tools after /reload-mcp: %s",
                    _exc,
                )
            _emit("session.info", params.get("session_id", ""), _session_info(agent))
        # Honor `always=true` by persisting the opt-out to config.