perf(tui): stop slow/dead MCP servers from freezing TUI startup

The 'summoning hermes…' phase blocked on gateway.ready, which ran MCP
tool discovery inline. Any configured-but-unreachable MCP server burned
its full connect-retry backoff (1+2+4s ≈ 7s) before the composer
appeared — startup went from instant to ~7.5s of dead air for anyone
with a down stdio/http server in mcp_servers.

Move discovery into a background daemon thread so gateway.ready fires
immediately; tools register into the shared registry as servers connect,
and the agent isn't built until the first prompt. Measured spawn→ready:
~7500ms → ~115ms (dead twozero_td server in config).

Also drop rich.console + prompt_toolkit off banner.py's import path
(lazy-imported inside cprint/build_welcome_banner). tui_gateway.server
imports banner only to reach the lightweight prefetch_update_check
helper; the eager rich/pt imports added ~45ms before gateway.ready for
no benefit. tui_gateway.server import: ~115ms → ~69ms.
This commit is contained in:
kshitijk4poor
2026-05-30 14:01:32 +05:30
committed by Teknium
parent bfc4a26032
commit cbf851ae1d
4 changed files with 192 additions and 28 deletions

View File

@ -12,14 +12,16 @@ import threading
import time import time
from pathlib import Path from pathlib import Path
from hermes_constants import get_hermes_home from hermes_constants import get_hermes_home
from typing import Dict, List, Optional from typing import TYPE_CHECKING, Dict, List, Optional
from rich.console import Console # rich and prompt_toolkit are imported lazily (inside the functions that use
from rich.panel import Panel # them) rather than at module level. Importing this module is on the TUI
from rich.table import Table # gateway's critical startup path purely to reach the lightweight update-check
# helpers (``prefetch_update_check``); pulling rich.console + prompt_toolkit
from prompt_toolkit import print_formatted_text as _pt_print # eagerly added ~50ms of wasted imports before ``gateway.ready`` could fire.
from prompt_toolkit.formatted_text import ANSI as _PT_ANSI # Keep the type-only reference available to checkers without the runtime cost.
if TYPE_CHECKING:
from rich.console import Console
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -36,6 +38,8 @@ _RST = "\033[0m"
def cprint(text: str): def cprint(text: str):
"""Print ANSI-colored text through prompt_toolkit's renderer.""" """Print ANSI-colored text through prompt_toolkit's renderer."""
from prompt_toolkit import print_formatted_text as _pt_print
from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
_pt_print(_PT_ANSI(text)) _pt_print(_PT_ANSI(text))
@ -471,7 +475,7 @@ def _display_toolset_name(toolset_name: str) -> str:
) )
def build_welcome_banner(console: Console, model: str, cwd: str, def build_welcome_banner(console: "Console", model: str, cwd: str,
tools: List[dict] = None, tools: List[dict] = None,
enabled_toolsets: List[str] = None, enabled_toolsets: List[str] = None,
session_id: str = None, session_id: str = None,
@ -490,6 +494,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
context_length: Model's context window size in tokens. context_length: Model's context window size in tokens.
""" """
from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
from rich.panel import Panel
from rich.table import Table
if get_toolset_for_tool is None: if get_toolset_for_tool is None:
from model_tools import get_toolset_for_tool from model_tools import get_toolset_for_tool

View File

@ -0,0 +1,78 @@
"""Tests for tui_gateway.entry.wait_for_mcp_discovery (PR #35245).
MCP tool discovery runs in a background daemon thread so a slow/dead server
can't freeze ``gateway.ready``. The agent snapshots its tool list once at
build time and never re-reads it, so ``_make_agent`` briefly joins the
discovery thread before building — bounded, so a dead server can't re-introduce
the startup hang, and a no-op once discovery has finished.
"""
import threading
import time
import tui_gateway.entry as entry
def _restore_thread_slot(saved):
entry._mcp_discovery_thread = saved
def test_no_thread_is_noop():
    """With no discovery thread registered (the usual no-MCP setup), the
    wait helper must return right away instead of blocking."""
    previous = entry._mcp_discovery_thread
    entry._mcp_discovery_thread = None
    try:
        began = time.monotonic()
        entry.wait_for_mcp_discovery(timeout=5.0)
        elapsed = time.monotonic() - began
        # Far below the 5s timeout: proves no join was attempted.
        assert elapsed < 0.1
    finally:
        _restore_thread_slot(previous)
def test_already_finished_thread_is_noop():
    """A discovery thread that has already exited is skipped rather than
    joined, so the helper returns immediately."""
    previous = entry._mcp_discovery_thread
    try:
        finished = threading.Thread(target=lambda: None, daemon=True)
        finished.start()
        finished.join()  # guarantee the worker has exited before publishing it
        entry._mcp_discovery_thread = finished

        began = time.monotonic()
        entry.wait_for_mcp_discovery(timeout=5.0)
        elapsed = time.monotonic() - began
        assert elapsed < 0.1
    finally:
        _restore_thread_slot(previous)
def test_fast_thread_is_joined():
    """A server that is still connecting but finishes quickly must land
    before the agent snapshots its tools: the helper waits for the thread
    to run to completion."""
    previous = entry._mcp_discovery_thread
    try:
        # Short-lived worker standing in for a fast MCP connection.
        worker = threading.Thread(target=time.sleep, args=(0.05,), daemon=True)
        worker.start()
        entry._mcp_discovery_thread = worker

        entry.wait_for_mcp_discovery(timeout=1.0)

        # The worker needs ~0.05s and the bound is 1s, so it must be done.
        assert not worker.is_alive()
    finally:
        _restore_thread_slot(previous)
def test_hung_thread_is_bounded_by_timeout():
    """A slow or dead server must not bring the startup hang back: the join
    gives up at the timeout and returns while the thread is still running."""
    previous = entry._mcp_discovery_thread
    release = threading.Event()
    try:
        # Worker parks on the event until the ``finally`` clause releases it.
        blocked = threading.Thread(target=release.wait, daemon=True)
        blocked.start()
        entry._mcp_discovery_thread = blocked

        began = time.monotonic()
        entry.wait_for_mcp_discovery(timeout=0.3)
        waited = time.monotonic() - began

        # Close to the 0.3s bound: neither skipped nor waited indefinitely.
        assert waited >= 0.25
        assert waited < 1.0
        # The helper returned without the worker having finished.
        assert blocked.is_alive()
    finally:
        release.set()
        _restore_thread_slot(previous)

View File

@ -12,6 +12,7 @@ if _src_root and _src_root not in sys.path:
sys.path = [p for p in sys.path if p not in {"", "."}] sys.path = [p for p in sys.path if p not in {"", "."}]
import json import json
import logging
import signal import signal
import time import time
import traceback import traceback
@ -20,6 +21,13 @@ from tui_gateway import server
from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
from tui_gateway.transport import TeeTransport from tui_gateway.transport import TeeTransport
logger = logging.getLogger(__name__)
# Handle for the background MCP tool-discovery thread (see main()). The first
# agent build briefly joins this so already-spawning fast servers land before
# the agent snapshots its tool list (see wait_for_mcp_discovery).
_mcp_discovery_thread = None
def _install_sidecar_publisher() -> None: def _install_sidecar_publisher() -> None:
"""Mirror every dispatcher emit to the dashboard sidebar via WS. """Mirror every dispatcher emit to the dashboard sidebar via WS.
@ -184,37 +192,76 @@ def _log_exit(reason: str) -> None:
print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True) print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True)
def wait_for_mcp_discovery(timeout: float = 0.75) -> None:
    """Wait at most ``timeout`` seconds for background MCP discovery to end.

    Reads the module-level ``_mcp_discovery_thread`` handle. If it refers to
    a thread that is still alive, that thread is joined with ``timeout`` as
    the upper bound; the call returns when the thread finishes or the bound
    elapses, whichever comes first. If the handle is ``None`` or the thread
    is no longer alive, nothing happens.

    The bound is what keeps a slow or unreachable server from stalling the
    caller: discovery that is nearly done gets a moment to complete, and
    anything slower is simply not waited for.

    NOTE(review): the handle is looked up in this module's own globals. If
    the gateway is launched with this file as ``__main__``, an import of
    ``tui_gateway.entry`` elsewhere may see a separate module object whose
    handle is still ``None`` — confirm against the launch path.

    Args:
        timeout: Maximum number of seconds to spend joining the thread.
    """
    discovery = _mcp_discovery_thread
    if discovery is not None and discovery.is_alive():
        discovery.join(timeout=timeout)
def main(): def main():
_install_sidecar_publisher() _install_sidecar_publisher()
# MCP tool discovery — inline is safe here: TUI entry is a plain # MCP tool discovery — runs in a background daemon thread so a slow or
# sync loop with no asyncio event loop to block. Previously ran as # unreachable MCP server can't freeze TUI startup. Previously this ran
# a model_tools.py module-level side effect; moved to explicit # inline before ``gateway.ready``, which meant any configured-but-down
# startup calls to avoid freezing the gateway's loop on lazy import # server stalled the whole shell on "summoning hermes…" for the full
# (#16856). # connect-retry backoff (e.g. a dead stdio/http server burns 1+2+4s of
# retries → ~7s of dead air before the composer appears). Discovery is
# idempotent and registers tools into the shared registry as servers
# connect. The agent isn't built until the first prompt, at which point
# ``_make_agent`` briefly joins this thread (``wait_for_mcp_discovery``,
# bounded) so already-spawning fast servers land in the tool snapshot —
# a dead server is simply not waited on past the bound. ``/reload-mcp``
# rebuilds the snapshot for servers that connect later in the session.
# #
# Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
# full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers — # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
# ~200ms on macOS), which runs on the TUI's critical path before # ~200ms on macOS). The overwhelming majority of users have no
# ``gateway.ready`` can be emitted. The overwhelming majority of users # ``mcp_servers`` configured, in which case every byte of that import is
# have no ``mcp_servers`` configured, in which case every byte of that # wasted. Check the config first (cheap) and only spawn the discovery
# import is wasted. Check the config first (cheap — it's already been # thread when there's actually MCP work to do, so the import cost stays
# loaded once by ``_config_mtime`` elsewhere) and only pay the import # off the path entirely for the common case.
# cost when there's actually MCP work to do.
try: try:
from hermes_cli.config import read_raw_config from hermes_cli.config import read_raw_config
_mcp_servers = (read_raw_config() or {}).get("mcp_servers") _mcp_servers = (read_raw_config() or {}).get("mcp_servers")
_has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0 _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
except Exception: except Exception:
# Be conservative: if we can't decide, fall back to the old # Be conservative: if we can't decide, fall back to attempting
# behaviour and let the discovery path handle its own errors. # discovery (still backgrounded, so it can't block startup).
_has_mcp_servers = True _has_mcp_servers = True
if _has_mcp_servers: if _has_mcp_servers:
try: def _discover_mcp_background() -> None:
from tools.mcp_tool import discover_mcp_tools try:
discover_mcp_tools() from tools.mcp_tool import discover_mcp_tools
except Exception: discover_mcp_tools()
pass except Exception:
logger.warning(
"Background MCP tool discovery failed", exc_info=True
)
import threading as _mcp_threading
_mcp_thread = _mcp_threading.Thread(
target=_discover_mcp_background,
name="tui-mcp-discovery",
daemon=True,
)
_mcp_thread.start()
# Publish the handle so the first agent build can briefly wait for
# already-spawning fast servers to land (see wait_for_mcp_discovery).
global _mcp_discovery_thread
_mcp_discovery_thread = _mcp_thread
if not write_json({ if not write_json({
"jsonrpc": "2.0", "jsonrpc": "2.0",

View File

@ -2005,6 +2005,19 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
from run_agent import AIAgent from run_agent import AIAgent
from hermes_cli.runtime_provider import resolve_runtime_provider from hermes_cli.runtime_provider import resolve_runtime_provider
# MCP tool discovery runs in a background daemon thread at startup so a
# dead server can't freeze the shell (see tui_gateway/entry.py). The agent
# snapshots its tool list once here and never re-reads it, so briefly wait
# for in-flight discovery to land before building — bounded, so a slow/dead
# server still can't block. No-op once discovery has finished (every build
# after the first during a slow startup).
try:
from tui_gateway.entry import wait_for_mcp_discovery
wait_for_mcp_discovery()
except Exception:
pass
cfg = _load_cfg() cfg = _load_cfg()
agent_cfg = cfg.get("agent") or {} agent_cfg = cfg.get("agent") or {}
system_prompt = (agent_cfg.get("system_prompt", "") or "").strip() system_prompt = (agent_cfg.get("system_prompt", "") or "").strip()
@ -4690,8 +4703,28 @@ def _(rid, params: dict) -> dict:
discover_mcp_tools() discover_mcp_tools()
if session: if session:
agent = session["agent"] agent = session["agent"]
if hasattr(agent, "refresh_tools"): # Rebuild the cached agent's tool snapshot so the current session
agent.refresh_tools() # picks up added/removed MCP tools without `/new` (which discards
# history). The agent snapshots tools once at build and never
# re-reads the registry, so an explicit rebuild is required here.
# The user already consented to the prompt-cache invalidation via
# the confirm gate above. Mirrors gateway/run.py::_execute_mcp_reload.
try:
from model_tools import get_tool_definitions
new_defs = get_tool_definitions(
enabled_toolsets=_load_enabled_toolsets(),
quiet_mode=True,
)
agent.tools = new_defs
agent.valid_tool_names = (
{t["function"]["name"] for t in new_defs} if new_defs else set()
)
except Exception as _exc:
logger.warning(
"Failed to refresh cached agent tools after /reload-mcp: %s",
_exc,
)
_emit("session.info", params.get("session_id", ""), _session_info(agent)) _emit("session.info", params.get("session_id", ""), _session_info(agent))
# Honor `always=true` by persisting the opt-out to config. # Honor `always=true` by persisting the opt-out to config.