diff --git a/model_tools.py b/model_tools.py
index 5c0559f00..d85a8b8ef 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -206,6 +206,27 @@ _LEGACY_TOOLSET_MAP = {
 # get_tool_definitions  (the main schema provider)
 # =============================================================================
 
+# Module-level memoization for get_tool_definitions(). Keyed on
+# (frozenset(enabled_toolsets), frozenset(disabled_toolsets), registry._generation).
+# Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn
+# with quiet_mode=True; caching avoids ~7 ms of registry walking + schema
+# filtering + check_fn probing per call. Only active when quiet_mode=True
+# because quiet_mode=False has stdout side effects (tool-selection prints).
+#
+# Invalidation happens transparently via the registry's _generation counter,
+# which bumps on register() / deregister() / register_toolset_alias(). The
+# inner check_fn TTL cache in registry.py handles environment drift (Docker
+# daemon start/stop, env var changes, etc.) on a 30 s horizon.
+_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}
+
+
+def _clear_tool_defs_cache() -> None:
+    """Drop memoized get_tool_definitions() results. Called when dynamic
+    schema dependencies change (e.g. discord capability cache reset,
+    execute_code sandbox reconfigured)."""
+    _tool_defs_cache.clear()
+
+
 def get_tool_definitions(
     enabled_toolsets: List[str] = None,
     disabled_toolsets: List[str] = None,
@@ -224,6 +245,50 @@ def get_tool_definitions(
     Returns:
         Filtered list of OpenAI-format tool definitions.
     """
+    # Fast path: memoized result when the caller doesn't need stdout prints.
+    # The cache key captures every argument-level input; the registry
+    # generation captures registry mutations (MCP refresh, plugin load).
+    # check_fn results are TTL-cached one level down, inside
+    # registry.get_definitions. The config-mtime fingerprint below captures
+    # user-visible config edits that affect dynamic schemas (execute_code
+    # mode, discord action allowlist, etc.) without needing an explicit
+    # invalidate hook on every config-writer.
+    if quiet_mode:
+        try:
+            from hermes_cli.config import get_config_path
+            cfg_path = get_config_path()
+            cfg_stat = cfg_path.stat()
+            cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size)
+        except (FileNotFoundError, OSError, ImportError):
+            cfg_fp = None
+        cache_key = (
+            frozenset(enabled_toolsets) if enabled_toolsets is not None else None,
+            frozenset(disabled_toolsets) if disabled_toolsets else None,
+            registry._generation,
+            cfg_fp,
+        )
+        cached = _tool_defs_cache.get(cache_key)
+        if cached is not None:
+            # Update _last_resolved_tool_names so downstream callers see
+            # consistent state even on a cache hit.
+            global _last_resolved_tool_names
+            _last_resolved_tool_names = [t["function"]["name"] for t in cached]
+            # Return a shallow copy of the list but share the dict references —
+            # schemas are treated as read-only by all known callers.
+            return list(cached)
+
+    result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
+    if quiet_mode:
+        _tool_defs_cache[cache_key] = result
+    return result
+
+
+def _compute_tool_definitions(
+    enabled_toolsets: List[str] = None,
+    disabled_toolsets: List[str] = None,
+    quiet_mode: bool = False,
+) -> List[Dict[str, Any]]:
+    """Uncached implementation of :func:`get_tool_definitions`."""
     # Determine which tool names the caller wants
     tools_to_include: set = set()
 
diff --git a/tests/conftest.py b/tests/conftest.py
index d2545e059..6386e26ec 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -480,3 +480,29 @@ def _enforce_test_timeout():
     yield
     signal.alarm(0)
     signal.signal(signal.SIGALRM, old)
+
+
+@pytest.fixture(autouse=True)
+def _reset_tool_registry_caches():
+    """Clear tool-registry-level caches between tests.
+
+    The production registry caches ``check_fn()`` results for 30 s
+    (see tools/registry.py) and :func:`get_tool_definitions` memoizes
+    its result (see model_tools.py). Both are keyed on state that tests
+    routinely mutate (env vars, registry._generation, config.yaml mtime)
+    — but a stale result from test A can still be served to test B
+    because 30 s covers the entire suite, and xdist worker reuse means
+    one test's cache lands in another's process. Clearing before every
+    test keeps hermetic behavior.
+    """
+    try:
+        from tools.registry import invalidate_check_fn_cache
+        invalidate_check_fn_cache()
+    except ImportError:
+        pass
+    try:
+        from model_tools import _clear_tool_defs_cache
+        _clear_tool_defs_cache()
+    except ImportError:
+        pass
+    yield
diff --git a/tools/registry.py b/tools/registry.py
index e6d554e2b..342078191 100644
--- a/tools/registry.py
+++ b/tools/registry.py
@@ -19,6 +19,7 @@ import importlib
 import json
 import logging
 import threading
+import time
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Set
 
@@ -97,6 +98,48 @@ class ToolEntry:
         self.max_result_size_chars = max_result_size_chars
 
 
+# ---------------------------------------------------------------------------
+# check_fn TTL cache
+#
+# check_fn callables like tools/terminal_tool.check_terminal_requirements
+# probe external state (Docker daemon, Modal SDK install, playwright binary
+# availability). For a long-lived CLI or gateway process, calling them on
+# every get_definitions() is pure waste — external state changes on human
+# timescales. Cache results for ~30 s so env-var flips via ``hermes tools``
+# or live credential file changes propagate within a turn or two without
+# requiring any explicit invalidation.
+# ---------------------------------------------------------------------------
+
+_CHECK_FN_TTL_SECONDS = 30.0
+_check_fn_cache: Dict[Callable, tuple[float, bool]] = {}
+_check_fn_cache_lock = threading.Lock()
+
+
+def _check_fn_cached(fn: Callable) -> bool:
+    """Return bool(fn()), TTL-cached across calls. Swallows exceptions as False."""
+    now = time.monotonic()
+    with _check_fn_cache_lock:
+        cached = _check_fn_cache.get(fn)
+        if cached is not None:
+            ts, value = cached
+            if now - ts < _CHECK_FN_TTL_SECONDS:
+                return value
+    try:
+        value = bool(fn())
+    except Exception:
+        value = False
+    with _check_fn_cache_lock:
+        _check_fn_cache[fn] = (now, value)
+    return value
+
+
+def invalidate_check_fn_cache() -> None:
+    """Drop all cached ``check_fn`` results. Call after config changes that
+    affect tool availability (e.g. ``hermes tools enable``)."""
+    with _check_fn_cache_lock:
+        _check_fn_cache.clear()
+
+
 class ToolRegistry:
     """Singleton registry that collects tool schemas + handlers from tool files."""
 
@@ -108,6 +151,12 @@ class ToolRegistry:
         # reading tool metadata, so keep mutations serialized and readers on
         # stable snapshots.
         self._lock = threading.RLock()
+        # Monotonically-increasing generation counter. Bumped on every
+        # mutation (register / deregister / register_toolset_alias / MCP
+        # refresh). External callers (e.g. get_tool_definitions) can memoize
+        # against it: a cache entry keyed on the generation is valid for as
+        # long as the generation hasn't changed.
+        self._generation: int = 0
 
     def _snapshot_state(self) -> tuple[List[ToolEntry], Dict[str, Callable]]:
         """Return a coherent snapshot of registry entries and toolset checks."""
@@ -158,6 +207,7 @@ class ToolRegistry:
                     alias, existing, toolset,
                 )
             self._toolset_aliases[alias] = toolset
+            self._generation += 1
 
     def get_registered_toolset_aliases(self) -> Dict[str, str]:
         """Return a snapshot of ``{alias: canonical_toolset}`` mappings."""
@@ -225,6 +275,7 @@ class ToolRegistry:
             )
             if check_fn and toolset not in self._toolset_checks:
                 self._toolset_checks[toolset] = check_fn
+            self._generation += 1
 
     def deregister(self, name: str) -> None:
         """Remove a tool from the registry.
@@ -249,6 +300,7 @@ class ToolRegistry:
                     for alias, target in self._toolset_aliases.items()
                     if target != entry.toolset
                 }
+            self._generation += 1
         logger.debug("Deregistered tool: %s", name)
 
     # ------------------------------------------------------------------
@@ -259,9 +311,17 @@ class ToolRegistry:
         """Return OpenAI-format tool schemas for the requested tool names.
 
         Only tools whose ``check_fn()`` returns True (or have no check_fn)
-        are included.
+        are included. ``check_fn()`` results are cached for ~30 s via
+        :func:`_check_fn_cached` to amortize repeat probes (check_terminal_
+        requirements probes modal/docker, browser checks probe playwright,
+        etc.); TTL chosen so env-var changes (``hermes tools enable foo``)
+        still take effect in near-real-time without forcing a full cache
+        flush on every call.
         """
         result = []
+        # Per-call cache on top of the 30 s TTL — handles repeat probes of the
+        # same check_fn within one definitions pass without re-reading the
+        # TTL clock.
         check_results: Dict[Callable, bool] = {}
         entries_by_name = {entry.name: entry for entry in self._snapshot_entries()}
         for name in sorted(tool_names):
@@ -270,12 +330,7 @@ class ToolRegistry:
                 continue
             if entry.check_fn:
                 if entry.check_fn not in check_results:
-                    try:
-                        check_results[entry.check_fn] = bool(entry.check_fn())
-                    except Exception:
-                        check_results[entry.check_fn] = False
-                        if not quiet:
-                            logger.debug("Tool %s check raised; skipping", name)
+                    check_results[entry.check_fn] = _check_fn_cached(entry.check_fn)
                 if not check_results[entry.check_fn]:
                     if not quiet:
                         logger.debug("Tool %s unavailable (check failed)", name)
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 4ae4cd66a..352b4a55b 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -45,12 +45,47 @@ import logging
 import os
 import re
 import asyncio
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, TYPE_CHECKING
 import httpx
 # NOTE: `from firecrawl import Firecrawl` is deliberately NOT at module top —
 # the SDK pulls ~200 ms of imports (httpcore, firecrawl.v1/v2 type trees) and
-# we only need it when the backend is actually "firecrawl". See
-# _get_firecrawl_client() below for the lazy import.
+# we only need it when the backend is actually "firecrawl". We expose
+# ``Firecrawl`` as a thin proxy that imports the SDK on first call/
+# isinstance check, so both (a) the in-module ``Firecrawl(...)`` construction
+# site in _get_firecrawl_client() works unchanged, and (b) tests using
+# ``patch("tools.web_tools.Firecrawl", ...)`` keep working.
+if TYPE_CHECKING:
+    from firecrawl import Firecrawl  # noqa: F401 — type hints only
+
+_FIRECRAWL_CLS_CACHE: Optional[type] = None
+
+
+def _load_firecrawl_cls() -> type:
+    """Import and cache ``firecrawl.Firecrawl``."""
+    global _FIRECRAWL_CLS_CACHE
+    if _FIRECRAWL_CLS_CACHE is None:
+        from firecrawl import Firecrawl as _cls
+        _FIRECRAWL_CLS_CACHE = _cls
+    return _FIRECRAWL_CLS_CACHE
+
+
+class _FirecrawlProxy:
+    """Module-level proxy that looks like ``firecrawl.Firecrawl`` but imports lazily."""
+
+    __slots__ = ()
+
+    def __call__(self, *args, **kwargs):
+        return _load_firecrawl_cls()(*args, **kwargs)
+
+    def __instancecheck__(self, obj):
+        return isinstance(obj, _load_firecrawl_cls())
+
+    def __repr__(self):
+        return "<lazy firecrawl.Firecrawl proxy>"
+
+
+Firecrawl = _FirecrawlProxy()
+
 from agent.auxiliary_client import (
     async_call_llm,
     extract_content_or_reasoning,
@@ -239,8 +274,7 @@ def _get_firecrawl_client():
     if _firecrawl_client is not None and _firecrawl_client_config == client_config:
         return _firecrawl_client
 
-    # Lazy import — ~200 ms of SDK init, only paid when firecrawl is actually used.
-    from firecrawl import Firecrawl  # noqa: E402
+    # Uses the module-level `Firecrawl` name (lazy proxy at module top).
     _firecrawl_client = Firecrawl(**kwargs)
     _firecrawl_client_config = client_config
     return _firecrawl_client