diff --git a/model_tools.py b/model_tools.py index 5c0559f00..d85a8b8ef 100644 --- a/model_tools.py +++ b/model_tools.py @@ -206,6 +206,27 @@ _LEGACY_TOOLSET_MAP = { # get_tool_definitions (the main schema provider) # ============================================================================= +# Module-level memoization for get_tool_definitions(). Keyed on +# (frozenset(enabled_toolsets), frozenset(disabled_toolsets), registry._generation). +# Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn +# with quiet_mode=True; caching avoids ~7 ms of registry walking + schema +# filtering + check_fn probing per call. Only active when quiet_mode=True +# because quiet_mode=False has stdout side effects (tool-selection prints). +# +# Invalidation happens transparently via the registry's _generation counter, +# which bumps on register() / deregister() / register_toolset_alias(). The +# inner check_fn TTL cache in registry.py handles environment drift (Docker +# daemon start/stop, env var changes, etc.) on a 30 s horizon. +_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {} + + +def _clear_tool_defs_cache() -> None: + """Drop memoized get_tool_definitions() results. Called when dynamic + schema dependencies change (e.g. discord capability cache reset, + execute_code sandbox reconfigured).""" + _tool_defs_cache.clear() + + def get_tool_definitions( enabled_toolsets: List[str] = None, disabled_toolsets: List[str] = None, @@ -224,6 +245,50 @@ def get_tool_definitions( Returns: Filtered list of OpenAI-format tool definitions. """ + # Fast path: memoized result when the caller doesn't need stdout prints. + # The cache key captures every argument-level input; the registry + # generation captures registry mutations (MCP refresh, plugin load). + # check_fn results are TTL-cached one level down, inside + # registry.get_definitions. The config-mtime fingerprint below captures + # user-visible config edits that affect dynamic schemas (execute_code + # mode, discord action allowlist, etc.) without needing an explicit + # invalidate hook on every config-writer. + if quiet_mode: + try: + from hermes_cli.config import get_config_path + cfg_path = get_config_path() + cfg_stat = cfg_path.stat() + cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size) + except (FileNotFoundError, OSError, ImportError): + cfg_fp = None + cache_key = ( + frozenset(enabled_toolsets) if enabled_toolsets is not None else None, + frozenset(disabled_toolsets) if disabled_toolsets else None, + registry._generation, + cfg_fp, + ) + cached = _tool_defs_cache.get(cache_key) + if cached is not None: + # Update _last_resolved_tool_names so downstream callers see + # consistent state even on a cache hit. + global _last_resolved_tool_names + _last_resolved_tool_names = [t["function"]["name"] for t in cached] + # Return a shallow copy of the list but share the dict references — + # schemas are treated as read-only by all known callers. + return list(cached) + + result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) + if quiet_mode: + _tool_defs_cache[cache_key] = result + return result + + +def _compute_tool_definitions( + enabled_toolsets: List[str] = None, + disabled_toolsets: List[str] = None, + quiet_mode: bool = False, +) -> List[Dict[str, Any]]: + """Uncached implementation of :func:`get_tool_definitions`.""" # Determine which tool names the caller wants tools_to_include: set = set() diff --git a/tests/conftest.py b/tests/conftest.py index d2545e059..6386e26ec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -480,3 +480,29 @@ def _enforce_test_timeout(): yield signal.alarm(0) signal.signal(signal.SIGALRM, old) + + +@pytest.fixture(autouse=True) +def _reset_tool_registry_caches(): + """Clear tool-registry-level caches between tests. + + The production registry caches ``check_fn()`` results for 30 s + (see tools/registry.py) and :func:`get_tool_definitions` memoizes + its result (see model_tools.py). Both are keyed on state that tests + routinely mutate (env vars, registry._generation, config.yaml mtime) + — but a stale result from test A can still be served to test B + because 30 s covers the entire suite, and xdist worker reuse means + one test's cache lands in another's process. Clearing before every + test keeps hermetic behavior. + """ + try: + from tools.registry import invalidate_check_fn_cache + invalidate_check_fn_cache() + except ImportError: + pass + try: + from model_tools import _clear_tool_defs_cache + _clear_tool_defs_cache() + except ImportError: + pass + yield diff --git a/tools/registry.py b/tools/registry.py index e6d554e2b..342078191 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -19,6 +19,7 @@ import importlib import json import logging import threading +import time from pathlib import Path from typing import Callable, Dict, List, Optional, Set @@ -97,6 +98,48 @@ class ToolEntry: self.max_result_size_chars = max_result_size_chars +# --------------------------------------------------------------------------- +# check_fn TTL cache +# +# check_fn callables like tools/terminal_tool.check_terminal_requirements +# probe external state (Docker daemon, Modal SDK install, playwright binary +# availability). For a long-lived CLI or gateway process, calling them on +# every get_definitions() is pure waste — external state changes on human +# timescales. Cache results for ~30 s so env-var flips via ``hermes tools`` +# or live credential file changes propagate within a turn or two without +# requiring any explicit invalidation. +# --------------------------------------------------------------------------- + +_CHECK_FN_TTL_SECONDS = 30.0 +_check_fn_cache: Dict[Callable, tuple[float, bool]] = {} +_check_fn_cache_lock = threading.Lock() + + +def _check_fn_cached(fn: Callable) -> bool: + """Return bool(fn()), TTL-cached across calls. Swallows exceptions as False.""" + now = time.monotonic() + with _check_fn_cache_lock: + cached = _check_fn_cache.get(fn) + if cached is not None: + ts, value = cached + if now - ts < _CHECK_FN_TTL_SECONDS: + return value + try: + value = bool(fn()) + except Exception: + value = False + with _check_fn_cache_lock: + _check_fn_cache[fn] = (now, value) + return value + + +def invalidate_check_fn_cache() -> None: + """Drop all cached ``check_fn`` results. Call after config changes that + affect tool availability (e.g. ``hermes tools enable``).""" + with _check_fn_cache_lock: + _check_fn_cache.clear() + + class ToolRegistry: """Singleton registry that collects tool schemas + handlers from tool files.""" @@ -108,6 +151,12 @@ class ToolRegistry: # reading tool metadata, so keep mutations serialized and readers on # stable snapshots. self._lock = threading.RLock() + # Monotonically-increasing generation counter. Bumped on every + # mutation (register / deregister / register_toolset_alias / MCP + # refresh). External callers (e.g. get_tool_definitions) can memoize + # against it: a cache entry keyed on the generation is valid for as + # long as the generation hasn't changed. + self._generation: int = 0 def _snapshot_state(self) -> tuple[List[ToolEntry], Dict[str, Callable]]: """Return a coherent snapshot of registry entries and toolset checks.""" @@ -158,6 +207,7 @@ class ToolRegistry: alias, existing, toolset, ) self._toolset_aliases[alias] = toolset + self._generation += 1 def get_registered_toolset_aliases(self) -> Dict[str, str]: """Return a snapshot of ``{alias: canonical_toolset}`` mappings.""" @@ -225,6 +275,7 @@ class ToolRegistry: ) if check_fn and toolset not in self._toolset_checks: self._toolset_checks[toolset] = check_fn + self._generation += 1 def deregister(self, name: str) -> None: """Remove a tool from the registry. @@ -249,6 +300,7 @@ class ToolRegistry: for alias, target in self._toolset_aliases.items() if target != entry.toolset } + self._generation += 1 logger.debug("Deregistered tool: %s", name) # ------------------------------------------------------------------ @@ -259,9 +311,17 @@ class ToolRegistry: """Return OpenAI-format tool schemas for the requested tool names. Only tools whose ``check_fn()`` returns True (or have no check_fn) - are included. + are included. ``check_fn()`` results are cached for ~30 s via + :func:`_check_fn_cached` to amortize repeat probes (check_terminal_ + requirements probes modal/docker, browser checks probe playwright, + etc.); TTL chosen so env-var changes (``hermes tools enable foo``) + still take effect in near-real-time without forcing a full cache + flush on every call. """ result = [] + # Per-call cache on top of the 30 s TTL — handles repeat probes of the + # same check_fn within one definitions pass without re-reading the + # TTL clock. check_results: Dict[Callable, bool] = {} entries_by_name = {entry.name: entry for entry in self._snapshot_entries()} for name in sorted(tool_names): @@ -270,12 +330,7 @@ class ToolRegistry: continue if entry.check_fn: if entry.check_fn not in check_results: - try: - check_results[entry.check_fn] = bool(entry.check_fn()) - except Exception: - check_results[entry.check_fn] = False - if not quiet: - logger.debug("Tool %s check raised; skipping", name) + check_results[entry.check_fn] = _check_fn_cached(entry.check_fn) if not check_results[entry.check_fn]: if not quiet: logger.debug("Tool %s unavailable (check failed)", name) diff --git a/tools/web_tools.py b/tools/web_tools.py index 4ae4cd66a..352b4a55b 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -45,12 +45,47 @@ import logging import os import re import asyncio -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, TYPE_CHECKING import httpx # NOTE: `from firecrawl import Firecrawl` is deliberately NOT at module top — # the SDK pulls ~200 ms of imports (httpcore, firecrawl.v1/v2 type trees) and -# we only need it when the backend is actually "firecrawl". See -# _get_firecrawl_client() below for the lazy import. +# we only need it when the backend is actually "firecrawl". We expose +# ``Firecrawl`` as a thin proxy that imports the SDK on first call/ +# isinstance check, so both (a) the in-module ``Firecrawl(...)`` construction +# site in _get_firecrawl_client() works unchanged, and (b) tests using +# ``patch("tools.web_tools.Firecrawl", ...)`` keep working. +if TYPE_CHECKING: + from firecrawl import Firecrawl # noqa: F401 — type hints only + +_FIRECRAWL_CLS_CACHE: Optional[type] = None + + +def _load_firecrawl_cls() -> type: + """Import and cache ``firecrawl.Firecrawl``.""" + global _FIRECRAWL_CLS_CACHE + if _FIRECRAWL_CLS_CACHE is None: + from firecrawl import Firecrawl as _cls + _FIRECRAWL_CLS_CACHE = _cls + return _FIRECRAWL_CLS_CACHE + + +class _FirecrawlProxy: + """Module-level proxy that looks like ``firecrawl.Firecrawl`` but imports lazily.""" + + __slots__ = () + + def __call__(self, *args, **kwargs): + return _load_firecrawl_cls()(*args, **kwargs) + + def __instancecheck__(self, obj): + return isinstance(obj, _load_firecrawl_cls()) + + def __repr__(self): + return "" + + +Firecrawl = _FirecrawlProxy() + from agent.auxiliary_client import ( async_call_llm, extract_content_or_reasoning, @@ -239,8 +274,7 @@ def _get_firecrawl_client(): if _firecrawl_client is not None and _firecrawl_client_config == client_config: return _firecrawl_client - # Lazy import — ~200 ms of SDK init, only paid when firecrawl is actually used. - from firecrawl import Firecrawl # noqa: E402 + # Uses the module-level `Firecrawl` name (lazy proxy at module top). _firecrawl_client = Firecrawl(**kwargs) _firecrawl_client_config = client_config return _firecrawl_client