perf(tools): memoize get_tool_definitions + TTL-cache check_fn results (#17098)

Two mutually reinforcing optimizations that cut per-turn overhead in the gateway:

1. get_tool_definitions() memoization (model_tools.py)
   Keyed on (frozenset(enabled), frozenset(disabled),
   registry._generation, config.yaml mtime+size). Only active when
   quiet_mode=True (which is every hot-path caller — gateway,
   AIAgent.__init__); quiet_mode=False keeps the existing print side
   effects. Cached path returns a shallow-copy list sharing read-only
   schema dicts.

   Measured: 7.5 ms → 0.01 ms per call (~750× speedup). Gateway
   constructs fresh AIAgent per message, so this saves ~7 ms/turn before
   any LLM work.

2. check_fn() TTL cache (tools/registry.py)
   check_fn callables like check_terminal_requirements probe external
   state (Docker daemon, Modal SDK, playwright binary). For a long-lived
   process, hitting them on every get_definitions() pass was pure waste
   — external state changes on human timescales. 30 s TTL so env-var
   flips (hermes tools enable X) propagate within a turn or two without
   explicit invalidation.

   Measured: first call 7.5ms → 1.6ms (check_fn probes now dominate);
   subsequent calls ~0.01ms via the upstream memoization.

Invalidation surface:
- registry._generation bumps on register/deregister/register_toolset_alias,
  invalidating the memoized definitions automatically.
- config.yaml mtime+size in the cache key captures user-visible config edits
  affecting dynamic schemas (execute_code mode, discord allowlist).
- invalidate_check_fn_cache() exposed for explicit flushes (e.g. after
  hermes tools enable/disable).
- tests/conftest.py autouse fixture clears both caches before every test
  so env-var monkeypatches don't see stale results.

Also fixes a regression from PR #17046 that I missed:
- tools/web_tools.py — Firecrawl was removed from module scope by the
  lazy import, breaking 8 tests that patch 'tools.web_tools.Firecrawl'.
  Applied the same _FirecrawlProxy pattern used in auxiliary_client/
  run_agent for OpenAI (module-level proxy that looks like the class
  but imports the SDK on first call/isinstance; patch() replaces the
  attribute as usual).

Verified:
- 49/49 tests/tools/test_web_tools_config.py pass (was 8 failing on main)
- 68/68 tests/tools/test_homeassistant_tool.py pass (was 1 failing in
  the full suite due to check_fn TTL cross-test pollution; fixed by
  the autouse fixture)
- 3887/3895 tests/tools/ (8 pre-existing fails: 2 delegate, 1 mcp
  dynamic discovery, 5 mcp structured content — all confirmed on main)
- 2973/2976 tests/agent/ + tests/run_agent/ (3 pre-existing fails)
- 868/868 tests/run_agent/ (excluding test_run_agent.py which has
  pre-existing suite-level issues)
- Live smoke: 2 turns + /model switch + tool calls, zero errors in
  agent.log session window.

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
This commit is contained in:
Teknium
2026-04-28 18:20:17 -07:00
committed by GitHub
parent 188eaa57c4
commit 9f004b6d94
4 changed files with 192 additions and 12 deletions

View File

@ -206,6 +206,27 @@ _LEGACY_TOOLSET_MAP = {
# get_tool_definitions (the main schema provider)
# =============================================================================
# Module-level memoization for get_tool_definitions(). Keyed on
# (frozenset(enabled_toolsets), frozenset(disabled_toolsets),
# registry._generation, config.yaml (st_mtime_ns, st_size) fingerprint).
# Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn
# with quiet_mode=True; caching avoids ~7 ms of registry walking + schema
# filtering + check_fn probing per call. Only active when quiet_mode=True
# because quiet_mode=False has stdout side effects (tool-selection prints).
#
# Invalidation happens transparently via the registry's _generation counter,
# which bumps on register() / deregister() / register_toolset_alias(). The
# inner check_fn TTL cache in registry.py handles environment drift (Docker
# daemon start/stop, env var changes, etc.) on a 30 s horizon.
_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}


def _clear_tool_defs_cache() -> None:
    """Empty the memo table that backs get_tool_definitions().

    Meant to be called when something feeding the dynamically generated
    schemas changes (e.g. discord capability cache reset, execute_code
    sandbox reconfigured). The dict object itself is kept; only its
    entries are removed.
    """
    _tool_defs_cache.clear()
def get_tool_definitions(
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
@ -224,6 +245,50 @@ def get_tool_definitions(
Returns:
Filtered list of OpenAI-format tool definitions.
"""
# Fast path: memoized result when the caller doesn't need stdout prints.
# The cache key captures every argument-level input; the registry
# generation captures registry mutations (MCP refresh, plugin load).
# check_fn results are TTL-cached one level down, inside
# registry.get_definitions. The config-mtime fingerprint below captures
# user-visible config edits that affect dynamic schemas (execute_code
# mode, discord action allowlist, etc.) without needing an explicit
# invalidate hook on every config-writer.
if quiet_mode:
try:
from hermes_cli.config import get_config_path
cfg_path = get_config_path()
cfg_stat = cfg_path.stat()
cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size)
except (FileNotFoundError, OSError, ImportError):
cfg_fp = None
cache_key = (
frozenset(enabled_toolsets) if enabled_toolsets is not None else None,
frozenset(disabled_toolsets) if disabled_toolsets else None,
registry._generation,
cfg_fp,
)
cached = _tool_defs_cache.get(cache_key)
if cached is not None:
# Update _last_resolved_tool_names so downstream callers see
# consistent state even on a cache hit.
global _last_resolved_tool_names
_last_resolved_tool_names = [t["function"]["name"] for t in cached]
# Return a shallow copy of the list but share the dict references —
# schemas are treated as read-only by all known callers.
return list(cached)
result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
if quiet_mode:
_tool_defs_cache[cache_key] = result
return result
def _compute_tool_definitions(
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
quiet_mode: bool = False,
) -> List[Dict[str, Any]]:
"""Uncached implementation of :func:`get_tool_definitions`."""
# Determine which tool names the caller wants
tools_to_include: set = set()

View File

@ -480,3 +480,29 @@ def _enforce_test_timeout():
yield
signal.alarm(0)
signal.signal(signal.SIGALRM, old)
@pytest.fixture(autouse=True)
def _reset_tool_registry_caches():
    """Flush the tool-registry caches ahead of every test.

    Two process-wide caches exist in production code: the registry keeps
    ``check_fn()`` outcomes for 30 s (see tools/registry.py), and
    :func:`get_tool_definitions` memoizes its result (see model_tools.py).
    Tests routinely change the state those results were derived from (env
    vars, registry contents, config.yaml), and a 30 s window easily spans
    many tests in one process, so a value cached during one test could be
    served to a later one. Emptying both caches before each test keeps the
    tests hermetic.
    """
    # Each module is optional: when it cannot be imported there is nothing
    # to reset, so the fixture silently moves on.
    try:
        from tools.registry import invalidate_check_fn_cache
    except ImportError:
        pass
    else:
        invalidate_check_fn_cache()

    try:
        from model_tools import _clear_tool_defs_cache
    except ImportError:
        pass
    else:
        _clear_tool_defs_cache()

    yield

View File

@ -19,6 +19,7 @@ import importlib
import json
import logging
import threading
import time
from pathlib import Path
from typing import Callable, Dict, List, Optional, Set
@ -97,6 +98,48 @@ class ToolEntry:
self.max_result_size_chars = max_result_size_chars
# ---------------------------------------------------------------------------
# check_fn TTL cache
#
# check_fn callables like tools/terminal_tool.check_terminal_requirements
# probe external state (Docker daemon, Modal SDK install, playwright binary
# availability). For a long-lived CLI or gateway process, calling them on
# every get_definitions() is pure waste — external state changes on human
# timescales. Cache results for ~30 s so env-var flips via ``hermes tools``
# or live credential file changes propagate within a turn or two without
# requiring any explicit invalidation.
# ---------------------------------------------------------------------------
_CHECK_FN_TTL_SECONDS = 30.0
# Maps each check_fn callable to (monotonic timestamp of the probe, result).
_check_fn_cache: Dict[Callable, tuple[float, bool]] = {}
_check_fn_cache_lock = threading.Lock()


def _check_fn_cached(fn: Callable) -> bool:
    """Return ``bool(fn())``, reusing a cached result younger than the TTL.

    Args:
        fn: Zero-argument availability probe. The callable itself is the
            cache key.

    Returns:
        The truthiness of ``fn()``. A probe that raises is treated as
        ``False``, and that ``False`` is cached like any other result.
    """
    now = time.monotonic()
    with _check_fn_cache_lock:
        cached = _check_fn_cache.get(fn)
    if cached is not None:
        ts, value = cached
        if now - ts < _CHECK_FN_TTL_SECONDS:
            return value

    # The probe runs outside the lock so a slow check (daemon or SDK lookup)
    # does not block other threads that only need to read the cache.
    try:
        value = bool(fn())
    except Exception:
        # Keep the best-effort contract, but leave a trace: a silently
        # swallowed probe failure is otherwise invisible.
        logging.getLogger(__name__).debug(
            "check_fn %r raised; treating as unavailable", fn, exc_info=True
        )
        value = False

    # Stamp the entry *after* the probe finished. Using the pre-probe time
    # would shorten the effective TTL by the probe's own duration, and a
    # probe slower than the TTL would be stored already expired.
    with _check_fn_cache_lock:
        _check_fn_cache[fn] = (time.monotonic(), value)
    return value
def invalidate_check_fn_cache() -> None:
    """Forget every cached ``check_fn`` outcome.

    Use after a configuration change that can alter tool availability
    (e.g. ``hermes tools enable``), so the next lookup re-runs the probes
    instead of waiting for the TTL to lapse. The clear happens while
    holding the cache lock.
    """
    with _check_fn_cache_lock:
        _check_fn_cache.clear()
class ToolRegistry:
"""Singleton registry that collects tool schemas + handlers from tool files."""
@ -108,6 +151,12 @@ class ToolRegistry:
# reading tool metadata, so keep mutations serialized and readers on
# stable snapshots.
self._lock = threading.RLock()
# Monotonically-increasing generation counter. Bumped on every
# mutation (register / deregister / register_toolset_alias / MCP
# refresh). External callers (e.g. get_tool_definitions) can memoize
# against it: a cache entry keyed on the generation is valid for as
# long as the generation hasn't changed.
self._generation: int = 0
def _snapshot_state(self) -> tuple[List[ToolEntry], Dict[str, Callable]]:
"""Return a coherent snapshot of registry entries and toolset checks."""
@ -158,6 +207,7 @@ class ToolRegistry:
alias, existing, toolset,
)
self._toolset_aliases[alias] = toolset
self._generation += 1
def get_registered_toolset_aliases(self) -> Dict[str, str]:
"""Return a snapshot of ``{alias: canonical_toolset}`` mappings."""
@ -225,6 +275,7 @@ class ToolRegistry:
)
if check_fn and toolset not in self._toolset_checks:
self._toolset_checks[toolset] = check_fn
self._generation += 1
def deregister(self, name: str) -> None:
"""Remove a tool from the registry.
@ -249,6 +300,7 @@ class ToolRegistry:
for alias, target in self._toolset_aliases.items()
if target != entry.toolset
}
self._generation += 1
logger.debug("Deregistered tool: %s", name)
# ------------------------------------------------------------------
@ -259,9 +311,17 @@ class ToolRegistry:
"""Return OpenAI-format tool schemas for the requested tool names.
Only tools whose ``check_fn()`` returns True (or have no check_fn)
are included.
are included. ``check_fn()`` results are cached for ~30 s via
:func:`_check_fn_cached` to amortize repeat probes (check_terminal_
requirements probes modal/docker, browser checks probe playwright,
etc.); TTL chosen so env-var changes (``hermes tools enable foo``)
still take effect in near-real-time without forcing a full cache
flush on every call.
"""
result = []
# Per-call cache on top of the 30 s TTL — handles repeat probes of the
# same check_fn within one definitions pass without re-reading the
# TTL clock.
check_results: Dict[Callable, bool] = {}
entries_by_name = {entry.name: entry for entry in self._snapshot_entries()}
for name in sorted(tool_names):
@ -270,12 +330,7 @@ class ToolRegistry:
continue
if entry.check_fn:
if entry.check_fn not in check_results:
try:
check_results[entry.check_fn] = bool(entry.check_fn())
except Exception:
check_results[entry.check_fn] = False
if not quiet:
logger.debug("Tool %s check raised; skipping", name)
check_results[entry.check_fn] = _check_fn_cached(entry.check_fn)
if not check_results[entry.check_fn]:
if not quiet:
logger.debug("Tool %s unavailable (check failed)", name)

View File

@ -45,12 +45,47 @@ import logging
import os
import re
import asyncio
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, TYPE_CHECKING
import httpx
# NOTE: `from firecrawl import Firecrawl` is deliberately NOT at module top —
# the SDK pulls ~200 ms of imports (httpcore, firecrawl.v1/v2 type trees) and
# we only need it when the backend is actually "firecrawl". See
# _get_firecrawl_client() below for the lazy import.
# we only need it when the backend is actually "firecrawl". We expose
# ``Firecrawl`` as a thin proxy that imports the SDK on first call/
# isinstance check, so both (a) the in-module ``Firecrawl(...)`` construction
# site in _get_firecrawl_client() works unchanged, and (b) tests using
# ``patch("tools.web_tools.Firecrawl", ...)`` keep working.
if TYPE_CHECKING:
from firecrawl import Firecrawl # noqa: F401 — type hints only
_FIRECRAWL_CLS_CACHE: Optional[type] = None


def _load_firecrawl_cls() -> type:
    """Return ``firecrawl.Firecrawl``, importing the SDK only on first use.

    The class is remembered in ``_FIRECRAWL_CLS_CACHE`` so later calls
    return it without running the import statement again.
    """
    global _FIRECRAWL_CLS_CACHE
    if _FIRECRAWL_CLS_CACHE is not None:
        return _FIRECRAWL_CLS_CACHE

    # Deferred import: the SDK is only loaded when a caller needs the class.
    from firecrawl import Firecrawl as sdk_cls

    _FIRECRAWL_CLS_CACHE = sdk_cls
    return sdk_cls
class _FirecrawlProxy:
    """Lazy stand-in for ``firecrawl.Firecrawl`` bound at module level.

    Calling the proxy constructs a real ``Firecrawl`` instance, and
    ``isinstance(obj, proxy)`` checks against the real class. In both cases
    the class is obtained through ``_load_firecrawl_cls()``, so the SDK is
    not imported until one of them happens.
    """

    # The proxy carries no per-instance state.
    __slots__ = ()

    def __call__(self, *args, **kwargs):
        sdk_cls = _load_firecrawl_cls()
        return sdk_cls(*args, **kwargs)

    def __instancecheck__(self, obj):
        sdk_cls = _load_firecrawl_cls()
        return isinstance(obj, sdk_cls)

    def __repr__(self):
        return "<lazy firecrawl.Firecrawl proxy>"


Firecrawl = _FirecrawlProxy()
from agent.auxiliary_client import (
async_call_llm,
extract_content_or_reasoning,
@ -239,8 +274,7 @@ def _get_firecrawl_client():
if _firecrawl_client is not None and _firecrawl_client_config == client_config:
return _firecrawl_client
# Lazy import — ~200 ms of SDK init, only paid when firecrawl is actually used.
from firecrawl import Firecrawl # noqa: E402
# Uses the module-level `Firecrawl` name (lazy proxy at module top).
_firecrawl_client = Firecrawl(**kwargs)
_firecrawl_client_config = client_config
return _firecrawl_client