From 6e179c44b16d0149f5fa014be29490aff15a6b20 Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Fri, 29 May 2026 03:43:14 -0700 Subject: [PATCH] fix(web): ensure plugin discovery before web_*_tool registry lookups Web search/extract dispatch read agent.web_search_registry before plugin discovery had run, so in any process that hadn't imported model_tools.py (subprocess agent runs, delegate children, standalone scripts) the registry was empty: get_provider('firecrawl') returned None and the dispatcher emitted the misleading 'No web extract provider configured' error even with web.extract_backend set and FIRECRAWL_API_KEY exported. Adds an idempotent _ensure_web_plugins_loaded() helper (mirrors tools.browser_tool._ensure_browser_plugins_loaded) and calls it at the top of both the web_search_tool and web_extract_tool dispatch sites before the registry lookup. Fixes #27580. Co-authored-by: briandevans <252620095+briandevans@users.noreply.github.com> --- tests/tools/test_web_providers.py | 181 ++++++++++++++++++++++++++++++ tools/web_tools.py | 31 +++++ 2 files changed, 212 insertions(+) diff --git a/tests/tools/test_web_providers.py b/tests/tools/test_web_providers.py index 5dd65199e..bd3cce875 100644 --- a/tests/tools/test_web_providers.py +++ b/tests/tools/test_web_providers.py @@ -311,3 +311,184 @@ class TestUnconfiguredErrorEnvelopeParity: # No per-result burying assert "results" not in result + +class TestDispatchersTriggerPluginDiscovery: + """Regression tests for #27580: each web_*_tool dispatcher must + idempotently call ``_ensure_web_plugins_loaded()`` before consulting + ``agent.web_search_registry``. + + Without this, a tool call from a context that hasn't already loaded + plugins (subprocess agent runs, delegate children, standalone scripts, + test paths that import the registry directly) sees an empty registry + and returns the misleading "No web extract provider configured" error + even when the user has both the config key set AND the API key + exported. + + Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` — + every other plugin-backed dispatcher (image_gen, video_gen, browser, + skills) already does this. + """ + + def _clear_registry(self): + """Reset the web_search registry to empty and return a callback + that restores the original contents. Used in a try/finally so the + snapshot is restored even when the dispatcher under test raises.""" + from agent import web_search_registry + + with web_search_registry._lock: + original = dict(web_search_registry._providers) + web_search_registry._providers.clear() + + def _restore(): + with web_search_registry._lock: + web_search_registry._providers.clear() + web_search_registry._providers.update(original) + + return _restore + + def test_web_extract_tool_runs_discovery_before_registry_lookup(self, monkeypatch): + """``web_extract_tool`` must invoke ``_ensure_web_plugins_loaded()`` + before looking up the configured backend so the registry is + populated even from cold-start subprocess contexts. + + Without the fix, ``get_provider('firecrawl')`` returns ``None`` + on a fresh process and the dispatcher emits "No web extract + provider configured" despite the user having both + ``web.extract_backend: firecrawl`` and ``FIRECRAWL_API_KEY`` set + (issue #27580). + """ + import asyncio + import json + from unittest.mock import MagicMock + from agent.web_search_provider import WebSearchProvider + from agent import web_search_registry + from tools import web_tools + + restore = self._clear_registry() + try: + class FakeFirecrawl(WebSearchProvider): + @property + def name(self) -> str: + return "firecrawl" + + @property + def display_name(self) -> str: + return "Fake Firecrawl" + + def is_available(self) -> bool: + return True + + def supports_extract(self) -> bool: + return True + + async def extract(self, urls, format=None): + return [ + {"url": u, "title": "", "content": "ok", + "raw_content": "ok", "metadata": {}} + for u in urls + ] + + # Simulate "plugin discovery loads the firecrawl plugin": the + # wrapped helper registers the provider, mirroring what + # ``plugins/web/firecrawl/__init__.py:register`` does at + # real-process startup. Wrapping with ``MagicMock`` lets us + # also assert the dispatcher actually invoked the hook — if + # a future refactor accidentally drops the call the regression + # would otherwise hide behind a still-populated registry. + def _register_fake() -> None: + if web_search_registry.get_provider("firecrawl") is None: + web_search_registry.register_provider(FakeFirecrawl()) + + mock_hook = MagicMock(wraps=_register_fake) + # Patch the helper on ``tools.web_tools`` directly rather than the + # underlying ``hermes_cli.plugins._ensure_plugins_discovered`` so + # the test stays valid even if the import inside the helper is + # later moved to module scope or renamed. + monkeypatch.setattr( + web_tools, "_ensure_web_plugins_loaded", mock_hook + ) + monkeypatch.setattr( + web_tools, "_load_web_config", + lambda: {"extract_backend": "firecrawl"}, + ) + # Sanity: registry IS empty before the tool call. + assert web_search_registry.get_provider("firecrawl") is None + + result = json.loads(asyncio.run( + web_tools.web_extract_tool( + ["https://example.com"], + use_llm_processing=False, + ) + )) + + # The hook must have been called BEFORE the registry lookup — + # that is the invariant under regression test. Without the + # explicit ``.called`` assertion the test could pass if the + # registry were populated by some unrelated side effect. + assert mock_hook.called, ( + "web_extract_tool must call _ensure_web_plugins_loaded() " + "before resolving the registry" + ) + assert "No web extract provider configured" not in json.dumps(result) + assert web_search_registry.get_provider("firecrawl") is not None + finally: + restore() + + def test_web_search_tool_runs_discovery_before_registry_lookup(self, monkeypatch): + """``web_search_tool`` must invoke ``_ensure_web_plugins_loaded()`` + before the registry lookup for the same reason as the extract + path (issue #27580 root cause applies to all dispatchers). + """ + import json + from unittest.mock import MagicMock + from agent.web_search_provider import WebSearchProvider + from agent import web_search_registry + from tools import web_tools + + restore = self._clear_registry() + try: + class FakeBrave(WebSearchProvider): + @property + def name(self) -> str: + return "brave-free" + + @property + def display_name(self) -> str: + return "Fake Brave" + + def is_available(self) -> bool: + return True + + def supports_search(self) -> bool: + return True + + def search(self, query, limit=5): + return {"success": True, "data": {"web": [ + {"title": "ok", "url": "https://x", "description": "", + "position": 0} + ]}} + + def _register_fake() -> None: + if web_search_registry.get_provider("brave-free") is None: + web_search_registry.register_provider(FakeBrave()) + + mock_hook = MagicMock(wraps=_register_fake) + monkeypatch.setattr( + web_tools, "_ensure_web_plugins_loaded", mock_hook + ) + monkeypatch.setattr( + web_tools, "_load_web_config", + lambda: {"search_backend": "brave-free"}, + ) + assert web_search_registry.get_provider("brave-free") is None + + result = json.loads(web_tools.web_search_tool("hello", limit=1)) + assert mock_hook.called, ( + "web_search_tool must call _ensure_web_plugins_loaded() " + "before resolving the registry" + ) + assert "No web search provider configured" not in json.dumps(result) + assert web_search_registry.get_provider("brave-free") is not None + finally: + restore() + diff --git a/tools/web_tools.py b/tools/web_tools.py index 36440b978..509546fd5 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -732,6 +732,35 @@ def clean_base64_images(text: str) -> str: # dispatchers in this file resolve them via get_active_*_provider(). +def _ensure_web_plugins_loaded() -> None: + """Idempotently trigger plugin discovery so the web registry is populated. + + Every bundled web provider (brave-free, ddgs, searxng, exa, parallel, + tavily, firecrawl) registers itself via ``plugins/web//__init__.py`` + during plugin discovery. Tool dispatch can be reached from contexts that + haven't already triggered discovery — subprocess agent runs, delegate + children, standalone scripts, certain test paths — and without it the + registry is empty and ``get_provider('firecrawl')`` returns ``None`` even + when the user has ``web.extract_backend: firecrawl`` configured and + ``FIRECRAWL_API_KEY`` set. The symptom is a misleading "No web extract + provider configured" error (issue #27580). + + Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly: + the underlying discovery call is idempotent and cheap on subsequent + invocations. + """ + try: + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + except Exception as exc: # noqa: BLE001 + # Warning, not debug: if a plugin import is genuinely broken the + # user otherwise hits the misleading "No web extract provider + # configured" error this helper is meant to eliminate, with no + # clue in normal logs about the real cause. + logger.warning("Web plugin discovery failed (non-fatal): %s", exc) + + def web_search_tool(query: str, limit: int = 5) -> str: """ Search the web for information using available search API backend. @@ -792,6 +821,7 @@ def web_search_tool(query: str, limit: int = 5) -> str: # (brave-free, ddgs, searxng, exa, parallel, tavily, firecrawl) # now live as plugins; the dispatcher is just a registry lookup + # delegation. Sync only — every provider's search() is sync. + _ensure_web_plugins_loaded() from agent.web_search_registry import ( get_active_search_provider, get_provider as _wsp_get_provider, @@ -924,6 +954,7 @@ async def web_extract_tool( # detect coroutine functions and await; sync functions run # inline (the policy gate, SSRF re-check, etc. live inside the # provider itself for the firecrawl per-URL loop). + _ensure_web_plugins_loaded() from agent.web_search_registry import ( get_active_extract_provider, get_provider as _wsp_get_provider,