fix(web): ensure plugin discovery before web_*_tool registry lookups

Web search/extract dispatch read agent.web_search_registry before plugin
discovery had run, so in any process that hadn't imported model_tools.py
(subprocess agent runs, delegate children, standalone scripts) the registry
was empty: get_provider('firecrawl') returned None and the dispatcher emitted
the misleading 'No web extract provider configured' error even with
web.extract_backend set and FIRECRAWL_API_KEY exported.

Adds an idempotent _ensure_web_plugins_loaded() helper (mirrors
tools.browser_tool._ensure_browser_plugins_loaded) and calls it at the top of
both the web_search_tool and web_extract_tool dispatch sites before the
registry lookup.

Fixes #27580.

Co-authored-by: briandevans <252620095+briandevans@users.noreply.github.com>
This commit is contained in:
briandevans
2026-05-29 03:43:14 -07:00
committed by Teknium
parent 58e1b04665
commit 6e179c44b1
2 changed files with 212 additions and 0 deletions

View File

@ -311,3 +311,184 @@ class TestUnconfiguredErrorEnvelopeParity:
# No per-result burying
assert "results" not in result
class TestDispatchersTriggerPluginDiscovery:
"""Regression tests for #27580: each web_*_tool dispatcher must
idempotently call ``_ensure_web_plugins_loaded()`` before consulting
``agent.web_search_registry``.
Without this, a tool call from a context that hasn't already loaded
plugins (subprocess agent runs, delegate children, standalone scripts,
test paths that import the registry directly) sees an empty registry
and returns the misleading "No web extract provider configured" error
even when the user has both the config key set AND the API key
exported.
Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` —
every other plugin-backed dispatcher (image_gen, video_gen, browser,
skills) already does this.
"""
def _clear_registry(self):
"""Reset the web_search registry to empty and return a callback
that restores the original contents. Used in a try/finally so the
snapshot is restored even when the dispatcher under test raises."""
from agent import web_search_registry
with web_search_registry._lock:
original = dict(web_search_registry._providers)
web_search_registry._providers.clear()
def _restore():
with web_search_registry._lock:
web_search_registry._providers.clear()
web_search_registry._providers.update(original)
return _restore
def test_web_extract_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
"""``web_extract_tool`` must invoke ``_ensure_web_plugins_loaded()``
before looking up the configured backend so the registry is
populated even from cold-start subprocess contexts.
Without the fix, ``get_provider('firecrawl')`` returns ``None``
on a fresh process and the dispatcher emits "No web extract
provider configured" despite the user having both
``web.extract_backend: firecrawl`` and ``FIRECRAWL_API_KEY`` set
(issue #27580).
"""
import asyncio
import json
from unittest.mock import MagicMock
from agent.web_search_provider import WebSearchProvider
from agent import web_search_registry
from tools import web_tools
restore = self._clear_registry()
try:
class FakeFirecrawl(WebSearchProvider):
@property
def name(self) -> str:
return "firecrawl"
@property
def display_name(self) -> str:
return "Fake Firecrawl"
def is_available(self) -> bool:
return True
def supports_extract(self) -> bool:
return True
async def extract(self, urls, format=None):
return [
{"url": u, "title": "", "content": "ok",
"raw_content": "ok", "metadata": {}}
for u in urls
]
# Simulate "plugin discovery loads the firecrawl plugin": the
# wrapped helper registers the provider, mirroring what
# ``plugins/web/firecrawl/__init__.py:register`` does at
# real-process startup. Wrapping with ``MagicMock`` lets us
# also assert the dispatcher actually invoked the hook — if
# a future refactor accidentally drops the call the regression
# would otherwise hide behind a still-populated registry.
def _register_fake() -> None:
if web_search_registry.get_provider("firecrawl") is None:
web_search_registry.register_provider(FakeFirecrawl())
mock_hook = MagicMock(wraps=_register_fake)
# Patch the helper on ``tools.web_tools`` directly rather than the
# underlying ``hermes_cli.plugins._ensure_plugins_discovered`` so
# the test stays valid even if the import inside the helper is
# later moved to module scope or renamed.
monkeypatch.setattr(
web_tools, "_ensure_web_plugins_loaded", mock_hook
)
monkeypatch.setattr(
web_tools, "_load_web_config",
lambda: {"extract_backend": "firecrawl"},
)
# Sanity: registry IS empty before the tool call.
assert web_search_registry.get_provider("firecrawl") is None
result = json.loads(asyncio.run(
web_tools.web_extract_tool(
["https://example.com"],
use_llm_processing=False,
)
))
# The hook must have been called BEFORE the registry lookup —
# that is the invariant under regression test. Without the
# explicit ``.called`` assertion the test could pass if the
# registry were populated by some unrelated side effect.
assert mock_hook.called, (
"web_extract_tool must call _ensure_web_plugins_loaded() "
"before resolving the registry"
)
assert "No web extract provider configured" not in json.dumps(result)
assert web_search_registry.get_provider("firecrawl") is not None
finally:
restore()
def test_web_search_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
"""``web_search_tool`` must invoke ``_ensure_web_plugins_loaded()``
before the registry lookup for the same reason as the extract
path (issue #27580 root cause applies to all dispatchers).
"""
import json
from unittest.mock import MagicMock
from agent.web_search_provider import WebSearchProvider
from agent import web_search_registry
from tools import web_tools
restore = self._clear_registry()
try:
class FakeBrave(WebSearchProvider):
@property
def name(self) -> str:
return "brave-free"
@property
def display_name(self) -> str:
return "Fake Brave"
def is_available(self) -> bool:
return True
def supports_search(self) -> bool:
return True
def search(self, query, limit=5):
return {"success": True, "data": {"web": [
{"title": "ok", "url": "https://x", "description": "",
"position": 0}
]}}
def _register_fake() -> None:
if web_search_registry.get_provider("brave-free") is None:
web_search_registry.register_provider(FakeBrave())
mock_hook = MagicMock(wraps=_register_fake)
monkeypatch.setattr(
web_tools, "_ensure_web_plugins_loaded", mock_hook
)
monkeypatch.setattr(
web_tools, "_load_web_config",
lambda: {"search_backend": "brave-free"},
)
assert web_search_registry.get_provider("brave-free") is None
result = json.loads(web_tools.web_search_tool("hello", limit=1))
assert mock_hook.called, (
"web_search_tool must call _ensure_web_plugins_loaded() "
"before resolving the registry"
)
assert "No web search provider configured" not in json.dumps(result)
assert web_search_registry.get_provider("brave-free") is not None
finally:
restore()

View File

@ -732,6 +732,35 @@ def clean_base64_images(text: str) -> str:
# dispatchers in this file resolve them via get_active_*_provider().
def _ensure_web_plugins_loaded() -> None:
"""Idempotently trigger plugin discovery so the web registry is populated.
Every bundled web provider (brave-free, ddgs, searxng, exa, parallel,
tavily, firecrawl) registers itself via ``plugins/web/<vendor>/__init__.py``
during plugin discovery. Tool dispatch can be reached from contexts that
haven't already triggered discovery — subprocess agent runs, delegate
children, standalone scripts, certain test paths — and without it the
registry is empty and ``get_provider('firecrawl')`` returns ``None`` even
when the user has ``web.extract_backend: firecrawl`` configured and
``FIRECRAWL_API_KEY`` set. The symptom is a misleading "No web extract
provider configured" error (issue #27580).
Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly:
the underlying discovery call is idempotent and cheap on subsequent
invocations.
"""
try:
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
except Exception as exc: # noqa: BLE001
# Warning, not debug: if a plugin import is genuinely broken the
# user otherwise hits the misleading "No web extract provider
# configured" error this helper is meant to eliminate, with no
# clue in normal logs about the real cause.
logger.warning("Web plugin discovery failed (non-fatal): %s", exc)
def web_search_tool(query: str, limit: int = 5) -> str:
"""
Search the web for information using available search API backend.
@ -792,6 +821,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
# (brave-free, ddgs, searxng, exa, parallel, tavily, firecrawl)
# now live as plugins; the dispatcher is just a registry lookup +
# delegation. Sync only — every provider's search() is sync.
_ensure_web_plugins_loaded()
from agent.web_search_registry import (
get_active_search_provider,
get_provider as _wsp_get_provider,
@ -924,6 +954,7 @@ async def web_extract_tool(
# detect coroutine functions and await; sync functions run
# inline (the policy gate, SSRF re-check, etc. live inside the
# provider itself for the firecrawl per-URL loop).
_ensure_web_plugins_loaded()
from agent.web_search_registry import (
get_active_extract_provider,
get_provider as _wsp_get_provider,