fix(web): ensure plugin discovery before web_*_tool registry lookups
Web search/extract dispatch read agent.web_search_registry before plugin
discovery had run, so in any process that hadn't imported model_tools.py
(subprocess agent runs, delegate children, standalone scripts) the registry
was empty: get_provider('firecrawl') returned None and the dispatcher emitted
the misleading 'No web extract provider configured' error even with
web.extract_backend set and FIRECRAWL_API_KEY exported.
Adds an idempotent _ensure_web_plugins_loaded() helper (mirrors
tools.browser_tool._ensure_browser_plugins_loaded) and calls it at the top of
both the web_search_tool and web_extract_tool dispatch sites before the
registry lookup.
Fixes #27580.
Co-authored-by: briandevans <252620095+briandevans@users.noreply.github.com>
This commit is contained in:
@ -311,3 +311,184 @@ class TestUnconfiguredErrorEnvelopeParity:
|
||||
# No per-result burying
|
||||
assert "results" not in result
|
||||
|
||||
|
||||
class TestDispatchersTriggerPluginDiscovery:
|
||||
"""Regression tests for #27580: each web_*_tool dispatcher must
|
||||
idempotently call ``_ensure_web_plugins_loaded()`` before consulting
|
||||
``agent.web_search_registry``.
|
||||
|
||||
Without this, a tool call from a context that hasn't already loaded
|
||||
plugins (subprocess agent runs, delegate children, standalone scripts,
|
||||
test paths that import the registry directly) sees an empty registry
|
||||
and returns the misleading "No web extract provider configured" error
|
||||
even when the user has both the config key set AND the API key
|
||||
exported.
|
||||
|
||||
Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` —
|
||||
every other plugin-backed dispatcher (image_gen, video_gen, browser,
|
||||
skills) already does this.
|
||||
"""
|
||||
|
||||
def _clear_registry(self):
|
||||
"""Reset the web_search registry to empty and return a callback
|
||||
that restores the original contents. Used in a try/finally so the
|
||||
snapshot is restored even when the dispatcher under test raises."""
|
||||
from agent import web_search_registry
|
||||
|
||||
with web_search_registry._lock:
|
||||
original = dict(web_search_registry._providers)
|
||||
web_search_registry._providers.clear()
|
||||
|
||||
def _restore():
|
||||
with web_search_registry._lock:
|
||||
web_search_registry._providers.clear()
|
||||
web_search_registry._providers.update(original)
|
||||
|
||||
return _restore
|
||||
|
||||
def test_web_extract_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
|
||||
"""``web_extract_tool`` must invoke ``_ensure_web_plugins_loaded()``
|
||||
before looking up the configured backend so the registry is
|
||||
populated even from cold-start subprocess contexts.
|
||||
|
||||
Without the fix, ``get_provider('firecrawl')`` returns ``None``
|
||||
on a fresh process and the dispatcher emits "No web extract
|
||||
provider configured" despite the user having both
|
||||
``web.extract_backend: firecrawl`` and ``FIRECRAWL_API_KEY`` set
|
||||
(issue #27580).
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
from agent.web_search_provider import WebSearchProvider
|
||||
from agent import web_search_registry
|
||||
from tools import web_tools
|
||||
|
||||
restore = self._clear_registry()
|
||||
try:
|
||||
class FakeFirecrawl(WebSearchProvider):
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "firecrawl"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "Fake Firecrawl"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return True
|
||||
|
||||
def supports_extract(self) -> bool:
|
||||
return True
|
||||
|
||||
async def extract(self, urls, format=None):
|
||||
return [
|
||||
{"url": u, "title": "", "content": "ok",
|
||||
"raw_content": "ok", "metadata": {}}
|
||||
for u in urls
|
||||
]
|
||||
|
||||
# Simulate "plugin discovery loads the firecrawl plugin": the
|
||||
# wrapped helper registers the provider, mirroring what
|
||||
# ``plugins/web/firecrawl/__init__.py:register`` does at
|
||||
# real-process startup. Wrapping with ``MagicMock`` lets us
|
||||
# also assert the dispatcher actually invoked the hook — if
|
||||
# a future refactor accidentally drops the call the regression
|
||||
# would otherwise hide behind a still-populated registry.
|
||||
def _register_fake() -> None:
|
||||
if web_search_registry.get_provider("firecrawl") is None:
|
||||
web_search_registry.register_provider(FakeFirecrawl())
|
||||
|
||||
mock_hook = MagicMock(wraps=_register_fake)
|
||||
# Patch the helper on ``tools.web_tools`` directly rather than the
|
||||
# underlying ``hermes_cli.plugins._ensure_plugins_discovered`` so
|
||||
# the test stays valid even if the import inside the helper is
|
||||
# later moved to module scope or renamed.
|
||||
monkeypatch.setattr(
|
||||
web_tools, "_ensure_web_plugins_loaded", mock_hook
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
web_tools, "_load_web_config",
|
||||
lambda: {"extract_backend": "firecrawl"},
|
||||
)
|
||||
# Sanity: registry IS empty before the tool call.
|
||||
assert web_search_registry.get_provider("firecrawl") is None
|
||||
|
||||
result = json.loads(asyncio.run(
|
||||
web_tools.web_extract_tool(
|
||||
["https://example.com"],
|
||||
use_llm_processing=False,
|
||||
)
|
||||
))
|
||||
|
||||
# The hook must have been called BEFORE the registry lookup —
|
||||
# that is the invariant under regression test. Without the
|
||||
# explicit ``.called`` assertion the test could pass if the
|
||||
# registry were populated by some unrelated side effect.
|
||||
assert mock_hook.called, (
|
||||
"web_extract_tool must call _ensure_web_plugins_loaded() "
|
||||
"before resolving the registry"
|
||||
)
|
||||
assert "No web extract provider configured" not in json.dumps(result)
|
||||
assert web_search_registry.get_provider("firecrawl") is not None
|
||||
finally:
|
||||
restore()
|
||||
|
||||
def test_web_search_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
|
||||
"""``web_search_tool`` must invoke ``_ensure_web_plugins_loaded()``
|
||||
before the registry lookup for the same reason as the extract
|
||||
path (issue #27580 root cause applies to all dispatchers).
|
||||
"""
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
from agent.web_search_provider import WebSearchProvider
|
||||
from agent import web_search_registry
|
||||
from tools import web_tools
|
||||
|
||||
restore = self._clear_registry()
|
||||
try:
|
||||
class FakeBrave(WebSearchProvider):
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "brave-free"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "Fake Brave"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return True
|
||||
|
||||
def supports_search(self) -> bool:
|
||||
return True
|
||||
|
||||
def search(self, query, limit=5):
|
||||
return {"success": True, "data": {"web": [
|
||||
{"title": "ok", "url": "https://x", "description": "",
|
||||
"position": 0}
|
||||
]}}
|
||||
|
||||
def _register_fake() -> None:
|
||||
if web_search_registry.get_provider("brave-free") is None:
|
||||
web_search_registry.register_provider(FakeBrave())
|
||||
|
||||
mock_hook = MagicMock(wraps=_register_fake)
|
||||
monkeypatch.setattr(
|
||||
web_tools, "_ensure_web_plugins_loaded", mock_hook
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
web_tools, "_load_web_config",
|
||||
lambda: {"search_backend": "brave-free"},
|
||||
)
|
||||
assert web_search_registry.get_provider("brave-free") is None
|
||||
|
||||
result = json.loads(web_tools.web_search_tool("hello", limit=1))
|
||||
assert mock_hook.called, (
|
||||
"web_search_tool must call _ensure_web_plugins_loaded() "
|
||||
"before resolving the registry"
|
||||
)
|
||||
assert "No web search provider configured" not in json.dumps(result)
|
||||
assert web_search_registry.get_provider("brave-free") is not None
|
||||
finally:
|
||||
restore()
|
||||
|
||||
|
||||
@ -732,6 +732,35 @@ def clean_base64_images(text: str) -> str:
|
||||
# dispatchers in this file resolve them via get_active_*_provider().
|
||||
|
||||
|
||||
def _ensure_web_plugins_loaded() -> None:
|
||||
"""Idempotently trigger plugin discovery so the web registry is populated.
|
||||
|
||||
Every bundled web provider (brave-free, ddgs, searxng, exa, parallel,
|
||||
tavily, firecrawl) registers itself via ``plugins/web/<vendor>/__init__.py``
|
||||
during plugin discovery. Tool dispatch can be reached from contexts that
|
||||
haven't already triggered discovery — subprocess agent runs, delegate
|
||||
children, standalone scripts, certain test paths — and without it the
|
||||
registry is empty and ``get_provider('firecrawl')`` returns ``None`` even
|
||||
when the user has ``web.extract_backend: firecrawl`` configured and
|
||||
``FIRECRAWL_API_KEY`` set. The symptom is a misleading "No web extract
|
||||
provider configured" error (issue #27580).
|
||||
|
||||
Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly:
|
||||
the underlying discovery call is idempotent and cheap on subsequent
|
||||
invocations.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.plugins import _ensure_plugins_discovered
|
||||
|
||||
_ensure_plugins_discovered()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
# Warning, not debug: if a plugin import is genuinely broken the
|
||||
# user otherwise hits the misleading "No web extract provider
|
||||
# configured" error this helper is meant to eliminate, with no
|
||||
# clue in normal logs about the real cause.
|
||||
logger.warning("Web plugin discovery failed (non-fatal): %s", exc)
|
||||
|
||||
|
||||
def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
"""
|
||||
Search the web for information using available search API backend.
|
||||
@ -792,6 +821,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
# (brave-free, ddgs, searxng, exa, parallel, tavily, firecrawl)
|
||||
# now live as plugins; the dispatcher is just a registry lookup +
|
||||
# delegation. Sync only — every provider's search() is sync.
|
||||
_ensure_web_plugins_loaded()
|
||||
from agent.web_search_registry import (
|
||||
get_active_search_provider,
|
||||
get_provider as _wsp_get_provider,
|
||||
@ -924,6 +954,7 @@ async def web_extract_tool(
|
||||
# detect coroutine functions and await; sync functions run
|
||||
# inline (the policy gate, SSRF re-check, etc. live inside the
|
||||
# provider itself for the firecrawl per-URL loop).
|
||||
_ensure_web_plugins_loaded()
|
||||
from agent.web_search_registry import (
|
||||
get_active_extract_provider,
|
||||
get_provider as _wsp_get_provider,
|
||||
|
||||
Reference in New Issue
Block a user