fix(tool-search): scope bridge catalog + dispatch to the session's toolsets

Tool Search read its catalog from the global registry (get_tool_definitions
with no toolset scope = 'start with everything'), so a restricted-toolset
session — subagent, kanban worker, curated gateway session — could:

  1. tool_search the entire process registry, not just its granted tools, and
  2. tool_call any registered plugin/MCP tool it was never given, because
     registry.dispatch() has no enabled_tools gate for non-execute_code tools.

A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26
and successfully invoked an out-of-scope plugin tool via tool_call.

Fix:
- handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge
  dispatch scopes get_tool_definitions to them (also stops polluting the
  process-global _last_resolved_tool_names with out-of-scope tools, which
  leaked into execute_code's sandbox-tool fallback).
- A defense-in-depth gate rejects any tool_call'd name not in the scoped
  deferrable catalog.
- tool_executor's unwrap (both concurrent + sequential paths) enforces the
  same scope before dispatch, since it unwraps tool_call -> underlying name
  and bypasses the bridge branch. New _tool_search_scoped_names() helper,
  cached per-agent on registry generation + toolset scope.
- New scoped_deferrable_names() helper in tool_search.py shared by both sites.

Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped
catalog, out-of-scope tool_call rejection, no global pollution, helper).
This commit is contained in:
teknium1
2026-05-29 01:21:41 -07:00
committed by Teknium
parent 369075dc95
commit 7427b9d581
6 changed files with 303 additions and 30 deletions

View File

@ -1692,6 +1692,8 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
session_id=agent.session_id or "",
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
skip_pre_tool_call_hook=True,
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
)

View File

@ -62,6 +62,55 @@ def _ra():
return run_agent
def _tool_search_scoped_names(agent) -> frozenset:
"""Return the deferrable tool names the session may invoke via tool_call.
The Tool Search unwrap dispatches the underlying tool directly, bypassing
the bridge branch (and its scope check) in
``model_tools.handle_function_call``. To keep a restricted-toolset session
(subagent, kanban worker, curated gateway session) from reaching tools it
was never granted, the unwrap validates the underlying name against this
set: the deferrable subset of the session's own enabled/disabled toolset
scope.
Result is cached on the agent and refreshed when the tool registry's
generation changes (e.g. an MCP server reconnects), so the common case is
a dict lookup, not a full tool-defs rebuild on every tool call.
"""
try:
import model_tools
from tools import tool_search as _ts
from tools.registry import registry as _registry
except Exception:
return frozenset()
enabled = getattr(agent, "enabled_toolsets", None)
disabled = getattr(agent, "disabled_toolsets", None)
cache_key = (
getattr(_registry, "_generation", 0),
frozenset(enabled) if enabled is not None else None,
frozenset(disabled) if disabled is not None else None,
)
cached = getattr(agent, "_tool_search_scope_cache", None)
if cached is not None and cached[0] == cache_key:
return cached[1]
try:
scoped_defs = model_tools.get_tool_definitions(
enabled_toolsets=enabled,
disabled_toolsets=disabled,
quiet_mode=True,
skip_tool_search_assembly=True,
) or []
names = _ts.scoped_deferrable_names(scoped_defs)
except Exception:
names = frozenset()
try:
agent._tool_search_scope_cache = (cache_key, names)
except Exception:
pass
return names
def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
"""Execute multiple tool calls concurrently using a thread pool.
@ -110,13 +159,28 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
# The original tool_call entry on ``tool_call.function`` is left
# untouched so the conversation transcript and the matching
# tool_call_id are preserved exactly as the model emitted them.
#
# Scope gate: the unwrap dispatches the underlying tool directly
# (bypassing the bridge branch in handle_function_call and its
# scope check), so we enforce session toolset scope HERE. A tool
# the session was not granted is rejected before any checkpoint,
# hook, or dispatch fires.
_ts_scope_block = None
try:
from tools import tool_search as _ts
if function_name == _ts.TOOL_CALL_NAME:
_underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
if not _err and _underlying:
function_name = _underlying
function_args = _underlying_args
if _underlying in _tool_search_scoped_names(agent):
function_name = _underlying
function_args = _underlying_args
else:
_ts_scope_block = json.dumps({
"error": (
f"'{_underlying}' is not available in this session. "
"Use tool_search to find tools you can call."
),
}, ensure_ascii=False)
except Exception:
pass
@ -144,21 +208,25 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
block_result = None
blocked_by_guardrail = False
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
block_message = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
block_message = None
if block_message is not None:
block_result = json.dumps({"error": block_message}, ensure_ascii=False)
if _ts_scope_block is not None:
# Out-of-scope tool_call: reject before hooks/guardrails/dispatch.
block_result = _ts_scope_block
else:
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
if not guardrail_decision.allows_execution:
block_result = agent._guardrail_block_result(guardrail_decision)
blocked_by_guardrail = True
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
block_message = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
block_message = None
if block_message is not None:
block_result = json.dumps({"error": block_message}, ensure_ascii=False)
else:
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
if not guardrail_decision.allows_execution:
block_result = agent._guardrail_block_result(guardrail_decision)
blocked_by_guardrail = True
parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
@ -517,26 +585,38 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if not isinstance(function_args, dict):
function_args = {}
# Tool Search unwrap — see _execute_tool_calls for full rationale.
# Tool Search unwrap — see execute_tool_calls_concurrent for full
# rationale, including the scope gate (the unwrap dispatches the
# underlying tool directly, so session toolset scope is enforced here).
_ts_scope_block: Optional[str] = None
try:
from tools import tool_search as _ts
if function_name == _ts.TOOL_CALL_NAME:
_underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
if not _err and _underlying:
function_name = _underlying
function_args = _underlying_args
if _underlying in _tool_search_scoped_names(agent):
function_name = _underlying
function_args = _underlying_args
else:
_ts_scope_block = (
f"'{_underlying}' is not available in this session. "
"Use tool_search to find tools you can call."
)
except Exception:
pass
# Check plugin hooks for a block directive before executing.
_block_msg: Optional[str] = None
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
_block_msg = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
pass
if _ts_scope_block is not None:
_block_msg = _ts_scope_block
else:
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
_block_msg = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
pass
_guardrail_block_decision: ToolGuardrailDecision | None = None
if _block_msg is None:
@ -783,6 +863,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
session_id=agent.session_id or "",
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
skip_pre_tool_call_hook=True,
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
)
_spinner_result = function_result
except Exception as tool_error:
@ -803,6 +885,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
session_id=agent.session_id or "",
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
skip_pre_tool_call_hook=True,
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
)
except Exception as tool_error:
function_result = f"Error executing tool '{function_name}': {tool_error}"

View File

@ -262,8 +262,8 @@ def _clear_tool_defs_cache() -> None:
def get_tool_definitions(
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
enabled_toolsets: Optional[List[str]] = None,
disabled_toolsets: Optional[List[str]] = None,
quiet_mode: bool = False,
skip_tool_search_assembly: bool = False,
) -> List[Dict[str, Any]]:
@ -335,8 +335,8 @@ def get_tool_definitions(
def _compute_tool_definitions(
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
enabled_toolsets: Optional[List[str]] = None,
disabled_toolsets: Optional[List[str]] = None,
quiet_mode: bool = False,
skip_tool_search_assembly: bool = False,
) -> List[Dict[str, Any]]:
@ -808,6 +808,8 @@ def handle_function_call(
user_task: Optional[str] = None,
enabled_tools: Optional[List[str]] = None,
skip_pre_tool_call_hook: bool = False,
enabled_toolsets: Optional[List[str]] = None,
disabled_toolsets: Optional[List[str]] = None,
) -> str:
"""
Main function call dispatcher that routes calls to the tool registry.
@ -821,6 +823,14 @@ def handle_function_call(
execute_code uses this list to determine which sandbox
tools to generate. Falls back to the process-global
``_last_resolved_tool_names`` for backward compat.
enabled_toolsets: The session's enabled toolsets. Used to scope the
Tool Search bridge catalog so ``tool_search`` /
``tool_describe`` / ``tool_call`` only see and invoke
tools the session was actually granted. ``None`` means
"no restriction" (the caller scopes to every toolset),
matching ``get_tool_definitions`` semantics.
disabled_toolsets: The session's disabled toolsets, applied as a
subtraction when scoping the bridge catalog.
Returns:
Function result as a JSON string.
@ -844,7 +854,19 @@ def handle_function_call(
# Use skip_tool_search_assembly=True so we see the real catalog,
# not the already-collapsed bridge-only list (the bridge would
# otherwise be searching only itself).
#
# Scope the catalog to the session's toolsets so the bridge can
# only surface and invoke tools the session was actually granted.
# Without this, a restricted-toolset session (subagent, kanban
# worker, curated gateway session) would see and be able to call
# the entire process registry via the bridge. Passing the same
# enabled/disabled toolsets the session was assembled with keeps
# the deferred catalog identical to the deferrable subset of the
# session's own tool list, and avoids polluting the process-global
# _last_resolved_tool_names with out-of-scope tools.
current_defs = get_tool_definitions(
enabled_toolsets=enabled_toolsets,
disabled_toolsets=disabled_toolsets,
quiet_mode=True, skip_tool_search_assembly=True,
) or []
except Exception:
@ -860,6 +882,20 @@ def handle_function_call(
if err or not underlying_name:
return json.dumps({"error": err or "tool_call could not be resolved"},
ensure_ascii=False)
# Defense in depth: the underlying tool MUST be in the session's
# scoped deferrable catalog. resolve_underlying_call() only checks
# that the name is deferrable in the global registry; this gate
# additionally rejects any tool the session was not granted, so a
# restricted session can never invoke an out-of-scope tool through
# the bridge even if the catalog scoping above regressed.
_scoped_deferrable = _ts_mod.scoped_deferrable_names(current_defs)
if underlying_name not in _scoped_deferrable:
return json.dumps({
"error": (
f"'{underlying_name}' is not available in this session. "
"Use tool_search to find tools you can call."
),
}, ensure_ascii=False)
# Recurse with the underlying tool. All hooks fire against the
# real tool name. The bridge is invisible to hooks by design.
return handle_function_call(
@ -871,6 +907,8 @@ def handle_function_call(
user_task=user_task,
enabled_tools=enabled_tools,
skip_pre_tool_call_hook=skip_pre_tool_call_hook,
enabled_toolsets=enabled_toolsets,
disabled_toolsets=disabled_toolsets,
)
try:

View File

@ -415,3 +415,124 @@ class TestRegression_OpenClawCron84141:
assert err is not None
assert "not a deferrable" in err
class TestRegression_ToolsetScoping:
"""A restricted-toolset session must not see or invoke out-of-scope tools.
The bug: the bridge dispatch and the tool_executor unwrap read the
catalog from the *global* registry (get_tool_definitions with no
toolset scope = "start with everything"), so a session scoped to one
MCP server could tool_search the entire process registry and tool_call
any plugin tool it was never granted. registry.dispatch() has no
enabled_tools gate for non-execute_code tools, so the out-of-scope tool
actually ran.
The fix threads the session's enabled/disabled toolsets into the bridge
dispatch (model_tools.handle_function_call) and the executor unwrap
(agent.tool_executor), scoping both the searchable catalog and the
invocable set to the session's own toolsets.
"""
@staticmethod
def _register(name, toolset):
from tools.registry import registry
def _handler(args, task_id=None, **kw):
return json.dumps({"ok": True, "tool": name})
registry.register(
name=name,
handler=_handler,
schema=_td(name, f"desc for {name}", {"repo": {"type": "string"}}),
toolset=toolset,
)
def test_search_catalog_is_scoped_to_session_toolsets(self):
import model_tools
for i in range(12):
self._register(f"mcp_scoped_gh_{i}", "mcp-scoped-gh")
self._register("scoped_oos_plugin", "scopedoosplugin")
# tool_search scoped to the github toolset must not count the
# out-of-scope plugin tool (or any of the host registry).
result = model_tools.handle_function_call(
function_name="tool_search",
function_args={"query": "mcp_scoped_gh", "limit": 5},
enabled_toolsets=["mcp-scoped-gh"],
)
parsed = json.loads(result)
assert parsed["total_available"] == 12, (
f"expected scoped catalog of 12, got {parsed['total_available']} "
"— catalog leaked tools outside the session's toolsets"
)
hit_names = {m["name"] for m in parsed["matches"]}
assert "scoped_oos_plugin" not in hit_names
def test_tool_call_rejects_out_of_scope_tool(self):
import model_tools
self._register("mcp_inscope_gh_op", "mcp-inscope-gh")
self._register("inscope_oos_plugin", "inscopeoosplugin")
# Out-of-scope plugin tool: rejected even though it is registered
# and deferrable in the global registry.
rejected = json.loads(model_tools.handle_function_call(
function_name="tool_call",
function_args={"name": "inscope_oos_plugin", "arguments": {}},
enabled_toolsets=["mcp-inscope-gh"],
))
assert "error" in rejected
assert "not available in this session" in rejected["error"]
# In-scope tool: dispatches normally.
ok = json.loads(model_tools.handle_function_call(
function_name="tool_call",
function_args={"name": "mcp_inscope_gh_op", "arguments": {"repo": "a/b"}},
enabled_toolsets=["mcp-inscope-gh"],
))
assert ok.get("ok") is True
assert ok.get("tool") == "mcp_inscope_gh_op"
def test_bridge_dispatch_does_not_pollute_global_resolved_names(self):
import model_tools
self._register("mcp_pollute_op_0", "mcp-pollute")
self._register("mcp_pollute_op_1", "mcp-pollute")
# Establish the scoped session global.
model_tools.get_tool_definitions(
enabled_toolsets=["mcp-pollute"], quiet_mode=True,
)
before = set(model_tools._last_resolved_tool_names)
assert "terminal" not in before
# A scoped tool_search call must not widen the process-global
# _last_resolved_tool_names to the whole registry (which would leak
# core/sandbox tools into execute_code's fallback).
model_tools.handle_function_call(
function_name="tool_search",
function_args={"query": "pollute"},
enabled_toolsets=["mcp-pollute"],
)
after = set(model_tools._last_resolved_tool_names)
assert "terminal" not in after, (
"bridge dispatch polluted _last_resolved_tool_names with "
"out-of-scope tools"
)
def test_scoped_deferrable_names_helper(self):
from tools.tool_search import scoped_deferrable_names
self._register("mcp_helper_op", "mcp-helper")
import model_tools
defs = model_tools.get_tool_definitions(
enabled_toolsets=["mcp-helper"],
quiet_mode=True,
skip_tool_search_assembly=True,
)
names = scoped_deferrable_names(defs)
assert "mcp_helper_op" in names
# core tools are never deferrable
assert "terminal" not in names

View File

@ -657,6 +657,26 @@ def dispatch_tool_describe(args: Dict[str, Any],
}, ensure_ascii=False)
def scoped_deferrable_names(tool_defs: List[Dict[str, Any]]) -> frozenset[str]:
"""Return the set of deferrable tool names present in ``tool_defs``.
``tool_defs`` is expected to be the *pre-assembly* tool list for the
current session's toolset scope (i.e. what
``get_tool_definitions(skip_tool_search_assembly=True)`` returns for the
session's enabled/disabled toolsets). The resulting set is the universe of
tools the session may legitimately reach through ``tool_call``. Used as a
scoping gate by both the ``model_tools`` bridge dispatch and the
``tool_executor`` unwrap so a restricted-toolset session can never invoke
an out-of-scope tool via the bridge.
"""
names: set[str] = set()
for td in tool_defs:
name = (td.get("function") or {}).get("name", "")
if name and is_deferrable_tool_name(name):
names.add(name)
return frozenset(names)
def resolve_underlying_call(args: Dict[str, Any]) -> Tuple[Optional[str], Dict[str, Any], Optional[str]]:
"""Parse a ``tool_call`` invocation into (underlying_name, args, error_msg).
@ -711,4 +731,5 @@ __all__ = [
"dispatch_tool_search",
"dispatch_tool_describe",
"resolve_underlying_call",
"scoped_deferrable_names",
]

View File

@ -139,6 +139,13 @@ to any progressive-disclosure design, not specific to this implementation:
tool-defs list every assembly — no session-keyed `Map`. This avoids
the class of bug where a stored catalog drifts out of sync with the
live tool registry.
- **The catalog is scoped to the session's toolsets.** `tool_search`,
`tool_describe`, and `tool_call` only ever see and invoke tools the
session was actually granted. A subagent, kanban worker, or gateway
session restricted to a subset of toolsets cannot use the bridge to
discover or call a tool outside that subset — the deferred catalog is
the deferrable slice of the session's own enabled/disabled toolsets,
not the whole process registry.
- **No JS sandbox.** Hermes uses the simpler "structured tools" mode
(search / describe / call as plain functions). The JS-sandbox "code
mode" some other implementations offer is a large surface area; we