fix(tool-search): scope bridge catalog + dispatch to the session's toolsets

Tool Search read its catalog from the global registry (get_tool_definitions with no toolset scope = 'start with everything'), so a restricted-toolset session — subagent, kanban worker, curated gateway session — could: 1. tool_search the entire process registry, not just its granted tools, and 2. tool_call any registered plugin/MCP tool it was never given, because registry.dispatch() has no enabled_tools gate for non-execute_code tools. A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26 and successfully invoked an out-of-scope plugin tool via tool_call. Fix: - handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge dispatch scopes get_tool_definitions to them (also stops polluting the process-global _last_resolved_tool_names with out-of-scope tools, which leaked into execute_code's sandbox-tool fallback). - A defense-in-depth gate rejects any tool_call'd name not in the scoped deferrable catalog. - tool_executor's unwrap (both concurrent + sequential paths) enforces the same scope before dispatch, since it unwraps tool_call -> underlying name and bypasses the bridge branch. New _tool_search_scoped_names() helper, cached per-agent on registry generation + toolset scope. - New scoped_deferrable_names() helper in tool_search.py shared by both sites. Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped catalog, out-of-scope tool_call rejection, no global pollution, helper).
2026-05-29 01:21:41 -07:00
parent 369075dc95
commit 7427b9d581
6 changed files with 303 additions and 30 deletions
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -1692,6 +1692,8 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
            session_id=agent.session_id or "",
            enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
            skip_pre_tool_call_hook=True,
+            enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+            disabled_toolsets=getattr(agent, "disabled_toolsets", None),
        )


--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -62,6 +62,55 @@ def _ra():
    return run_agent


+def _tool_search_scoped_names(agent) -> frozenset:
+    """Return the deferrable tool names the session may invoke via tool_call.
+
+    The Tool Search unwrap dispatches the underlying tool directly, bypassing
+    the bridge branch (and its scope check) in
+    ``model_tools.handle_function_call``. To keep a restricted-toolset session
+    (subagent, kanban worker, curated gateway session) from reaching tools it
+    was never granted, the unwrap validates the underlying name against this
+    set: the deferrable subset of the session's own enabled/disabled toolset
+    scope.
+
+    Result is cached on the agent and refreshed when the tool registry's
+    generation changes (e.g. an MCP server reconnects), so the common case is
+    a dict lookup, not a full tool-defs rebuild on every tool call.
+    """
+    try:
+        import model_tools
+        from tools import tool_search as _ts
+        from tools.registry import registry as _registry
+    except Exception:
+        return frozenset()
+
+    enabled = getattr(agent, "enabled_toolsets", None)
+    disabled = getattr(agent, "disabled_toolsets", None)
+    cache_key = (
+        getattr(_registry, "_generation", 0),
+        frozenset(enabled) if enabled is not None else None,
+        frozenset(disabled) if disabled is not None else None,
+    )
+    cached = getattr(agent, "_tool_search_scope_cache", None)
+    if cached is not None and cached[0] == cache_key:
+        return cached[1]
+    try:
+        scoped_defs = model_tools.get_tool_definitions(
+            enabled_toolsets=enabled,
+            disabled_toolsets=disabled,
+            quiet_mode=True,
+            skip_tool_search_assembly=True,
+        ) or []
+        names = _ts.scoped_deferrable_names(scoped_defs)
+    except Exception:
+        names = frozenset()
+    try:
+        agent._tool_search_scope_cache = (cache_key, names)
+    except Exception:
+        pass
+    return names
+
+
 def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
    """Execute multiple tool calls concurrently using a thread pool.

@ -110,13 +159,28 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
        # The original tool_call entry on ``tool_call.function`` is left
        # untouched so the conversation transcript and the matching
        # tool_call_id are preserved exactly as the model emitted them.
+        #
+        # Scope gate: the unwrap dispatches the underlying tool directly
+        # (bypassing the bridge branch in handle_function_call and its
+        # scope check), so we enforce session toolset scope HERE. A tool
+        # the session was not granted is rejected before any checkpoint,
+        # hook, or dispatch fires.
+        _ts_scope_block = None
        try:
            from tools import tool_search as _ts
            if function_name == _ts.TOOL_CALL_NAME:
                _underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
                if not _err and _underlying:
-                    function_name = _underlying
-                    function_args = _underlying_args
+                    if _underlying in _tool_search_scoped_names(agent):
+                        function_name = _underlying
+                        function_args = _underlying_args
+                    else:
+                        _ts_scope_block = json.dumps({
+                            "error": (
+                                f"'{_underlying}' is not available in this session. "
+                                "Use tool_search to find tools you can call."
+                            ),
+                        }, ensure_ascii=False)
        except Exception:
            pass

@ -144,21 +208,25 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe

        block_result = None
        blocked_by_guardrail = False
-        try:
-            from hermes_cli.plugins import get_pre_tool_call_block_message
-            block_message = get_pre_tool_call_block_message(
-                function_name, function_args, task_id=effective_task_id or "",
-            )
-        except Exception:
-            block_message = None
-
-        if block_message is not None:
-            block_result = json.dumps({"error": block_message}, ensure_ascii=False)
+        if _ts_scope_block is not None:
+            # Out-of-scope tool_call: reject before hooks/guardrails/dispatch.
+            block_result = _ts_scope_block
        else:
-            guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
-            if not guardrail_decision.allows_execution:
-                block_result = agent._guardrail_block_result(guardrail_decision)
-                blocked_by_guardrail = True
+            try:
+                from hermes_cli.plugins import get_pre_tool_call_block_message
+                block_message = get_pre_tool_call_block_message(
+                    function_name, function_args, task_id=effective_task_id or "",
+                )
+            except Exception:
+                block_message = None
+
+            if block_message is not None:
+                block_result = json.dumps({"error": block_message}, ensure_ascii=False)
+            else:
+                guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
+                if not guardrail_decision.allows_execution:
+                    block_result = agent._guardrail_block_result(guardrail_decision)
+                    blocked_by_guardrail = True

        parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))

@ -517,26 +585,38 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        if not isinstance(function_args, dict):
            function_args = {}

-        # Tool Search unwrap — see _execute_tool_calls for full rationale.
+        # Tool Search unwrap — see execute_tool_calls_concurrent for full
+        # rationale, including the scope gate (the unwrap dispatches the
+        # underlying tool directly, so session toolset scope is enforced here).
+        _ts_scope_block: Optional[str] = None
        try:
            from tools import tool_search as _ts
            if function_name == _ts.TOOL_CALL_NAME:
                _underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
                if not _err and _underlying:
-                    function_name = _underlying
-                    function_args = _underlying_args
+                    if _underlying in _tool_search_scoped_names(agent):
+                        function_name = _underlying
+                        function_args = _underlying_args
+                    else:
+                        _ts_scope_block = (
+                            f"'{_underlying}' is not available in this session. "
+                            "Use tool_search to find tools you can call."
+                        )
        except Exception:
            pass

        # Check plugin hooks for a block directive before executing.
        _block_msg: Optional[str] = None
-        try:
-            from hermes_cli.plugins import get_pre_tool_call_block_message
-            _block_msg = get_pre_tool_call_block_message(
-                function_name, function_args, task_id=effective_task_id or "",
-            )
-        except Exception:
-            pass
+        if _ts_scope_block is not None:
+            _block_msg = _ts_scope_block
+        else:
+            try:
+                from hermes_cli.plugins import get_pre_tool_call_block_message
+                _block_msg = get_pre_tool_call_block_message(
+                    function_name, function_args, task_id=effective_task_id or "",
+                )
+            except Exception:
+                pass

        _guardrail_block_decision: ToolGuardrailDecision | None = None
        if _block_msg is None:
@ -783,6 +863,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                    session_id=agent.session_id or "",
                    enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
                    skip_pre_tool_call_hook=True,
+                    enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+                    disabled_toolsets=getattr(agent, "disabled_toolsets", None),
                )
                _spinner_result = function_result
            except Exception as tool_error:
@ -803,6 +885,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                    session_id=agent.session_id or "",
                    enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
                    skip_pre_tool_call_hook=True,
+                    enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+                    disabled_toolsets=getattr(agent, "disabled_toolsets", None),
                )
            except Exception as tool_error:
                function_result = f"Error executing tool '{function_name}': {tool_error}"
--- a/model_tools.py
+++ b/model_tools.py
@ -262,8 +262,8 @@ def _clear_tool_defs_cache() -> None:


 def get_tool_definitions(
-    enabled_toolsets: List[str] = None,
-    disabled_toolsets: List[str] = None,
+    enabled_toolsets: Optional[List[str]] = None,
+    disabled_toolsets: Optional[List[str]] = None,
    quiet_mode: bool = False,
    skip_tool_search_assembly: bool = False,
 ) -> List[Dict[str, Any]]:
@ -335,8 +335,8 @@ def get_tool_definitions(


 def _compute_tool_definitions(
-    enabled_toolsets: List[str] = None,
-    disabled_toolsets: List[str] = None,
+    enabled_toolsets: Optional[List[str]] = None,
+    disabled_toolsets: Optional[List[str]] = None,
    quiet_mode: bool = False,
    skip_tool_search_assembly: bool = False,
 ) -> List[Dict[str, Any]]:
@ -808,6 +808,8 @@ def handle_function_call(
    user_task: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
    skip_pre_tool_call_hook: bool = False,
+    enabled_toolsets: Optional[List[str]] = None,
+    disabled_toolsets: Optional[List[str]] = None,
 ) -> str:
    """
    Main function call dispatcher that routes calls to the tool registry.
@ -821,6 +823,14 @@ def handle_function_call(
                       execute_code uses this list to determine which sandbox
                       tools to generate.  Falls back to the process-global
                       ``_last_resolved_tool_names`` for backward compat.
+        enabled_toolsets: The session's enabled toolsets.  Used to scope the
+                       Tool Search bridge catalog so ``tool_search`` /
+                       ``tool_describe`` / ``tool_call`` only see and invoke
+                       tools the session was actually granted.  ``None`` means
+                       "no restriction" (the caller scopes to every toolset),
+                       matching ``get_tool_definitions`` semantics.
+        disabled_toolsets: The session's disabled toolsets, applied as a
+                       subtraction when scoping the bridge catalog.

    Returns:
        Function result as a JSON string.
@ -844,7 +854,19 @@ def handle_function_call(
            # Use skip_tool_search_assembly=True so we see the real catalog,
            # not the already-collapsed bridge-only list (the bridge would
            # otherwise be searching only itself).
+            #
+            # Scope the catalog to the session's toolsets so the bridge can
+            # only surface and invoke tools the session was actually granted.
+            # Without this, a restricted-toolset session (subagent, kanban
+            # worker, curated gateway session) would see and be able to call
+            # the entire process registry via the bridge. Passing the same
+            # enabled/disabled toolsets the session was assembled with keeps
+            # the deferred catalog identical to the deferrable subset of the
+            # session's own tool list, and avoids polluting the process-global
+            # _last_resolved_tool_names with out-of-scope tools.
            current_defs = get_tool_definitions(
+                enabled_toolsets=enabled_toolsets,
+                disabled_toolsets=disabled_toolsets,
                quiet_mode=True, skip_tool_search_assembly=True,
            ) or []
        except Exception:
@ -860,6 +882,20 @@ def handle_function_call(
            if err or not underlying_name:
                return json.dumps({"error": err or "tool_call could not be resolved"},
                                  ensure_ascii=False)
+            # Defense in depth: the underlying tool MUST be in the session's
+            # scoped deferrable catalog. resolve_underlying_call() only checks
+            # that the name is deferrable in the global registry; this gate
+            # additionally rejects any tool the session was not granted, so a
+            # restricted session can never invoke an out-of-scope tool through
+            # the bridge even if the catalog scoping above regressed.
+            _scoped_deferrable = _ts_mod.scoped_deferrable_names(current_defs)
+            if underlying_name not in _scoped_deferrable:
+                return json.dumps({
+                    "error": (
+                        f"'{underlying_name}' is not available in this session. "
+                        "Use tool_search to find tools you can call."
+                    ),
+                }, ensure_ascii=False)
            # Recurse with the underlying tool. All hooks fire against the
            # real tool name. The bridge is invisible to hooks by design.
            return handle_function_call(
@ -871,6 +907,8 @@ def handle_function_call(
                user_task=user_task,
                enabled_tools=enabled_tools,
                skip_pre_tool_call_hook=skip_pre_tool_call_hook,
+                enabled_toolsets=enabled_toolsets,
+                disabled_toolsets=disabled_toolsets,
            )

    try:
--- a/tests/tools/test_tool_search.py
+++ b/tests/tools/test_tool_search.py
@ -415,3 +415,124 @@ class TestRegression_OpenClawCron84141:
        assert err is not None
        assert "not a deferrable" in err

+
+class TestRegression_ToolsetScoping:
+    """A restricted-toolset session must not see or invoke out-of-scope tools.
+
+    The bug: the bridge dispatch and the tool_executor unwrap read the
+    catalog from the *global* registry (get_tool_definitions with no
+    toolset scope = "start with everything"), so a session scoped to one
+    MCP server could tool_search the entire process registry and tool_call
+    any plugin tool it was never granted. registry.dispatch() has no
+    enabled_tools gate for non-execute_code tools, so the out-of-scope tool
+    actually ran.
+
+    The fix threads the session's enabled/disabled toolsets into the bridge
+    dispatch (model_tools.handle_function_call) and the executor unwrap
+    (agent.tool_executor), scoping both the searchable catalog and the
+    invocable set to the session's own toolsets.
+    """
+
+    @staticmethod
+    def _register(name, toolset):
+        from tools.registry import registry
+
+        def _handler(args, task_id=None, **kw):
+            return json.dumps({"ok": True, "tool": name})
+
+        registry.register(
+            name=name,
+            handler=_handler,
+            schema=_td(name, f"desc for {name}", {"repo": {"type": "string"}}),
+            toolset=toolset,
+        )
+
+    def test_search_catalog_is_scoped_to_session_toolsets(self):
+        import model_tools
+
+        for i in range(12):
+            self._register(f"mcp_scoped_gh_{i}", "mcp-scoped-gh")
+        self._register("scoped_oos_plugin", "scopedoosplugin")
+
+        # tool_search scoped to the github toolset must not count the
+        # out-of-scope plugin tool (or any of the host registry).
+        result = model_tools.handle_function_call(
+            function_name="tool_search",
+            function_args={"query": "mcp_scoped_gh", "limit": 5},
+            enabled_toolsets=["mcp-scoped-gh"],
+        )
+        parsed = json.loads(result)
+        assert parsed["total_available"] == 12, (
+            f"expected scoped catalog of 12, got {parsed['total_available']} "
+            "— catalog leaked tools outside the session's toolsets"
+        )
+        hit_names = {m["name"] for m in parsed["matches"]}
+        assert "scoped_oos_plugin" not in hit_names
+
+    def test_tool_call_rejects_out_of_scope_tool(self):
+        import model_tools
+
+        self._register("mcp_inscope_gh_op", "mcp-inscope-gh")
+        self._register("inscope_oos_plugin", "inscopeoosplugin")
+
+        # Out-of-scope plugin tool: rejected even though it is registered
+        # and deferrable in the global registry.
+        rejected = json.loads(model_tools.handle_function_call(
+            function_name="tool_call",
+            function_args={"name": "inscope_oos_plugin", "arguments": {}},
+            enabled_toolsets=["mcp-inscope-gh"],
+        ))
+        assert "error" in rejected
+        assert "not available in this session" in rejected["error"]
+
+        # In-scope tool: dispatches normally.
+        ok = json.loads(model_tools.handle_function_call(
+            function_name="tool_call",
+            function_args={"name": "mcp_inscope_gh_op", "arguments": {"repo": "a/b"}},
+            enabled_toolsets=["mcp-inscope-gh"],
+        ))
+        assert ok.get("ok") is True
+        assert ok.get("tool") == "mcp_inscope_gh_op"
+
+    def test_bridge_dispatch_does_not_pollute_global_resolved_names(self):
+        import model_tools
+
+        self._register("mcp_pollute_op_0", "mcp-pollute")
+        self._register("mcp_pollute_op_1", "mcp-pollute")
+
+        # Establish the scoped session global.
+        model_tools.get_tool_definitions(
+            enabled_toolsets=["mcp-pollute"], quiet_mode=True,
+        )
+        before = set(model_tools._last_resolved_tool_names)
+        assert "terminal" not in before
+
+        # A scoped tool_search call must not widen the process-global
+        # _last_resolved_tool_names to the whole registry (which would leak
+        # core/sandbox tools into execute_code's fallback).
+        model_tools.handle_function_call(
+            function_name="tool_search",
+            function_args={"query": "pollute"},
+            enabled_toolsets=["mcp-pollute"],
+        )
+        after = set(model_tools._last_resolved_tool_names)
+        assert "terminal" not in after, (
+            "bridge dispatch polluted _last_resolved_tool_names with "
+            "out-of-scope tools"
+        )
+
+    def test_scoped_deferrable_names_helper(self):
+        from tools.tool_search import scoped_deferrable_names
+
+        self._register("mcp_helper_op", "mcp-helper")
+        import model_tools
+        defs = model_tools.get_tool_definitions(
+            enabled_toolsets=["mcp-helper"],
+            quiet_mode=True,
+            skip_tool_search_assembly=True,
+        )
+        names = scoped_deferrable_names(defs)
+        assert "mcp_helper_op" in names
+        # core tools are never deferrable
+        assert "terminal" not in names
+
--- a/tools/tool_search.py
+++ b/tools/tool_search.py
@ -657,6 +657,26 @@ def dispatch_tool_describe(args: Dict[str, Any],
    }, ensure_ascii=False)


+def scoped_deferrable_names(tool_defs: List[Dict[str, Any]]) -> frozenset[str]:
+    """Return the set of deferrable tool names present in ``tool_defs``.
+
+    ``tool_defs`` is expected to be the *pre-assembly* tool list for the
+    current session's toolset scope (i.e. what
+    ``get_tool_definitions(skip_tool_search_assembly=True)`` returns for the
+    session's enabled/disabled toolsets). The resulting set is the universe of
+    tools the session may legitimately reach through ``tool_call``. Used as a
+    scoping gate by both the ``model_tools`` bridge dispatch and the
+    ``tool_executor`` unwrap so a restricted-toolset session can never invoke
+    an out-of-scope tool via the bridge.
+    """
+    names: set[str] = set()
+    for td in tool_defs:
+        name = (td.get("function") or {}).get("name", "")
+        if name and is_deferrable_tool_name(name):
+            names.add(name)
+    return frozenset(names)
+
+
 def resolve_underlying_call(args: Dict[str, Any]) -> Tuple[Optional[str], Dict[str, Any], Optional[str]]:
    """Parse a ``tool_call`` invocation into (underlying_name, args, error_msg).

@ -711,4 +731,5 @@ __all__ = [
    "dispatch_tool_search",
    "dispatch_tool_describe",
    "resolve_underlying_call",
+    "scoped_deferrable_names",
 ]
--- a/website/docs/user-guide/features/tool-search.md
+++ b/website/docs/user-guide/features/tool-search.md
@ -139,6 +139,13 @@ to any progressive-disclosure design, not specific to this implementation:
  tool-defs list every assembly — no session-keyed `Map`. This avoids
  the class of bug where a stored catalog drifts out of sync with the
  live tool registry.
+- **The catalog is scoped to the session's toolsets.** `tool_search`,
+  `tool_describe`, and `tool_call` only ever see and invoke tools the
+  session was actually granted. A subagent, kanban worker, or gateway
+  session restricted to a subset of toolsets cannot use the bridge to
+  discover or call a tool outside that subset — the deferred catalog is
+  the deferrable slice of the session's own enabled/disabled toolsets,
+  not the whole process registry.
 - **No JS sandbox.** Hermes uses the simpler "structured tools" mode
  (search / describe / call as plain functions). The JS-sandbox "code
  mode" some other implementations offer is a large surface area; we