From 7427b9d5812f3bd4deb47340ab64ef86e605c27e Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 29 May 2026 01:21:41 -0700 Subject: [PATCH] fix(tool-search): scope bridge catalog + dispatch to the session's toolsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tool Search read its catalog from the global registry (get_tool_definitions with no toolset scope = 'start with everything'), so a restricted-toolset session — subagent, kanban worker, curated gateway session — could: 1. tool_search the entire process registry, not just its granted tools, and 2. tool_call any registered plugin/MCP tool it was never given, because registry.dispatch() has no enabled_tools gate for non-execute_code tools. A scoped session (enabled_toolsets=['mcp-github']) reported total_available=26 and successfully invoked an out-of-scope plugin tool via tool_call. Fix: - handle_function_call gains enabled_toolsets/disabled_toolsets; the bridge dispatch scopes get_tool_definitions to them (also stops polluting the process-global _last_resolved_tool_names with out-of-scope tools, which leaked into execute_code's sandbox-tool fallback). - A defense-in-depth gate rejects any tool_call'd name not in the scoped deferrable catalog. - tool_executor's unwrap (both concurrent + sequential paths) enforces the same scope before dispatch, since it unwraps tool_call -> underlying name and bypasses the bridge branch. New _tool_search_scoped_names() helper, cached per-agent on registry generation + toolset scope. - New scoped_deferrable_names() helper in tool_search.py shared by both sites. Tests: 4 new regression tests in TestRegression_ToolsetScoping (scoped catalog, out-of-scope tool_call rejection, no global pollution, helper). --- agent/agent_runtime_helpers.py | 2 + agent/tool_executor.py | 136 ++++++++++++++---- model_tools.py | 46 +++++- tests/tools/test_tool_search.py | 121 ++++++++++++++++ tools/tool_search.py | 21 +++ .../docs/user-guide/features/tool-search.md | 7 + 6 files changed, 303 insertions(+), 30 deletions(-) diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index 5ce1cc3d2..73f3cba43 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1692,6 +1692,8 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i session_id=agent.session_id or "", enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None, skip_pre_tool_call_hook=True, + enabled_toolsets=getattr(agent, "enabled_toolsets", None), + disabled_toolsets=getattr(agent, "disabled_toolsets", None), ) diff --git a/agent/tool_executor.py b/agent/tool_executor.py index 34be84a60..003fb9420 100644 --- a/agent/tool_executor.py +++ b/agent/tool_executor.py @@ -62,6 +62,55 @@ def _ra(): return run_agent +def _tool_search_scoped_names(agent) -> frozenset: + """Return the deferrable tool names the session may invoke via tool_call. + + The Tool Search unwrap dispatches the underlying tool directly, bypassing + the bridge branch (and its scope check) in + ``model_tools.handle_function_call``. To keep a restricted-toolset session + (subagent, kanban worker, curated gateway session) from reaching tools it + was never granted, the unwrap validates the underlying name against this + set: the deferrable subset of the session's own enabled/disabled toolset + scope. + + Result is cached on the agent and refreshed when the tool registry's + generation changes (e.g. an MCP server reconnects), so the common case is + a dict lookup, not a full tool-defs rebuild on every tool call. + """ + try: + import model_tools + from tools import tool_search as _ts + from tools.registry import registry as _registry + except Exception: + return frozenset() + + enabled = getattr(agent, "enabled_toolsets", None) + disabled = getattr(agent, "disabled_toolsets", None) + cache_key = ( + getattr(_registry, "_generation", 0), + frozenset(enabled) if enabled is not None else None, + frozenset(disabled) if disabled is not None else None, + ) + cached = getattr(agent, "_tool_search_scope_cache", None) + if cached is not None and cached[0] == cache_key: + return cached[1] + try: + scoped_defs = model_tools.get_tool_definitions( + enabled_toolsets=enabled, + disabled_toolsets=disabled, + quiet_mode=True, + skip_tool_search_assembly=True, + ) or [] + names = _ts.scoped_deferrable_names(scoped_defs) + except Exception: + names = frozenset() + try: + agent._tool_search_scope_cache = (cache_key, names) + except Exception: + pass + return names + + def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: """Execute multiple tool calls concurrently using a thread pool. @@ -110,13 +159,28 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe # The original tool_call entry on ``tool_call.function`` is left # untouched so the conversation transcript and the matching # tool_call_id are preserved exactly as the model emitted them. + # + # Scope gate: the unwrap dispatches the underlying tool directly + # (bypassing the bridge branch in handle_function_call and its + # scope check), so we enforce session toolset scope HERE. A tool + # the session was not granted is rejected before any checkpoint, + # hook, or dispatch fires. + _ts_scope_block = None try: from tools import tool_search as _ts if function_name == _ts.TOOL_CALL_NAME: _underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args) if not _err and _underlying: - function_name = _underlying - function_args = _underlying_args + if _underlying in _tool_search_scoped_names(agent): + function_name = _underlying + function_args = _underlying_args + else: + _ts_scope_block = json.dumps({ + "error": ( + f"'{_underlying}' is not available in this session. " + "Use tool_search to find tools you can call." + ), + }, ensure_ascii=False) except Exception: pass @@ -144,21 +208,25 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe block_result = None blocked_by_guardrail = False - try: - from hermes_cli.plugins import get_pre_tool_call_block_message - block_message = get_pre_tool_call_block_message( - function_name, function_args, task_id=effective_task_id or "", - ) - except Exception: - block_message = None - - if block_message is not None: - block_result = json.dumps({"error": block_message}, ensure_ascii=False) + if _ts_scope_block is not None: + # Out-of-scope tool_call: reject before hooks/guardrails/dispatch. + block_result = _ts_scope_block else: - guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args) - if not guardrail_decision.allows_execution: - block_result = agent._guardrail_block_result(guardrail_decision) - blocked_by_guardrail = True + try: + from hermes_cli.plugins import get_pre_tool_call_block_message + block_message = get_pre_tool_call_block_message( + function_name, function_args, task_id=effective_task_id or "", + ) + except Exception: + block_message = None + + if block_message is not None: + block_result = json.dumps({"error": block_message}, ensure_ascii=False) + else: + guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args) + if not guardrail_decision.allows_execution: + block_result = agent._guardrail_block_result(guardrail_decision) + blocked_by_guardrail = True parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail)) @@ -517,26 +585,38 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe if not isinstance(function_args, dict): function_args = {} - # Tool Search unwrap — see _execute_tool_calls for full rationale. + # Tool Search unwrap — see execute_tool_calls_concurrent for full + # rationale, including the scope gate (the unwrap dispatches the + # underlying tool directly, so session toolset scope is enforced here). + _ts_scope_block: Optional[str] = None try: from tools import tool_search as _ts if function_name == _ts.TOOL_CALL_NAME: _underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args) if not _err and _underlying: - function_name = _underlying - function_args = _underlying_args + if _underlying in _tool_search_scoped_names(agent): + function_name = _underlying + function_args = _underlying_args + else: + _ts_scope_block = ( + f"'{_underlying}' is not available in this session. " + "Use tool_search to find tools you can call." + ) except Exception: pass # Check plugin hooks for a block directive before executing. _block_msg: Optional[str] = None - try: - from hermes_cli.plugins import get_pre_tool_call_block_message - _block_msg = get_pre_tool_call_block_message( - function_name, function_args, task_id=effective_task_id or "", - ) - except Exception: - pass + if _ts_scope_block is not None: + _block_msg = _ts_scope_block + else: + try: + from hermes_cli.plugins import get_pre_tool_call_block_message + _block_msg = get_pre_tool_call_block_message( + function_name, function_args, task_id=effective_task_id or "", + ) + except Exception: + pass _guardrail_block_decision: ToolGuardrailDecision | None = None if _block_msg is None: @@ -783,6 +863,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe session_id=agent.session_id or "", enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None, skip_pre_tool_call_hook=True, + enabled_toolsets=getattr(agent, "enabled_toolsets", None), + disabled_toolsets=getattr(agent, "disabled_toolsets", None), ) _spinner_result = function_result except Exception as tool_error: @@ -803,6 +885,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe session_id=agent.session_id or "", enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None, skip_pre_tool_call_hook=True, + enabled_toolsets=getattr(agent, "enabled_toolsets", None), + disabled_toolsets=getattr(agent, "disabled_toolsets", None), ) except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" diff --git a/model_tools.py b/model_tools.py index a086020b3..8e85581be 100644 --- a/model_tools.py +++ b/model_tools.py @@ -262,8 +262,8 @@ def _clear_tool_defs_cache() -> None: def get_tool_definitions( - enabled_toolsets: List[str] = None, - disabled_toolsets: List[str] = None, + enabled_toolsets: Optional[List[str]] = None, + disabled_toolsets: Optional[List[str]] = None, quiet_mode: bool = False, skip_tool_search_assembly: bool = False, ) -> List[Dict[str, Any]]: @@ -335,8 +335,8 @@ def get_tool_definitions( def _compute_tool_definitions( - enabled_toolsets: List[str] = None, - disabled_toolsets: List[str] = None, + enabled_toolsets: Optional[List[str]] = None, + disabled_toolsets: Optional[List[str]] = None, quiet_mode: bool = False, skip_tool_search_assembly: bool = False, ) -> List[Dict[str, Any]]: @@ -808,6 +808,8 @@ def handle_function_call( user_task: Optional[str] = None, enabled_tools: Optional[List[str]] = None, skip_pre_tool_call_hook: bool = False, + enabled_toolsets: Optional[List[str]] = None, + disabled_toolsets: Optional[List[str]] = None, ) -> str: """ Main function call dispatcher that routes calls to the tool registry. @@ -821,6 +823,14 @@ def handle_function_call( execute_code uses this list to determine which sandbox tools to generate. Falls back to the process-global ``_last_resolved_tool_names`` for backward compat. + enabled_toolsets: The session's enabled toolsets. Used to scope the + Tool Search bridge catalog so ``tool_search`` / + ``tool_describe`` / ``tool_call`` only see and invoke + tools the session was actually granted. ``None`` means + "no restriction" (the caller scopes to every toolset), + matching ``get_tool_definitions`` semantics. + disabled_toolsets: The session's disabled toolsets, applied as a + subtraction when scoping the bridge catalog. Returns: Function result as a JSON string. @@ -844,7 +854,19 @@ def handle_function_call( # Use skip_tool_search_assembly=True so we see the real catalog, # not the already-collapsed bridge-only list (the bridge would # otherwise be searching only itself). + # + # Scope the catalog to the session's toolsets so the bridge can + # only surface and invoke tools the session was actually granted. + # Without this, a restricted-toolset session (subagent, kanban + # worker, curated gateway session) would see and be able to call + # the entire process registry via the bridge. Passing the same + # enabled/disabled toolsets the session was assembled with keeps + # the deferred catalog identical to the deferrable subset of the + # session's own tool list, and avoids polluting the process-global + # _last_resolved_tool_names with out-of-scope tools. current_defs = get_tool_definitions( + enabled_toolsets=enabled_toolsets, + disabled_toolsets=disabled_toolsets, quiet_mode=True, skip_tool_search_assembly=True, ) or [] except Exception: @@ -860,6 +882,20 @@ def handle_function_call( if err or not underlying_name: return json.dumps({"error": err or "tool_call could not be resolved"}, ensure_ascii=False) + # Defense in depth: the underlying tool MUST be in the session's + # scoped deferrable catalog. resolve_underlying_call() only checks + # that the name is deferrable in the global registry; this gate + # additionally rejects any tool the session was not granted, so a + # restricted session can never invoke an out-of-scope tool through + # the bridge even if the catalog scoping above regressed. + _scoped_deferrable = _ts_mod.scoped_deferrable_names(current_defs) + if underlying_name not in _scoped_deferrable: + return json.dumps({ + "error": ( + f"'{underlying_name}' is not available in this session. " + "Use tool_search to find tools you can call." + ), + }, ensure_ascii=False) # Recurse with the underlying tool. All hooks fire against the # real tool name. The bridge is invisible to hooks by design. return handle_function_call( @@ -871,6 +907,8 @@ def handle_function_call( user_task=user_task, enabled_tools=enabled_tools, skip_pre_tool_call_hook=skip_pre_tool_call_hook, + enabled_toolsets=enabled_toolsets, + disabled_toolsets=disabled_toolsets, ) try: diff --git a/tests/tools/test_tool_search.py b/tests/tools/test_tool_search.py index 9621d3157..9c8c8a33c 100644 --- a/tests/tools/test_tool_search.py +++ b/tests/tools/test_tool_search.py @@ -415,3 +415,124 @@ class TestRegression_OpenClawCron84141: assert err is not None assert "not a deferrable" in err + +class TestRegression_ToolsetScoping: + """A restricted-toolset session must not see or invoke out-of-scope tools. + + The bug: the bridge dispatch and the tool_executor unwrap read the + catalog from the *global* registry (get_tool_definitions with no + toolset scope = "start with everything"), so a session scoped to one + MCP server could tool_search the entire process registry and tool_call + any plugin tool it was never granted. registry.dispatch() has no + enabled_tools gate for non-execute_code tools, so the out-of-scope tool + actually ran. + + The fix threads the session's enabled/disabled toolsets into the bridge + dispatch (model_tools.handle_function_call) and the executor unwrap + (agent.tool_executor), scoping both the searchable catalog and the + invocable set to the session's own toolsets. + """ + + @staticmethod + def _register(name, toolset): + from tools.registry import registry + + def _handler(args, task_id=None, **kw): + return json.dumps({"ok": True, "tool": name}) + + registry.register( + name=name, + handler=_handler, + schema=_td(name, f"desc for {name}", {"repo": {"type": "string"}}), + toolset=toolset, + ) + + def test_search_catalog_is_scoped_to_session_toolsets(self): + import model_tools + + for i in range(12): + self._register(f"mcp_scoped_gh_{i}", "mcp-scoped-gh") + self._register("scoped_oos_plugin", "scopedoosplugin") + + # tool_search scoped to the github toolset must not count the + # out-of-scope plugin tool (or any of the host registry). + result = model_tools.handle_function_call( + function_name="tool_search", + function_args={"query": "mcp_scoped_gh", "limit": 5}, + enabled_toolsets=["mcp-scoped-gh"], + ) + parsed = json.loads(result) + assert parsed["total_available"] == 12, ( + f"expected scoped catalog of 12, got {parsed['total_available']} " + "— catalog leaked tools outside the session's toolsets" + ) + hit_names = {m["name"] for m in parsed["matches"]} + assert "scoped_oos_plugin" not in hit_names + + def test_tool_call_rejects_out_of_scope_tool(self): + import model_tools + + self._register("mcp_inscope_gh_op", "mcp-inscope-gh") + self._register("inscope_oos_plugin", "inscopeoosplugin") + + # Out-of-scope plugin tool: rejected even though it is registered + # and deferrable in the global registry. + rejected = json.loads(model_tools.handle_function_call( + function_name="tool_call", + function_args={"name": "inscope_oos_plugin", "arguments": {}}, + enabled_toolsets=["mcp-inscope-gh"], + )) + assert "error" in rejected + assert "not available in this session" in rejected["error"] + + # In-scope tool: dispatches normally. + ok = json.loads(model_tools.handle_function_call( + function_name="tool_call", + function_args={"name": "mcp_inscope_gh_op", "arguments": {"repo": "a/b"}}, + enabled_toolsets=["mcp-inscope-gh"], + )) + assert ok.get("ok") is True + assert ok.get("tool") == "mcp_inscope_gh_op" + + def test_bridge_dispatch_does_not_pollute_global_resolved_names(self): + import model_tools + + self._register("mcp_pollute_op_0", "mcp-pollute") + self._register("mcp_pollute_op_1", "mcp-pollute") + + # Establish the scoped session global. + model_tools.get_tool_definitions( + enabled_toolsets=["mcp-pollute"], quiet_mode=True, + ) + before = set(model_tools._last_resolved_tool_names) + assert "terminal" not in before + + # A scoped tool_search call must not widen the process-global + # _last_resolved_tool_names to the whole registry (which would leak + # core/sandbox tools into execute_code's fallback). + model_tools.handle_function_call( + function_name="tool_search", + function_args={"query": "pollute"}, + enabled_toolsets=["mcp-pollute"], + ) + after = set(model_tools._last_resolved_tool_names) + assert "terminal" not in after, ( + "bridge dispatch polluted _last_resolved_tool_names with " + "out-of-scope tools" + ) + + def test_scoped_deferrable_names_helper(self): + from tools.tool_search import scoped_deferrable_names + + self._register("mcp_helper_op", "mcp-helper") + import model_tools + defs = model_tools.get_tool_definitions( + enabled_toolsets=["mcp-helper"], + quiet_mode=True, + skip_tool_search_assembly=True, + ) + names = scoped_deferrable_names(defs) + assert "mcp_helper_op" in names + # core tools are never deferrable + assert "terminal" not in names + diff --git a/tools/tool_search.py b/tools/tool_search.py index 148a9f2b9..e885a5d7b 100644 --- a/tools/tool_search.py +++ b/tools/tool_search.py @@ -657,6 +657,26 @@ def dispatch_tool_describe(args: Dict[str, Any], }, ensure_ascii=False) +def scoped_deferrable_names(tool_defs: List[Dict[str, Any]]) -> frozenset[str]: + """Return the set of deferrable tool names present in ``tool_defs``. + + ``tool_defs`` is expected to be the *pre-assembly* tool list for the + current session's toolset scope (i.e. what + ``get_tool_definitions(skip_tool_search_assembly=True)`` returns for the + session's enabled/disabled toolsets). The resulting set is the universe of + tools the session may legitimately reach through ``tool_call``. Used as a + scoping gate by both the ``model_tools`` bridge dispatch and the + ``tool_executor`` unwrap so a restricted-toolset session can never invoke + an out-of-scope tool via the bridge. + """ + names: set[str] = set() + for td in tool_defs: + name = (td.get("function") or {}).get("name", "") + if name and is_deferrable_tool_name(name): + names.add(name) + return frozenset(names) + + def resolve_underlying_call(args: Dict[str, Any]) -> Tuple[Optional[str], Dict[str, Any], Optional[str]]: """Parse a ``tool_call`` invocation into (underlying_name, args, error_msg). @@ -711,4 +731,5 @@ __all__ = [ "dispatch_tool_search", "dispatch_tool_describe", "resolve_underlying_call", + "scoped_deferrable_names", ] diff --git a/website/docs/user-guide/features/tool-search.md b/website/docs/user-guide/features/tool-search.md index 5610a4346..fb65ad29b 100644 --- a/website/docs/user-guide/features/tool-search.md +++ b/website/docs/user-guide/features/tool-search.md @@ -139,6 +139,13 @@ to any progressive-disclosure design, not specific to this implementation: tool-defs list every assembly — no session-keyed `Map`. This avoids the class of bug where a stored catalog drifts out of sync with the live tool registry. +- **The catalog is scoped to the session's toolsets.** `tool_search`, + `tool_describe`, and `tool_call` only ever see and invoke tools the + session was actually granted. A subagent, kanban worker, or gateway + session restricted to a subset of toolsets cannot use the bridge to + discover or call a tool outside that subset — the deferred catalog is + the deferrable slice of the session's own enabled/disabled toolsets, + not the whole process registry. - **No JS sandbox.** Hermes uses the simpler "structured tools" mode (search / describe / call as plain functions). The JS-sandbox "code mode" some other implementations offer is a large surface area; we