fix(code-exec): restore approval context in execute_code RPC threads + guard entry
Wrap both execute_code RPC threads (local UDS + remote file-RPC) with propagate_context_to_thread so gateway sessions no longer fall into check_dangerous_command's non-interactive auto-approve branch and the CLI approval prompt stays reachable. Add check_execute_code_guard: one-shot fail-closed approval of the whole script in gateway/ask/cron-deny before the child spawns (skips isolated backends; command-string built only past the early returns). Drop the broad HERMES_ env passthrough for an explicit operational allowlist plus DSN/WEBHOOK secret substrings, and update the POSIX-equivalence oracle. Refs #4146, #27303, #30882, #33057
This commit is contained in:
@ -253,20 +253,24 @@ class TestWindowsSocketSmokeTest:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _legacy_posix_scrubber(source_env, is_passthrough):
|
||||
"""Verbatim copy of the pre-Windows-fix inline scrubbing logic.
|
||||
"""Independent oracle for TestPosixEquivalence — a from-scratch reimpl of
|
||||
_scrub_child_env's POSIX behavior, used to prove the production helper does
|
||||
what we think it does.
|
||||
|
||||
This is the oracle used by TestPosixEquivalence to prove the refactor
|
||||
did not change POSIX behavior. DO NOT edit this to "match" a future
|
||||
production change — if _scrub_child_env's POSIX behavior legitimately
|
||||
needs to evolve, delete this function and adjust the equivalence test
|
||||
on purpose, so the churn is visible in review.
|
||||
Deliberately updated for #27303 (the broad ``HERMES_`` prefix was dropped
|
||||
in favor of an explicit operational allowlist, and DSN/WEBHOOK were added
|
||||
to the secret substrings). The original docstring said: if POSIX behavior
|
||||
legitimately needs to evolve, adjust this oracle on purpose so the churn is
|
||||
visible in review — that is what this change is.
|
||||
"""
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
||||
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
|
||||
"HERMES_")
|
||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
|
||||
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
||||
"PASSWD", "AUTH")
|
||||
"PASSWD", "AUTH", "DSN", "WEBHOOK")
|
||||
_HERMES_CHILD_ALLOWED = frozenset({
|
||||
"HERMES_HOME", "HERMES_PROFILE", "HERMES_CONFIG", "HERMES_ENV",
|
||||
})
|
||||
out = {}
|
||||
for k, v in source_env.items():
|
||||
if is_passthrough(k):
|
||||
@ -276,6 +280,9 @@ def _legacy_posix_scrubber(source_env, is_passthrough):
|
||||
continue
|
||||
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
||||
out[k] = v
|
||||
continue
|
||||
if k in _HERMES_CHILD_ALLOWED:
|
||||
out[k] = v
|
||||
return out
|
||||
|
||||
|
||||
@ -308,13 +315,20 @@ class TestPosixEquivalence:
|
||||
"PYTHONPATH": "/opt/lib",
|
||||
"VIRTUAL_ENV": "/home/alice/.venv",
|
||||
"CONDA_PREFIX": "/opt/conda",
|
||||
"HERMES_HOME": "/home/alice/.hermes",
|
||||
"HERMES_INTERACTIVE": "1",
|
||||
# HERMES_* handling (#27303): only the operational allowlist passes;
|
||||
# every other HERMES_* is dropped (the broad prefix was removed).
|
||||
"HERMES_HOME": "/home/alice/.hermes", # allowlisted → kept
|
||||
"HERMES_PROFILE": "default", # allowlisted → kept
|
||||
"HERMES_INTERACTIVE": "1", # not allowlisted → dropped
|
||||
"HERMES_BASE_URL": "https://api.internal", # not allowlisted → dropped
|
||||
"HERMES_KANBAN_DB": "postgres://u:p@h/db", # not allowlisted → dropped
|
||||
# Secret-substring blocks
|
||||
"OPENAI_API_KEY": "sk-xxx",
|
||||
"GITHUB_TOKEN": "ghp_xxx",
|
||||
"AWS_SECRET_ACCESS_KEY": "yyy",
|
||||
"MY_PASSWORD": "hunter2",
|
||||
"SENTRY_DSN": "https://abc@sentry.io/1", # DSN substring → blocked
|
||||
"SLACK_WEBHOOK": "https://hooks.slack/x", # WEBHOOK substring → blocked
|
||||
# Uncategorized — must be dropped
|
||||
"RANDOM_UNKNOWN": "drop-me",
|
||||
"DISPLAY": ":0",
|
||||
|
||||
@ -1057,6 +1057,107 @@ def _format_tirith_description(tirith_result: dict) -> str:
|
||||
return "Security scan — " + "; ".join(parts)
|
||||
|
||||
|
||||
def _await_gateway_decision(session_key: str, notify_cb, approval_data: dict,
|
||||
*, surface: str = "gateway") -> dict:
|
||||
"""Enqueue *approval_data*, notify the user, and block the calling agent
|
||||
thread until the request is resolved or the gateway approval timeout
|
||||
elapses — firing pre/post approval hooks and cleaning up the queue entry.
|
||||
|
||||
Shared by the terminal command guard (``check_all_command_guards``) and
|
||||
the execute_code guard (``check_execute_code_guard``) so the fiddly
|
||||
heartbeat-polling wait loop lives in one place.
|
||||
|
||||
Returns ``{"resolved": bool, "choice": str|None}`` on completion, or
|
||||
``{"resolved": False, "choice": None, "notify_failed": True}`` if the
|
||||
notify callback raised. Persistence of an approved choice and building
|
||||
the final tool-facing result dict remain the caller's responsibility.
|
||||
"""
|
||||
command = approval_data.get("command", "")
|
||||
description = approval_data.get("description", "")
|
||||
primary_key = approval_data.get("pattern_key", "")
|
||||
all_keys = approval_data.get("pattern_keys", [primary_key])
|
||||
|
||||
entry = _ApprovalEntry(approval_data)
|
||||
with _lock:
|
||||
_gateway_queues.setdefault(session_key, []).append(entry)
|
||||
|
||||
def _drop_entry() -> None:
|
||||
with _lock:
|
||||
queue = _gateway_queues.get(session_key, [])
|
||||
if entry in queue:
|
||||
queue.remove(entry)
|
||||
if not queue:
|
||||
_gateway_queues.pop(session_key, None)
|
||||
|
||||
# Notify plugins that an approval is being requested. Fires before the
|
||||
# gateway notify callback so observers get the event in real time.
|
||||
_fire_approval_hook(
|
||||
"pre_approval_request",
|
||||
command=command,
|
||||
description=description,
|
||||
pattern_key=primary_key,
|
||||
pattern_keys=list(all_keys),
|
||||
session_key=session_key,
|
||||
surface=surface,
|
||||
)
|
||||
|
||||
# Notify the user (bridges sync agent thread → async gateway)
|
||||
try:
|
||||
notify_cb(approval_data)
|
||||
except Exception as exc:
|
||||
logger.warning("Gateway approval notify failed: %s", exc)
|
||||
_drop_entry()
|
||||
return {"resolved": False, "choice": None, "notify_failed": True}
|
||||
|
||||
# Block until the user responds or timeout (default 5 min). Poll in short
|
||||
# slices so we can fire activity heartbeats every ~10s to the agent's
|
||||
# inactivity tracker — otherwise the gateway watchdog kills the agent
|
||||
# while the user is still responding. Mirrors _wait_for_process() cadence.
|
||||
timeout = _get_approval_config().get("gateway_timeout", 300)
|
||||
try:
|
||||
timeout = int(timeout)
|
||||
except (ValueError, TypeError):
|
||||
timeout = 300
|
||||
|
||||
try:
|
||||
from tools.environments.base import touch_activity_if_due
|
||||
except Exception: # pragma: no cover
|
||||
touch_activity_if_due = None
|
||||
|
||||
_now = time.monotonic()
|
||||
_deadline = _now + max(timeout, 0)
|
||||
_activity_state = {"last_touch": _now, "start": _now}
|
||||
resolved = False
|
||||
while True:
|
||||
_remaining = _deadline - time.monotonic()
|
||||
if _remaining <= 0:
|
||||
break
|
||||
if entry.event.wait(timeout=min(1.0, _remaining)):
|
||||
resolved = True
|
||||
break
|
||||
if touch_activity_if_due is not None:
|
||||
touch_activity_if_due(_activity_state, "waiting for user approval")
|
||||
|
||||
_drop_entry()
|
||||
|
||||
choice = entry.result
|
||||
# Normalize outcome for the post hook. Unresolved (timeout) and None both
|
||||
# mean the user never responded; report that explicitly so plugins can
|
||||
# distinguish timeout from explicit deny.
|
||||
_outcome = "timeout" if not resolved else (choice if choice else "timeout")
|
||||
_fire_approval_hook(
|
||||
"post_approval_response",
|
||||
command=command,
|
||||
description=description,
|
||||
pattern_key=primary_key,
|
||||
pattern_keys=list(all_keys),
|
||||
session_key=session_key,
|
||||
surface=surface,
|
||||
choice=_outcome,
|
||||
)
|
||||
return {"resolved": resolved, "choice": choice}
|
||||
|
||||
|
||||
def check_all_command_guards(command: str, env_type: str,
|
||||
approval_callback=None) -> dict:
|
||||
"""Run all pre-exec security checks and return a single approval decision.
|
||||
@ -1207,113 +1308,27 @@ def check_all_command_guards(command: str, env_type: str,
|
||||
|
||||
if notify_cb is not None:
|
||||
# --- Blocking gateway approval (queue-based) ---
|
||||
# Each call gets its own _ApprovalEntry so parallel subagents
|
||||
# and execute_code threads can block concurrently.
|
||||
# Block the agent thread until the user responds; the notify +
|
||||
# heartbeat wait loop is shared with check_execute_code_guard via
|
||||
# _await_gateway_decision().
|
||||
approval_data = {
|
||||
"command": command,
|
||||
"pattern_key": primary_key,
|
||||
"pattern_keys": all_keys,
|
||||
"description": combined_desc,
|
||||
}
|
||||
entry = _ApprovalEntry(approval_data)
|
||||
with _lock:
|
||||
_gateway_queues.setdefault(session_key, []).append(entry)
|
||||
|
||||
# Notify plugins that an approval is being requested. Fires before
|
||||
# the gateway notify callback so observers (e.g. macOS notifier
|
||||
# plugins, audit logs, Slack alerts) get the event in real time.
|
||||
_fire_approval_hook(
|
||||
"pre_approval_request",
|
||||
command=command,
|
||||
description=combined_desc,
|
||||
pattern_key=primary_key,
|
||||
pattern_keys=list(all_keys),
|
||||
session_key=session_key,
|
||||
surface="gateway",
|
||||
decision = _await_gateway_decision(
|
||||
session_key, notify_cb, approval_data, surface="gateway"
|
||||
)
|
||||
|
||||
# Notify the user (bridges sync agent thread → async gateway)
|
||||
try:
|
||||
notify_cb(approval_data)
|
||||
except Exception as exc:
|
||||
logger.warning("Gateway approval notify failed: %s", exc)
|
||||
with _lock:
|
||||
queue = _gateway_queues.get(session_key, [])
|
||||
if entry in queue:
|
||||
queue.remove(entry)
|
||||
if not queue:
|
||||
_gateway_queues.pop(session_key, None)
|
||||
if decision.get("notify_failed"):
|
||||
return {
|
||||
"approved": False,
|
||||
"message": "BLOCKED: Failed to send approval request to user. Do NOT retry.",
|
||||
"pattern_key": primary_key,
|
||||
"description": combined_desc,
|
||||
}
|
||||
|
||||
# Block until the user responds or timeout (default 5 min).
|
||||
# Poll in short slices so we can fire activity heartbeats every
|
||||
# ~10s to the agent's inactivity tracker. Without this, the
|
||||
# blocking event.wait() never touches activity, and the
|
||||
# gateway's inactivity watchdog (agent.gateway_timeout, default
|
||||
# 1800s) kills the agent while the user is still responding to
|
||||
# the approval prompt. Mirrors the _wait_for_process() cadence
|
||||
# in tools/environments/base.py.
|
||||
timeout = _get_approval_config().get("gateway_timeout", 300)
|
||||
try:
|
||||
timeout = int(timeout)
|
||||
except (ValueError, TypeError):
|
||||
timeout = 300
|
||||
|
||||
try:
|
||||
from tools.environments.base import touch_activity_if_due
|
||||
except Exception: # pragma: no cover
|
||||
touch_activity_if_due = None
|
||||
|
||||
_now = time.monotonic()
|
||||
_deadline = _now + max(timeout, 0)
|
||||
_activity_state = {"last_touch": _now, "start": _now}
|
||||
resolved = False
|
||||
while True:
|
||||
_remaining = _deadline - time.monotonic()
|
||||
if _remaining <= 0:
|
||||
break
|
||||
# 1s poll slice — the event is set immediately when the
|
||||
# user responds, so slice length only controls heartbeat
|
||||
# cadence, not user-visible responsiveness.
|
||||
if entry.event.wait(timeout=min(1.0, _remaining)):
|
||||
resolved = True
|
||||
break
|
||||
if touch_activity_if_due is not None:
|
||||
touch_activity_if_due(
|
||||
_activity_state, "waiting for user approval"
|
||||
)
|
||||
|
||||
# Clean up this entry from the queue
|
||||
with _lock:
|
||||
queue = _gateway_queues.get(session_key, [])
|
||||
if entry in queue:
|
||||
queue.remove(entry)
|
||||
if not queue:
|
||||
_gateway_queues.pop(session_key, None)
|
||||
|
||||
choice = entry.result
|
||||
# Normalize outcome for the post hook. Unresolved (timeout) and
|
||||
# None both mean the user never responded; report that explicitly
|
||||
# so plugins can distinguish timeout from explicit deny.
|
||||
_outcome = (
|
||||
"timeout" if not resolved
|
||||
else (choice if choice else "timeout")
|
||||
)
|
||||
_fire_approval_hook(
|
||||
"post_approval_response",
|
||||
command=command,
|
||||
description=combined_desc,
|
||||
pattern_key=primary_key,
|
||||
pattern_keys=list(all_keys),
|
||||
session_key=session_key,
|
||||
surface="gateway",
|
||||
choice=_outcome,
|
||||
)
|
||||
resolved = decision["resolved"]
|
||||
choice = decision["choice"]
|
||||
|
||||
if not resolved or choice is None or choice == "deny":
|
||||
# Consent contract: silence is NOT consent, and an explicit
|
||||
@ -1437,5 +1452,173 @@ def check_all_command_guards(command: str, env_type: str,
|
||||
"user_approved": True, "description": combined_desc}
|
||||
|
||||
|
||||
def check_execute_code_guard(code: str, env_type: str) -> dict:
|
||||
"""Approve an execute_code script before its child process is spawned.
|
||||
|
||||
execute_code runs arbitrary local Python — the script can call
|
||||
``subprocess``, ``os.system``, ``ctypes``, or other process/file APIs
|
||||
directly, none of which pass through ``terminal()`` /
|
||||
``DANGEROUS_PATTERNS``. In gateway/ask contexts we fail closed by approving
|
||||
the script as a whole before it runs (#30882). Returns the same dict
|
||||
contract as ``check_all_command_guards``.
|
||||
|
||||
Scope (documented limitation, #30882): in a purely local non-interactive
|
||||
non-gateway session (no TTY, not gateway, not cron-deny) this returns
|
||||
approved — matching the existing terminal auto-approve contract. The
|
||||
hardline floor still blocks catastrophic ``terminal()`` commands the script
|
||||
issues; running arbitrary code headlessly without any approval surface is
|
||||
trusted-by-config (set a gateway/ask surface or ``approvals.cron_mode`` to
|
||||
require approval).
|
||||
"""
|
||||
pattern_key = "execute_code"
|
||||
description = (
|
||||
"execute_code script execution. The script can spawn subprocesses or "
|
||||
"mutate files without passing through terminal command approval; "
|
||||
"approval is one-shot for this run."
|
||||
)
|
||||
|
||||
# Isolated backends already sandbox the child — matches the container skip
|
||||
# in check_all_command_guards / check_dangerous_command.
|
||||
if env_type in {"docker", "singularity", "modal", "daytona", "vercel_sandbox"}:
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
# --yolo or approvals.mode=off: bypass (session- or process-scoped).
|
||||
approval_mode = _get_approval_mode()
|
||||
if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled() or approval_mode == "off":
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
is_gateway = _is_gateway_approval_context()
|
||||
is_ask = env_var_enabled("HERMES_EXEC_ASK")
|
||||
|
||||
# Cron: no user is present to approve arbitrary code.
|
||||
if env_var_enabled("HERMES_CRON_SESSION"):
|
||||
if _get_cron_approval_mode() == "deny":
|
||||
return {
|
||||
"approved": False,
|
||||
"message": (
|
||||
"BLOCKED: execute_code runs arbitrary local Python "
|
||||
"(including subprocess calls that bypass shell-string "
|
||||
"approval checks). Cron jobs run without a user present "
|
||||
"to approve it. Use normal tools instead, or set "
|
||||
"approvals.cron_mode: approve only if this cron profile "
|
||||
"is intentionally trusted."
|
||||
),
|
||||
"pattern_key": pattern_key,
|
||||
"description": description,
|
||||
"outcome": "blocked",
|
||||
"user_consent": False,
|
||||
}
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
# Only gateway/ask contexts get the one-shot whole-script approval.
|
||||
# * CLI interactive: the script's terminal() calls are guarded per-call
|
||||
# (context now propagates into the RPC thread, #33057); a whole-script
|
||||
# prompt would fire on every execute_code call.
|
||||
# * Local non-interactive non-gateway: documented limitation above.
|
||||
if not is_gateway and not is_ask:
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
session_key = get_current_session_key()
|
||||
# Built only now (past the early-return gates) so the common non-approval
|
||||
# paths don't pay to copy a potentially-large script into this string.
|
||||
command = f"execute_code <<'PY'\n{code}\nPY"
|
||||
|
||||
# Smart mode: ask the aux LLM about the whole script. An APPROVE here only
|
||||
# suppresses the redundant whole-script prompt; the per-call terminal()
|
||||
# guards (restored by context propagation) still run independently.
|
||||
if approval_mode == "smart":
|
||||
verdict = _smart_approve(command, description)
|
||||
if verdict == "approve":
|
||||
logger.debug("Smart approval: auto-approved execute_code for session %s",
|
||||
session_key)
|
||||
return {"approved": True, "message": None,
|
||||
"smart_approved": True, "description": description}
|
||||
if verdict == "deny":
|
||||
return {
|
||||
"approved": False,
|
||||
"message": ("BLOCKED by smart approval: execute_code script "
|
||||
"execution was assessed as genuinely dangerous. "
|
||||
"Do NOT retry."),
|
||||
"smart_denied": True,
|
||||
"pattern_key": pattern_key,
|
||||
"description": description,
|
||||
"outcome": "denied",
|
||||
"user_consent": False,
|
||||
}
|
||||
# verdict == "escalate" → fall through to manual approval
|
||||
|
||||
notify_cb = None
|
||||
with _lock:
|
||||
notify_cb = _gateway_notify_cbs.get(session_key)
|
||||
|
||||
if notify_cb is None:
|
||||
# No gateway callback registered (e.g. ask-mode without a notifier):
|
||||
# surface a pending approval for backward compatibility.
|
||||
submit_pending(session_key, {
|
||||
"command": command,
|
||||
"pattern_key": pattern_key,
|
||||
"pattern_keys": [pattern_key],
|
||||
"description": description,
|
||||
})
|
||||
return {
|
||||
"approved": False,
|
||||
"pattern_key": pattern_key,
|
||||
"status": "pending_approval",
|
||||
"approval_pending": True,
|
||||
"command": command,
|
||||
"description": description,
|
||||
"message": (
|
||||
f"⚠️ {description}. Asking the user for approval.\n\n"
|
||||
f"**Code:**\n```python\n{code}\n```"
|
||||
),
|
||||
}
|
||||
|
||||
approval_data = {
|
||||
"command": command,
|
||||
"pattern_key": pattern_key,
|
||||
"pattern_keys": [pattern_key],
|
||||
"description": description,
|
||||
}
|
||||
decision = _await_gateway_decision(
|
||||
session_key, notify_cb, approval_data, surface="gateway"
|
||||
)
|
||||
if decision.get("notify_failed"):
|
||||
return {
|
||||
"approved": False,
|
||||
"message": ("BLOCKED: Failed to send execute_code approval request "
|
||||
"to user. Do NOT retry."),
|
||||
"pattern_key": pattern_key,
|
||||
"description": description,
|
||||
"outcome": "notify_failed",
|
||||
"user_consent": False,
|
||||
}
|
||||
|
||||
resolved = decision["resolved"]
|
||||
choice = decision["choice"]
|
||||
|
||||
if not resolved or choice is None or choice == "deny":
|
||||
reason = "timed out without user response" if not resolved else "denied by user"
|
||||
addendum = " Silence is not consent." if not resolved else ""
|
||||
return {
|
||||
"approved": False,
|
||||
"message": (
|
||||
f"BLOCKED: execute_code script {reason}. The user has NOT "
|
||||
f"consented to running this code. Do NOT retry, do NOT rephrase "
|
||||
f"the script, and do NOT attempt the same outcome via a "
|
||||
f"different tool.{addendum}"
|
||||
),
|
||||
"pattern_key": pattern_key,
|
||||
"description": description,
|
||||
"outcome": "timeout" if not resolved else "denied",
|
||||
"user_consent": False,
|
||||
}
|
||||
|
||||
# Approved — one-shot only. Deliberately NO approve_session/approve_permanent:
|
||||
# each execute_code script is distinct arbitrary code, so approval never
|
||||
# persists to future scripts.
|
||||
return {"approved": True, "message": None,
|
||||
"user_approved": True, "description": description}
|
||||
|
||||
|
||||
# Load permanent allowlist from config on module import
|
||||
load_permanent_allowlist()
|
||||
|
||||
@ -46,6 +46,8 @@ import uuid
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from tools.thread_context import propagate_context_to_thread
|
||||
|
||||
# Availability gate. On Windows we fall back to loopback TCP for the
|
||||
# sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
|
||||
# ``_use_tcp_rpc`` in ``_execute_local`` below. That makes execute_code
|
||||
@ -74,13 +76,30 @@ MAX_STDERR_BYTES = 10_000 # 10 KB
|
||||
|
||||
# Environment variable scrubbing rules (shared between the local + remote
|
||||
# backends). Secret-substring block is applied first; anything left must
|
||||
# match either a safe prefix or, on Windows, an OS-essential name.
|
||||
# match a safe prefix, the operational HERMES_ allowlist, or (on Windows) an
|
||||
# OS-essential name.
|
||||
#
|
||||
# NB: the broad "HERMES_" prefix was deliberately removed (#27303) — it leaked
|
||||
# HERMES_*-named config that lacks a secret substring (e.g. HERMES_BASE_URL,
|
||||
# HERMES_KANBAN_DB, HERMES_*_WEBHOOK). The child only needs the few
|
||||
# location/profile vars in _HERMES_CHILD_ALLOWED below; HERMES_RPC_SOCKET /
|
||||
# HERMES_RPC_DIR / TZ / HOME are injected explicitly after scrubbing.
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
||||
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
|
||||
"HERMES_")
|
||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
|
||||
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
||||
"PASSWD", "AUTH")
|
||||
"PASSWD", "AUTH", "DSN", "WEBHOOK")
|
||||
|
||||
# Operational HERMES_* vars the child legitimately needs by exact name — these
|
||||
# are non-secret runtime-location flags (the same set hermes_cli treats as the
|
||||
# runtime location) that repo-root modules a sandbox script imports may read at
|
||||
# import time. None match _SECRET_SUBSTRINGS.
|
||||
_HERMES_CHILD_ALLOWED = frozenset({
|
||||
"HERMES_HOME",
|
||||
"HERMES_PROFILE",
|
||||
"HERMES_CONFIG",
|
||||
"HERMES_ENV",
|
||||
})
|
||||
|
||||
# Windows-only: a handful of variables are required by the OS/CRT itself.
|
||||
# Without them, even stdlib calls like ``socket.socket()`` fail with
|
||||
@ -119,9 +138,10 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
|
||||
|
||||
Rules (order matters):
|
||||
1. Passthrough vars (skill- or config-declared) always pass.
|
||||
2. Secret-substring names (KEY/TOKEN/etc.) are blocked.
|
||||
2. Secret-substring names (KEY/TOKEN/DSN/WEBHOOK/etc.) are blocked.
|
||||
3. Names matching a safe prefix pass.
|
||||
4. On Windows, a small OS-essential allowlist passes by exact name
|
||||
4. Operational HERMES_* vars (_HERMES_CHILD_ALLOWED) pass by exact name.
|
||||
5. On Windows, a small OS-essential allowlist passes by exact name
|
||||
— without these the child can't even create a socket or spawn a
|
||||
subprocess.
|
||||
|
||||
@ -147,6 +167,9 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
|
||||
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
||||
scrubbed[k] = v
|
||||
continue
|
||||
if k in _HERMES_CHILD_ALLOWED:
|
||||
scrubbed[k] = v
|
||||
continue
|
||||
if is_windows and k.upper() in _WINDOWS_ESSENTIAL_ENV_VARS:
|
||||
scrubbed[k] = v
|
||||
return scrubbed
|
||||
@ -887,9 +910,11 @@ def _execute_remote(
|
||||
_ship_file_to_remote(env, f"{sandbox_dir}/hermes_tools.py", tools_src)
|
||||
_ship_file_to_remote(env, f"{sandbox_dir}/script.py", code)
|
||||
|
||||
# Start RPC polling thread
|
||||
# Wrapped so the thread inherits the turn's approval context + callbacks
|
||||
# (see tools.thread_context) — else sandbox RPC tool calls lose approval
|
||||
# routing (#33057).
|
||||
rpc_thread = threading.Thread(
|
||||
target=_rpc_poll_loop,
|
||||
target=propagate_context_to_thread(_rpc_poll_loop),
|
||||
args=(
|
||||
env, f"{sandbox_dir}/rpc", effective_task_id,
|
||||
tool_call_log, tool_call_counter, max_tool_calls,
|
||||
@ -1049,6 +1074,21 @@ def execute_code(
|
||||
# Dispatch: remote backends use file-based RPC, local uses UDS
|
||||
from tools.terminal_tool import _get_env_config
|
||||
env_type = _get_env_config()["env_type"]
|
||||
|
||||
# execute_code runs arbitrary Python (subprocess/os.system/...) that never
|
||||
# passes through terminal()/DANGEROUS_PATTERNS, so guard the whole script
|
||||
# here before either dispatch path spawns it. Runs synchronously in the
|
||||
# caller (tool-executor) thread, which holds the session context (#30882).
|
||||
from tools.approval import check_execute_code_guard
|
||||
_guard = check_execute_code_guard(code, env_type)
|
||||
if not _guard.get("approved", False):
|
||||
return json.dumps({
|
||||
"status": "error",
|
||||
"error": _guard.get("message") or "execute_code blocked by approval guard.",
|
||||
"tool_calls_made": 0,
|
||||
"duration_seconds": 0,
|
||||
}, ensure_ascii=False)
|
||||
|
||||
if env_type != "local":
|
||||
return _execute_remote(code, task_id, enabled_tools)
|
||||
|
||||
@ -1135,8 +1175,11 @@ def execute_code(
|
||||
os.chmod(sock_path, 0o600)
|
||||
server_sock.listen(1)
|
||||
|
||||
# Wrapped so the thread inherits the turn's approval context + callbacks
|
||||
# (see tools.thread_context) — else gateway sandbox tool calls silently
|
||||
# auto-approve dangerous commands (#33057, #30882).
|
||||
rpc_thread = threading.Thread(
|
||||
target=_rpc_server_loop,
|
||||
target=propagate_context_to_thread(_rpc_server_loop),
|
||||
args=(
|
||||
server_sock, task_id, tool_call_log,
|
||||
tool_call_counter, max_tool_calls, sandbox_tools,
|
||||
|
||||
Reference in New Issue
Block a user