From 25742372eb5664bc9626143a486823b27fb4bc5e Mon Sep 17 00:00:00 2001 From: kyssta-exe Date: Fri, 5 Jun 2026 00:07:43 +0000 Subject: [PATCH] fix(approval): check is_approved in execute_code guard (#39275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_execute_code_guard() never called is_approved() before entering the approval flow, and never persisted session/permanent approvals from the gateway response. This meant 'Approve session' and 'Always' buttons had no effect — every execute_code call re-prompted the user. - Add is_approved() check after get_current_session_key(), matching check_all_command_guards() - Persist session ('approve_session') and permanent ('approve_permanent') approvals based on the gateway choice, same as terminal command guard - Add 3 regression tests for session persistence, permanent persistence, and short-circuit on pre-existing approval --- .../test_execute_code_approval_cluster.py | 47 +++++++++++++++++++ tools/approval.py | 18 +++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_execute_code_approval_cluster.py b/tests/tools/test_execute_code_approval_cluster.py index db3b1d9e9..c5d7f3fb7 100644 --- a/tests/tools/test_execute_code_approval_cluster.py +++ b/tests/tools/test_execute_code_approval_cluster.py @@ -176,6 +176,53 @@ def test_guard_gateway_user_approves_is_one_shot(gw_session): assert A.is_approved(gw_session, "execute_code") is False +def test_guard_gateway_user_approves_session_persists(gw_session): + """'Approve session' stores session-level approval (#39275).""" + _register_resolver(gw_session, "session") + res = A.check_execute_code_guard("import os; print(1)", "local") + assert res["approved"] is True + assert res.get("user_approved") is True + # Session approval should now be stored. + assert A.is_approved(gw_session, "execute_code") is True + # Subsequent calls should auto-approve without prompting. + res2 = A.check_execute_code_guard("import os; print(2)", "local") + assert res2["approved"] is True + # Cleanup + with A._lock: + s = A._session_approved.get(gw_session, set()) + s.discard("execute_code") + + +def test_guard_gateway_user_approves_always_persists(gw_session): + """'Always' stores permanent approval (#39275).""" + _register_resolver(gw_session, "always") + res = A.check_execute_code_guard("import os; print(1)", "local") + assert res["approved"] is True + assert res.get("user_approved") is True + # Permanent approval should now be stored. + assert A.is_approved(gw_session, "execute_code") is True + # Cleanup + with A._lock: + A._permanent_approved.discard("execute_code") + s = A._session_approved.get(gw_session, set()) + s.discard("execute_code") + + +def test_guard_session_approval_short_circuits_prompt(gw_session): + """Once session-approved, execute_code skips the approval prompt (#39275).""" + # Manually set session approval. + A.approve_session(gw_session, "execute_code") + try: + # Even with a denier registered, the is_approved check short-circuits. + _register_resolver(gw_session, "deny") + res = A.check_execute_code_guard("import os", "local") + assert res["approved"] is True + finally: + with A._lock: + s = A._session_approved.get(gw_session, set()) + s.discard("execute_code") + + def test_guard_gateway_user_denies_blocks(gw_session): _register_resolver(gw_session, "deny") res = A.check_execute_code_guard("import os", "local") diff --git a/tools/approval.py b/tools/approval.py index 8e52f292f..c5051f719 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -1584,6 +1584,12 @@ def check_execute_code_guard(code: str, env_type: str) -> dict: # paths don't pay to copy a potentially-large script into this string. command = f"execute_code <<'PY'\n{code}\nPY" + # Check session/permanent approval — same gate as check_all_command_guards. + # Without this, "Approve session" / "Always" choices are stored but never + # consulted, so every execute_code call re-prompts the user (#39275). + if is_approved(session_key, pattern_key): + return {"approved": True, "message": None} + # Smart mode: ask the aux LLM about the whole script. An APPROVE here only # suppresses the redundant whole-script prompt; the per-call terminal() # guards (restored by context propagation) still run independently. @@ -1674,9 +1680,15 @@ def check_execute_code_guard(code: str, env_type: str) -> dict: "user_consent": False, } - # Approved — one-shot only. Deliberately NO approve_session/approve_permanent: - # each execute_code script is distinct arbitrary code, so approval never - # persists to future scripts. + # Approved — persist based on scope (same logic as check_all_command_guards). + if choice == "session": + approve_session(session_key, pattern_key) + elif choice == "always": + approve_session(session_key, pattern_key) + approve_permanent(pattern_key) + save_permanent_allowlist(_permanent_approved) + # choice == "once": no persistence — approval lasts this single call only. + return {"approved": True, "message": None, "user_approved": True, "description": description}