diff --git a/gateway/run.py b/gateway/run.py index 52b053db1..6adb98b8e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1005,10 +1005,6 @@ except Exception as _bootstrap_exc: # Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs) os.environ["HERMES_QUIET"] = "1" -# Mark that we are inside the gateway process — used by `hermes gateway stop/restart` -# to refuse self-targeting calls that would kill the agent's own runtime. -os.environ["HERMES_IN_GATEWAY"] = "1" - # Enable interactive exec approval for dangerous commands on messaging platforms os.environ["HERMES_EXEC_ASK"] = "1" diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index 385d4e3eb..fa0acd819 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -18,13 +18,15 @@ from hermes_cli.colors import Colors, color # Patterns that indicate a cron job targets the gateway lifecycle. # Matches commands that restart/stop the gateway or its service manager. +# Deliberately specific — a bare "gateway ... restart" catch-all would block +# legitimate prompts that merely mention an unrelated gateway (e.g. "summarize +# the API gateway logs and report restart events"). _GATEWAY_LIFECYCLE_PATTERNS = re.compile( r"(?i)" r"(hermes\s+gateway\s+(restart|stop|start))" r"|(launchctl\s+(kickstart|unload|load|stop|restart)\s+.*hermes)" r"|(systemctl\s+(restart|stop|start)\s+.*hermes)" r"|(p?kill\s+.*hermes.*gateway)" - r"|(\bgateway.*restart)" ) @@ -191,7 +193,7 @@ def cron_create(args): combined = prompt if script: try: - script_text = Path(script).read_text() + script_text = Path(script).read_text(encoding="utf-8") combined = f"{combined}\n{script_text}" except (OSError, UnicodeDecodeError): pass diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index d95b26849..d600c62c0 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -5424,7 +5424,7 @@ def _gateway_command_inner(args): elif subcmd == "stop": # Defense: refuse self-targeting gateway stop from inside the gateway. # Prevents agent-initiated kill loops when combined with supervisor KeepAlive. - if os.getenv("HERMES_IN_GATEWAY") == "1": + if os.getenv("_HERMES_GATEWAY") == "1": print_error( "Refusing to stop the gateway from inside the gateway process.\n" "This command was blocked to prevent restart loops.\n" @@ -5509,7 +5509,7 @@ def _gateway_command_inner(args): elif subcmd == "restart": # Defense: refuse self-targeting gateway restart from inside the gateway. # Prevents agent-initiated kill loops when combined with supervisor KeepAlive. - if os.getenv("HERMES_IN_GATEWAY") == "1": + if os.getenv("_HERMES_GATEWAY") == "1": print_error( "Refusing to restart the gateway from inside the gateway process.\n" "This command was blocked to prevent restart loops.\n" diff --git a/tests/conftest.py b/tests/conftest.py index 17bc68d80..4e5db1535 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -182,6 +182,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({ "HERMES_SESSION_SOURCE", "HERMES_SESSION_KEY", "HERMES_GATEWAY_SESSION", + "_HERMES_GATEWAY", "HERMES_PLATFORM", "HERMES_MODEL", "HERMES_INFERENCE_MODEL", diff --git a/tests/hermes_cli/test_gateway_restart_loop.py b/tests/hermes_cli/test_gateway_restart_loop.py index 87015afc3..d6c9bb06c 100644 --- a/tests/hermes_cli/test_gateway_restart_loop.py +++ b/tests/hermes_cli/test_gateway_restart_loop.py @@ -1,7 +1,7 @@ """Tests for gateway restart-loop defenses (#30719). Covers: -- Defense 1: gateway stop/restart refuse when HERMES_IN_GATEWAY=1 +- Defense 1: gateway stop/restart refuse when _HERMES_GATEWAY=1 - Defense 2: cron create rejects prompts containing gateway lifecycle commands - _contains_gateway_lifecycle_command pattern matching """ @@ -61,6 +61,11 @@ class TestGatewayLifecyclePattern: "echo 'just a normal cron job'", "run the backup script", "gateway is running fine", + # Regression (#30728 follow-up): legit prompts that merely mention an + # unrelated gateway + a restart must NOT be blocked. + "Summarize the API gateway logs and report any restart events from last night", + "Check if the payment gateway needs a restart after the deploy", + "Monitor the gateway and tell me if a restart is recommended", ]) def test_safe_commands(self, text): assert not _contains_gateway_lifecycle_command(text), f"Should NOT match: {text!r}" @@ -189,10 +194,10 @@ class TestCronCreateLifecycleBlock: # --------------------------------------------------------------------------- class TestGatewaySelfTargetingGuard: - """Verify hermes gateway stop/restart refuse when HERMES_IN_GATEWAY=1.""" + """Verify hermes gateway stop/restart refuse when _HERMES_GATEWAY=1.""" def test_stop_refuses_inside_gateway(self, monkeypatch): - monkeypatch.setenv("HERMES_IN_GATEWAY", "1") + monkeypatch.setenv("_HERMES_GATEWAY", "1") from hermes_cli.gateway import gateway_command args = Namespace(gateway_command="stop", all=False, system=False) with pytest.raises(SystemExit) as exc_info: @@ -200,7 +205,7 @@ class TestGatewaySelfTargetingGuard: assert exc_info.value.code == 1 def test_restart_refuses_inside_gateway(self, monkeypatch): - monkeypatch.setenv("HERMES_IN_GATEWAY", "1") + monkeypatch.setenv("_HERMES_GATEWAY", "1") from hermes_cli.gateway import gateway_command args = Namespace(gateway_command="restart", all=False, system=False) with pytest.raises(SystemExit) as exc_info: @@ -208,23 +213,40 @@ class TestGatewaySelfTargetingGuard: assert exc_info.value.code == 1 def test_stop_allows_outside_gateway(self, monkeypatch): - monkeypatch.delenv("HERMES_IN_GATEWAY", raising=False) - from hermes_cli.gateway import gateway_command + # With the gateway marker unset, the self-targeting guard must NOT + # fire. Prove control reaches the real stop path (rather than driving + # real signal delivery, which would trip the live-system guard) by + # short-circuiting the first downstream call with a sentinel. + monkeypatch.delenv("_HERMES_GATEWAY", raising=False) + import hermes_cli.gateway as gw + + class _Reached(Exception): + pass + + def _sentinel(*a, **k): + raise _Reached() + + monkeypatch.setattr(gw, "_dispatch_via_service_manager_if_s6", _sentinel) + monkeypatch.setattr(gw, "_dispatch_all_via_service_manager_if_s6", _sentinel) args = Namespace(gateway_command="stop", all=False, system=False) - # Should not raise SystemExit(1) — it may fail for other reasons - # (no gateway running) but it won't exit with code 1 from the guard. - try: - gateway_command(args) - except SystemExit as e: - # The guard exit code is 1 and prints "Refusing" — make sure - # that's NOT what we hit. - assert e.code != 1 or "Refusing" not in str(e) + with pytest.raises(_Reached): + gw.gateway_command(args) def test_restart_allows_outside_gateway(self, monkeypatch): - monkeypatch.delenv("HERMES_IN_GATEWAY", raising=False) - from hermes_cli.gateway import gateway_command + # Same as above for restart: guard must not fire when the marker is + # unset. The first thing restart does after the guard is the s6 + # dispatch check — sentinel it so we never reach real signal delivery. + monkeypatch.delenv("_HERMES_GATEWAY", raising=False) + import hermes_cli.gateway as gw + + class _Reached(Exception): + pass + + def _sentinel(*a, **k): + raise _Reached() + + monkeypatch.setattr(gw, "_dispatch_via_service_manager_if_s6", _sentinel) + monkeypatch.setattr(gw, "_dispatch_all_via_service_manager_if_s6", _sentinel) args = Namespace(gateway_command="restart", all=False, system=False) - try: - gateway_command(args) - except SystemExit as e: - assert e.code != 1 or "Refusing" not in str(e) + with pytest.raises(_Reached): + gw.gateway_command(args)