fix(cli): launchd KeepAlive unconditional restart (#37388)
Replace KeepAlive.SuccessfulExit=false dict with <key>KeepAlive</key><true/> so launchd restarts hermes-gateway on any exit, matching the documented drain-then-exit restart protocol used by --graceful-restart.
This commit is contained in:
@ -227,9 +227,8 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
|
||||
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
|
||||
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
|
||||
seconds), then exits. systemd relaunches clean exits via
|
||||
``Restart=always``; launchd still uses a non-zero planned-restart exit
|
||||
because its plist has ``KeepAlive.SuccessfulExit = false``.
|
||||
seconds), then exits. Both systemd (``Restart=always``) and launchd
|
||||
(unconditional ``<key>KeepAlive</key><true/>``) restart on any exit.
|
||||
|
||||
This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
|
||||
which SIGKILL in-flight agents after a short timeout.
|
||||
@ -3083,10 +3082,7 @@ def generate_launchd_plist() -> str:
|
||||
<true/>
|
||||
|
||||
<key>KeepAlive</key>
|
||||
<dict>
|
||||
<key>SuccessfulExit</key>
|
||||
<false/>
|
||||
</dict>
|
||||
<true/>
|
||||
|
||||
<key>StandardOutPath</key>
|
||||
<string>{log_dir}/gateway.log</string>
|
||||
@ -3249,7 +3245,7 @@ def launchd_stop():
|
||||
pass
|
||||
# bootout unloads the service definition so KeepAlive doesn't respawn
|
||||
# the process. A plain `kill SIGTERM` only signals the process — launchd
|
||||
# immediately restarts it because KeepAlive.SuccessfulExit = false.
|
||||
# immediately restarts it because KeepAlive is unconditionally true.
|
||||
# `hermes gateway start` re-bootstraps when it detects the job is unloaded.
|
||||
try:
|
||||
subprocess.run(["launchctl", "bootout", target], check=True, timeout=90)
|
||||
|
||||
@ -2579,3 +2579,24 @@ class TestServiceWorkingDirIsStable:
|
||||
assert m, "plist has no WorkingDirectory entry"
|
||||
assert Path(m.group(1)).resolve() == home.resolve()
|
||||
assert "/.worktrees/" not in m.group(1)
|
||||
|
||||
def test_launchd_plist_keepalive_unconditional(self, tmp_path, monkeypatch):
|
||||
"""KeepAlive must be unconditional <true/> so the gateway restarts on clean exits.
|
||||
|
||||
Bug #37388: the old ``KeepAlive.SuccessfulExit = false`` dict form meant
|
||||
launchd would NOT restart after a zero-exit (e.g. ``gateway run --replace``
|
||||
causes the old instance to exit cleanly). Switching to the scalar
|
||||
``<key>KeepAlive</key><true/>`` makes launchd restart regardless of exit code.
|
||||
"""
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir()
|
||||
monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: home)
|
||||
plist = gateway_cli.generate_launchd_plist()
|
||||
|
||||
# Scalar <true/> must be present immediately after the KeepAlive key
|
||||
assert "<key>KeepAlive</key>" in plist
|
||||
# The unconditional form
|
||||
assert "<key>KeepAlive</key>\n <true/>" in plist
|
||||
# The old conditional dict form must NOT appear
|
||||
assert "SuccessfulExit" not in plist
|
||||
assert "<key>KeepAlive</key>\n <dict>" not in plist
|
||||
|
||||
Reference in New Issue
Block a user