Salvaged from PR #4024 by @Sertug17. Fixes #4017. - Replace systemd-run --user --scope with setsid for portable session detach - Add system-level service detection to cmd_update gateway restart - Falls back to start_new_session=True on systems without setsid (macOS, minimal containers)
This commit is contained in:
@ -4617,8 +4617,8 @@ class GatewayRunner:
|
||||
async def _handle_update_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /update command — update Hermes Agent to the latest version.
|
||||
|
||||
Spawns ``hermes update`` in a separate systemd scope so it survives the
|
||||
gateway restart that ``hermes update`` may trigger at the end. Marker
|
||||
Spawns ``hermes update`` in a detached session (via ``setsid``) so it
|
||||
survives the gateway restart that ``hermes update`` may trigger. Marker
|
||||
files are written so either the current gateway process or the next one
|
||||
can notify the user when the update finishes.
|
||||
"""
|
||||
@ -4658,28 +4658,28 @@ class GatewayRunner:
|
||||
pending_path.write_text(json.dumps(pending))
|
||||
exit_code_path.unlink(missing_ok=True)
|
||||
|
||||
# Spawn `hermes update` in a separate cgroup so it survives gateway
|
||||
# restart. systemd-run --user --scope creates a transient scope unit.
|
||||
# Spawn `hermes update` detached so it survives gateway restart.
|
||||
# Use setsid for portable session detach (works under system services
|
||||
# where systemd-run --user fails due to missing D-Bus session).
|
||||
hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
|
||||
update_cmd = (
|
||||
f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; "
|
||||
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
|
||||
)
|
||||
try:
|
||||
systemd_run = shutil.which("systemd-run")
|
||||
if systemd_run:
|
||||
setsid_bin = shutil.which("setsid")
|
||||
if setsid_bin:
|
||||
# Preferred: setsid creates a new session, fully detached
|
||||
subprocess.Popen(
|
||||
[systemd_run, "--user", "--scope",
|
||||
"--unit=hermes-update", "--",
|
||||
"bash", "-c", update_cmd],
|
||||
[setsid_bin, "bash", "-c", update_cmd],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
)
|
||||
else:
|
||||
# Fallback: best-effort detach with start_new_session
|
||||
# Fallback: start_new_session=True calls os.setsid() in child
|
||||
subprocess.Popen(
|
||||
["bash", "-c", f"nohup {update_cmd} &"],
|
||||
["bash", "-c", update_cmd],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
|
||||
@ -3165,6 +3165,7 @@ def cmd_update(args):
|
||||
_gw_service_name = get_service_name()
|
||||
existing_pid = get_running_pid()
|
||||
has_systemd_service = False
|
||||
has_system_service = False
|
||||
has_launchd_service = False
|
||||
|
||||
try:
|
||||
@ -3177,6 +3178,19 @@ def cmd_update(args):
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
# Also check for a system-level service (hermes gateway install --system).
|
||||
# This covers gateways running under system systemd where --user
|
||||
# fails due to missing D-Bus session.
|
||||
if not has_systemd_service and is_linux():
|
||||
try:
|
||||
check = subprocess.run(
|
||||
["systemctl", "is-active", _gw_service_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
has_system_service = check.stdout.strip() == "active"
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
# Check for macOS launchd service
|
||||
if is_macos():
|
||||
try:
|
||||
@ -3191,7 +3205,7 @@ def cmd_update(args):
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
if existing_pid or has_systemd_service or has_launchd_service:
|
||||
if existing_pid or has_systemd_service or has_system_service or has_launchd_service:
|
||||
print()
|
||||
|
||||
# When a service manager is handling the gateway, let it
|
||||
@ -3232,6 +3246,21 @@ def cmd_update(args):
|
||||
print(" hermes gateway restart")
|
||||
else:
|
||||
print(" Try manually: hermes gateway restart")
|
||||
elif has_system_service:
|
||||
# System-level service (hermes gateway install --system).
|
||||
# No D-Bus session needed — systemctl without --user talks
|
||||
# directly to the system manager over /run/systemd/private.
|
||||
print("→ Restarting system gateway service...")
|
||||
restart = subprocess.run(
|
||||
["systemctl", "restart", _gw_service_name],
|
||||
capture_output=True, text=True, timeout=15,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
print("✓ Gateway restarted (system service).")
|
||||
else:
|
||||
print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
|
||||
print(" System services may require root. Try:")
|
||||
print(f" sudo systemctl restart {_gw_service_name}")
|
||||
elif has_launchd_service:
|
||||
# Refresh the plist first (picks up --replace and other
|
||||
# changes from the update we just pulled).
|
||||
|
||||
@ -202,7 +202,7 @@ class TestHandleUpdateCommand:
|
||||
|
||||
with patch("gateway.run._hermes_home", hermes_home), \
|
||||
patch("gateway.run.__file__", fake_file), \
|
||||
patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/systemd-run"), \
|
||||
patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/setsid"), \
|
||||
patch("subprocess.Popen"):
|
||||
result = await runner._handle_update_command(event)
|
||||
|
||||
@ -215,8 +215,8 @@ class TestHandleUpdateCommand:
|
||||
assert not (hermes_home / ".update_exit_code").exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_spawns_systemd_run(self, tmp_path):
|
||||
"""Uses systemd-run when available."""
|
||||
async def test_spawns_setsid(self, tmp_path):
|
||||
"""Uses setsid when available."""
|
||||
runner = _make_runner()
|
||||
event = _make_event()
|
||||
|
||||
@ -236,16 +236,16 @@ class TestHandleUpdateCommand:
|
||||
patch("subprocess.Popen", mock_popen):
|
||||
result = await runner._handle_update_command(event)
|
||||
|
||||
# Verify systemd-run was used
|
||||
# Verify setsid was used
|
||||
call_args = mock_popen.call_args[0][0]
|
||||
assert call_args[0] == "/usr/bin/systemd-run"
|
||||
assert "--scope" in call_args
|
||||
assert call_args[0] == "/usr/bin/setsid"
|
||||
assert call_args[1] == "bash"
|
||||
assert ".update_exit_code" in call_args[-1]
|
||||
assert "Starting Hermes update" in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fallback_nohup_when_no_systemd_run(self, tmp_path):
|
||||
"""Falls back to nohup when systemd-run is not available."""
|
||||
async def test_fallback_when_no_setsid(self, tmp_path):
|
||||
"""Falls back to start_new_session=True when setsid is not available."""
|
||||
runner = _make_runner()
|
||||
event = _make_event()
|
||||
|
||||
@ -260,24 +260,27 @@ class TestHandleUpdateCommand:
|
||||
|
||||
mock_popen = MagicMock()
|
||||
|
||||
def which_no_systemd(x):
|
||||
def which_no_setsid(x):
|
||||
if x == "hermes":
|
||||
return "/usr/bin/hermes"
|
||||
if x == "systemd-run":
|
||||
if x == "setsid":
|
||||
return None
|
||||
return None
|
||||
|
||||
with patch("gateway.run._hermes_home", hermes_home), \
|
||||
patch("gateway.run.__file__", fake_file), \
|
||||
patch("shutil.which", side_effect=which_no_systemd), \
|
||||
patch("shutil.which", side_effect=which_no_setsid), \
|
||||
patch("subprocess.Popen", mock_popen):
|
||||
result = await runner._handle_update_command(event)
|
||||
|
||||
# Verify bash -c nohup fallback was used
|
||||
# Verify plain bash -c fallback (no nohup, no setsid)
|
||||
call_args = mock_popen.call_args[0][0]
|
||||
assert call_args[0] == "bash"
|
||||
assert "nohup" in call_args[2]
|
||||
assert "nohup" not in call_args[2]
|
||||
assert ".update_exit_code" in call_args[2]
|
||||
# start_new_session=True should be in kwargs
|
||||
call_kwargs = mock_popen.call_args[1]
|
||||
assert call_kwargs.get("start_new_session") is True
|
||||
assert "Starting Hermes update" in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -25,6 +25,8 @@ def _make_run_side_effect(
|
||||
verify_ok=True,
|
||||
commit_count="3",
|
||||
systemd_active=False,
|
||||
system_service_active=False,
|
||||
system_restart_rc=0,
|
||||
launchctl_loaded=False,
|
||||
):
|
||||
"""Build a subprocess.run side_effect that simulates git + service commands."""
|
||||
@ -45,14 +47,23 @@ def _make_run_side_effect(
|
||||
if "rev-list" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="")
|
||||
|
||||
# systemctl --user is-active
|
||||
# systemctl is-active — distinguish --user from system scope
|
||||
if "systemctl" in joined and "is-active" in joined:
|
||||
if systemd_active:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
|
||||
return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
|
||||
if "--user" in joined:
|
||||
if systemd_active:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
|
||||
return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
|
||||
else:
|
||||
# System-level check (no --user)
|
||||
if system_service_active:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
|
||||
return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
|
||||
|
||||
# systemctl --user restart
|
||||
# systemctl restart — distinguish --user from system scope
|
||||
if "systemctl" in joined and "restart" in joined:
|
||||
if "--user" not in joined and system_service_active:
|
||||
stderr = "" if system_restart_rc == 0 else "Failed to restart: Permission denied"
|
||||
return subprocess.CompletedProcess(cmd, system_restart_rc, stdout="", stderr=stderr)
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
# launchctl list ai.hermes.gateway
|
||||
@ -393,3 +404,91 @@ class TestCmdUpdateLaunchdRestart:
|
||||
assert "Stopped gateway" not in captured
|
||||
assert "Gateway restarted" not in captured
|
||||
assert "Gateway restarted via launchd" not in captured
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_update — system-level systemd service detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCmdUpdateSystemService:
|
||||
"""cmd_update detects system-level gateway services where --user fails."""
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_update_detects_system_service_and_restarts(
|
||||
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
||||
):
|
||||
"""When user systemd is inactive but a system service exists, restart via system scope."""
|
||||
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
|
||||
|
||||
mock_run.side_effect = _make_run_side_effect(
|
||||
commit_count="3",
|
||||
systemd_active=False,
|
||||
system_service_active=True,
|
||||
)
|
||||
|
||||
with patch("gateway.status.get_running_pid", return_value=12345), \
|
||||
patch("gateway.status.remove_pid_file"):
|
||||
cmd_update(mock_args)
|
||||
|
||||
captured = capsys.readouterr().out
|
||||
assert "system gateway service" in captured.lower()
|
||||
assert "Gateway restarted (system service)" in captured
|
||||
# Verify systemctl restart (no --user) was called
|
||||
restart_calls = [
|
||||
c for c in mock_run.call_args_list
|
||||
if "restart" in " ".join(str(a) for a in c.args[0])
|
||||
and "systemctl" in " ".join(str(a) for a in c.args[0])
|
||||
and "--user" not in " ".join(str(a) for a in c.args[0])
|
||||
]
|
||||
assert len(restart_calls) == 1
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_update_system_service_restart_failure_shows_sudo_hint(
|
||||
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
||||
):
|
||||
"""When system service restart fails (e.g. no root), show sudo hint."""
|
||||
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
|
||||
|
||||
mock_run.side_effect = _make_run_side_effect(
|
||||
commit_count="3",
|
||||
systemd_active=False,
|
||||
system_service_active=True,
|
||||
system_restart_rc=1,
|
||||
)
|
||||
|
||||
with patch("gateway.status.get_running_pid", return_value=12345), \
|
||||
patch("gateway.status.remove_pid_file"):
|
||||
cmd_update(mock_args)
|
||||
|
||||
captured = capsys.readouterr().out
|
||||
assert "sudo systemctl restart" in captured
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_user_service_takes_priority_over_system(
|
||||
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
||||
):
|
||||
"""When both user and system services are active, user wins."""
|
||||
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
|
||||
|
||||
mock_run.side_effect = _make_run_side_effect(
|
||||
commit_count="3",
|
||||
systemd_active=True,
|
||||
system_service_active=True,
|
||||
)
|
||||
|
||||
with patch("gateway.status.get_running_pid", return_value=12345), \
|
||||
patch("gateway.status.remove_pid_file"), \
|
||||
patch("os.kill"):
|
||||
cmd_update(mock_args)
|
||||
|
||||
captured = capsys.readouterr().out
|
||||
# Should restart via user service, not system
|
||||
assert "Gateway restarted." in captured
|
||||
assert "(system service)" not in captured
|
||||
|
||||
Reference in New Issue
Block a user