fix(docker): seed s6 gateway state for legacy run cmd (#34829)
* fix(docker): seed s6 gateway state for legacy run cmd
* fix(docker): honor no-supervise during legacy gateway migration

---------

Co-authored-by: Donovan Yohan <donovan-yohan@users.noreply.github.com>
This commit is contained in:
@ -24,7 +24,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal
|
from typing import Literal, Sequence
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -57,6 +57,7 @@ def reconcile_profile_gateways(
|
|||||||
hermes_home: Path,
|
hermes_home: Path,
|
||||||
scandir: Path,
|
scandir: Path,
|
||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
|
container_argv: Sequence[str] | None = None,
|
||||||
) -> list[ReconcileAction]:
|
) -> list[ReconcileAction]:
|
||||||
"""Recreate s6 service registrations for every persistent profile.
|
"""Recreate s6 service registrations for every persistent profile.
|
||||||
|
|
||||||
@ -82,6 +83,8 @@ def reconcile_profile_gateways(
|
|||||||
directories are created at ``<scandir>/gateway-<profile>/``.
|
directories are created at ``<scandir>/gateway-<profile>/``.
|
||||||
dry_run: When True, walk and return the action list without
|
dry_run: When True, walk and return the action list without
|
||||||
touching the filesystem. For tests and `--dry-run` debug.
|
touching the filesystem. For tests and `--dry-run` debug.
|
||||||
|
container_argv: Optional container PID 1 argv override. Production
|
||||||
|
reads ``/proc/1/cmdline``; tests inject it directly.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
One :class:`ReconcileAction` per profile, in this order:
|
One :class:`ReconcileAction` per profile, in this order:
|
||||||
@ -93,8 +96,15 @@ def reconcile_profile_gateways(
|
|||||||
# populated the root profile dir. The slot exists so
|
# populated the root profile dir. The slot exists so
|
||||||
# ``hermes gateway start`` (no ``-p``) has somewhere to land;
|
# ``hermes gateway start`` (no ``-p``) has somewhere to land;
|
||||||
# auto-up only when the prior state was "running" (same rule as
|
# auto-up only when the prior state was "running" (same rule as
|
||||||
# named profiles).
|
# named profiles). If the container was launched with the legacy
|
||||||
default_prior_state = _read_prior_state(hermes_home)
|
# `gateway run` command and no state exists yet, seed that intent
|
||||||
|
# as `running` so the s6 reconciler preserves the pre-s6 behavior.
|
||||||
|
legacy_default_state = _maybe_migrate_legacy_gateway_run_state(
|
||||||
|
hermes_home,
|
||||||
|
container_argv=container_argv,
|
||||||
|
dry_run=dry_run,
|
||||||
|
)
|
||||||
|
default_prior_state = legacy_default_state or _read_prior_state(hermes_home)
|
||||||
default_should_start = default_prior_state in _AUTOSTART_STATES
|
default_should_start = default_prior_state in _AUTOSTART_STATES
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
_cleanup_stale_runtime_files(hermes_home)
|
_cleanup_stale_runtime_files(hermes_home)
|
||||||
@ -147,6 +157,66 @@ def reconcile_profile_gateways(
|
|||||||
return actions
|
return actions
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_migrate_legacy_gateway_run_state(
|
||||||
|
hermes_home: Path,
|
||||||
|
*,
|
||||||
|
container_argv: Sequence[str] | None,
|
||||||
|
dry_run: bool,
|
||||||
|
) -> str | None:
|
||||||
|
"""Seed root gateway_state for pre-s6 `gateway run` containers.
|
||||||
|
|
||||||
|
The tini image let Docker users run the gateway as the container
|
||||||
|
command (`docker run ... gateway run`). After the s6 migration,
|
||||||
|
profile gateways are restored from persisted gateway_state.json; a
|
||||||
|
legacy container with no state file would therefore register the
|
||||||
|
default service down and never start. Only synthesize state when no
|
||||||
|
root gateway_state.json exists so explicit stopped/failed states keep
|
||||||
|
winning across restarts.
|
||||||
|
"""
|
||||||
|
state_file = hermes_home / "gateway_state.json"
|
||||||
|
if state_file.exists():
|
||||||
|
return None
|
||||||
|
|
||||||
|
if os.environ.get("HERMES_GATEWAY_NO_SUPERVISE", "").lower() in ("1", "true", "yes"):
|
||||||
|
return None
|
||||||
|
|
||||||
|
argv = tuple(container_argv) if container_argv is not None else _read_container_argv()
|
||||||
|
if not _is_legacy_gateway_run_request(argv):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not dry_run:
|
||||||
|
import time
|
||||||
|
state_file.write_text(json.dumps({
|
||||||
|
"gateway_state": "running",
|
||||||
|
"timestamp": int(time.time()),
|
||||||
|
"migrated_from": "legacy-container-cmd",
|
||||||
|
}) + "\n")
|
||||||
|
return "running"
|
||||||
|
|
||||||
|
|
||||||
|
def _read_container_argv() -> tuple[str, ...]:
    """Return the argv of PID 1 as a tuple of strings, best effort.

    Reads ``/proc/1/cmdline`` and splits it on NUL bytes, dropping empty
    fields. Bytes that are not valid UTF-8 are decoded with replacement
    characters. Any ``OSError`` while reading yields an empty tuple.
    """
    cmdline_path = Path("/proc/1/cmdline")
    try:
        payload = cmdline_path.read_bytes()
    except OSError:
        return ()
    # filter(None, ...) discards the empty chunks produced by the split,
    # including the one after a trailing NUL.
    fields = filter(None, payload.split(b"\0"))
    return tuple(field.decode("utf-8", "replace") for field in fields)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_legacy_gateway_run_request(argv: Sequence[str]) -> bool:
    """Tell whether *argv* amounts to a Docker `gateway run` command.

    Leading launcher tokens are peeled off first, each at most once and
    in this order: a path whose basename is ``init``, a path ending in
    ``main-wrapper.sh``, and a path whose basename is ``hermes``. If
    ``--no-supervise`` appears anywhere in what remains the answer is
    False; otherwise it is True only when the first two remaining tokens
    are ``gateway`` and ``run``.
    """
    launcher_checks = (
        lambda head: Path(head).name == "init",
        lambda head: head.endswith("main-wrapper.sh"),
        lambda head: Path(head).name == "hermes",
    )

    remaining = list(argv)
    for is_launcher in launcher_checks:
        # One pass per check: a token is only stripped when it is at the
        # front at the moment its own check runs.
        if remaining and is_launcher(remaining[0]):
            del remaining[0]

    if "--no-supervise" in remaining:
        return False
    return remaining[:2] == ["gateway", "run"]
|
||||||
|
|
||||||
|
|
||||||
def _read_prior_state(profile_dir: Path) -> str | None:
|
def _read_prior_state(profile_dir: Path) -> str | None:
|
||||||
"""Read gateway_state.json's ``gateway_state`` field, or None if
|
"""Read gateway_state.json's ``gateway_state`` field, or None if
|
||||||
missing or unparseable. Unparseable counts as "no prior state" so
|
missing or unparseable. Unparseable counts as "no prior state" so
|
||||||
|
|||||||
@ -484,6 +484,88 @@ def test_default_slot_autostarts_when_root_state_running(tmp_path: Path) -> None
|
|||||||
assert not (scandir / "gateway-default" / "down").exists()
|
assert not (scandir / "gateway-default" / "down").exists()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "container_argv",
    [
        ("gateway", "run"),
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "gateway", "run"),
    ],
)
def test_legacy_gateway_run_cmd_seeds_default_running_state(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    container_argv: tuple[str, ...],
) -> None:
    """Pre-s6 Docker users often ran `gateway run` as the container
    command. With no persisted gateway_state.json yet, s6 reconciliation
    must migrate that legacy intent into a running default gateway slot."""
    # The migration is skipped when HERMES_GATEWAY_NO_SUPERVISE is truthy,
    # so clear any ambient value to keep this test independent of the
    # environment it runs in.
    monkeypatch.delenv("HERMES_GATEWAY_NO_SUPERVISE", raising=False)

    scandir = tmp_path / "run-service"
    scandir.mkdir()

    actions = reconcile_profile_gateways(
        hermes_home=tmp_path,
        scandir=scandir,
        dry_run=False,
        container_argv=container_argv,
    )

    default_action = next(a for a in actions if a.profile == "default")
    assert default_action.prior_state == "running"
    assert default_action.action == "started"
    assert not (scandir / "gateway-default" / "down").exists()

    # The seeded state file must record both the state and its origin.
    state = json.loads((tmp_path / "gateway_state.json").read_text())
    assert state["gateway_state"] == "running"
    assert state["migrated_from"] == "legacy-container-cmd"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "container_argv",
    [
        ("gateway", "run", "--no-supervise"),
        ("/init", "/opt/hermes/docker/main-wrapper.sh", "gateway", "run", "--no-supervise"),
    ],
)
def test_legacy_gateway_run_no_supervise_does_not_seed_s6_state(
    tmp_path: Path,
    container_argv: tuple[str, ...],
) -> None:
    """Passing `--no-supervise` with `gateway run` opts out of the s6
    migration: no state is seeded and the default slot stays down."""
    service_root = tmp_path / "run-service"
    service_root.mkdir()

    results = reconcile_profile_gateways(
        hermes_home=tmp_path,
        scandir=service_root,
        dry_run=False,
        container_argv=container_argv,
    )

    default_result = next(r for r in results if r.profile == "default")
    down_marker = service_root / "gateway-default" / "down"
    seeded_state = tmp_path / "gateway_state.json"

    assert default_result.prior_state is None
    assert default_result.action == "registered"
    assert down_marker.exists()
    assert not seeded_state.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_legacy_gateway_run_env_no_supervise_does_not_seed_s6_state(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Setting HERMES_GATEWAY_NO_SUPERVISE opts out of the migration just
    like the CLI `--no-supervise` flag does."""
    service_root = tmp_path / "run-service"
    service_root.mkdir()
    monkeypatch.setenv("HERMES_GATEWAY_NO_SUPERVISE", "1")

    results = reconcile_profile_gateways(
        hermes_home=tmp_path,
        scandir=service_root,
        dry_run=False,
        container_argv=("gateway", "run"),
    )

    default_result = next(r for r in results if r.profile == "default")
    down_marker = service_root / "gateway-default" / "down"
    seeded_state = tmp_path / "gateway_state.json"

    assert default_result.prior_state is None
    assert default_result.action == "registered"
    assert down_marker.exists()
    assert not seeded_state.exists()
|
||||||
|
|
||||||
|
|
||||||
def test_default_slot_does_not_autostart_when_root_state_stopped(
|
def test_default_slot_does_not_autostart_when_root_state_stopped(
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
@ -491,12 +573,17 @@ def test_default_slot_does_not_autostart_when_root_state_stopped(
|
|||||||
_seed_default_root(tmp_path, state="stopped")
|
_seed_default_root(tmp_path, state="stopped")
|
||||||
|
|
||||||
actions = reconcile_profile_gateways(
|
actions = reconcile_profile_gateways(
|
||||||
hermes_home=tmp_path, scandir=scandir, dry_run=False,
|
hermes_home=tmp_path,
|
||||||
|
scandir=scandir,
|
||||||
|
dry_run=False,
|
||||||
|
container_argv=("gateway", "run"),
|
||||||
)
|
)
|
||||||
|
|
||||||
default_action = next(a for a in actions if a.profile == "default")
|
default_action = next(a for a in actions if a.profile == "default")
|
||||||
assert default_action.action == "registered"
|
assert default_action.action == "registered"
|
||||||
assert (scandir / "gateway-default" / "down").exists()
|
assert (scandir / "gateway-default" / "down").exists()
|
||||||
|
state = json.loads((tmp_path / "gateway_state.json").read_text())
|
||||||
|
assert state["gateway_state"] == "stopped"
|
||||||
|
|
||||||
|
|
||||||
def test_default_slot_does_not_autostart_when_root_state_startup_failed(
|
def test_default_slot_does_not_autostart_when_root_state_startup_failed(
|
||||||
|
|||||||
Reference in New Issue
Block a user