From 064875a5401686520dfcc9f36a90e3be1dfe3dee Mon Sep 17 00:00:00 2001 From: Bartok Date: Sun, 31 May 2026 22:46:04 -0500 Subject: [PATCH] fix(docker): support s6 /init images in terminal sandbox (#34628) (#34635) s6-overlay images (e.g. hermes-agent:latest) use /init as PID 1 and exec /run/s6/basedir/bin/init during stage0 startup. The Docker terminal backend unconditionally added Docker --init and mounted /run as noexec, which broke those images in two ways: --init created a second competing PID-1 init, and the noexec /run made s6 stage0 fail with "exec: /run/s6/basedir/bin/init: Permission denied" (exit 126), so the container died and terminal commands reported a generic "container is not running" error. Detect images whose entrypoint is /init via 'docker image inspect' and, for those images only, skip Docker --init and mount /run with exec. All other images keep the hardened --init + noexec defaults. Detection is best-effort: any inspect failure falls back to the safe defaults. --- tests/tools/test_docker_environment.py | 112 +++++++++++++++++++++++++ tools/environments/docker.py | 102 +++++++++++++++++++--- 2 files changed, 203 insertions(+), 11 deletions(-) diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 92db4287f..099e167e7 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -1517,3 +1517,115 @@ def test_credential_mount_works_when_source_is_valid_file(monkeypatch, tmp_path) assert run_calls, "docker run should have been called" run_args_str = " ".join(run_calls[0][0]) assert "token.json" in run_args_str + + +# ── s6-overlay /init image handling (issue #34628) ──────────────── + + +def _mock_subprocess_run_with_entrypoint(monkeypatch, entrypoint_json): + """Like _mock_subprocess_run, but `docker image inspect` returns the given + entrypoint JSON so _image_uses_init_entrypoint can be exercised end-to-end. + """ + calls = [] + + def _run(cmd, **kwargs): + calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + if isinstance(cmd, list) and len(cmd) >= 2: + if cmd[1] == "version": + return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="") + if cmd[1] == "image" and len(cmd) >= 3 and cmd[2] == "inspect": + return subprocess.CompletedProcess(cmd, 0, stdout=entrypoint_json + "\n", stderr="") + if cmd[1] == "run": + return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + return calls + + +def test_image_uses_init_entrypoint_detects_s6_init(monkeypatch): + """An image whose entrypoint is /init is detected as an s6-overlay image.""" + def _run(cmd, **kwargs): + return subprocess.CompletedProcess(cmd, 0, stdout='["/init"]', stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "hermes-agent:latest") is True + + +def test_image_uses_init_entrypoint_false_for_plain_image(monkeypatch): + """A normal image (no /init entrypoint) is not treated as s6-overlay.""" + def _run(cmd, **kwargs): + return subprocess.CompletedProcess(cmd, 0, stdout='["/bin/sh","-c"]', stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "python:3.11") is False + + +def test_image_uses_init_entrypoint_false_for_null_entrypoint(monkeypatch): + """Images with no declared entrypoint (null) keep hardened defaults.""" + def _run(cmd, **kwargs): + return subprocess.CompletedProcess(cmd, 0, stdout="null", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "alpine") is False + + +def test_image_uses_init_entrypoint_false_on_inspect_failure(monkeypatch): + """An inspect failure (e.g. image not pulled) is best-effort -> defaults kept.""" + def _run(cmd, **kwargs): + return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="No such image") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "missing:tag") is False + + +def test_image_uses_init_entrypoint_false_on_exception(monkeypatch): + """A subprocess error never raises out of detection — defaults kept.""" + def _run(cmd, **kwargs): + raise OSError("docker daemon down") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "x") is False + + +def test_s6_image_skips_docker_init_and_mounts_run_exec(monkeypatch): + """For an s6-overlay /init image, docker run must omit --init and mount + /run with exec (issue #34628).""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + calls = _mock_subprocess_run_with_entrypoint(monkeypatch, '["/init"]') + + _make_dummy_env(image="hermes-agent:latest") + + run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_calls, "docker run should have been called" + run_args = run_calls[0][0] + + assert "--init" not in run_args, "s6 /init image must not get Docker --init" + + tmpfs_vals = [run_args[i + 1] for i, a in enumerate(run_args[:-1]) if a == "--tmpfs"] + run_mounts = [v for v in tmpfs_vals if v.startswith("/run:")] + assert run_mounts, f"no /run tmpfs mount found in {tmpfs_vals}" + assert "exec" in run_mounts[0] and "noexec" not in run_mounts[0], ( + f"/run must be mounted exec for s6 images, got: {run_mounts[0]}" + ) + + +def test_plain_image_keeps_docker_init_and_run_noexec(monkeypatch): + """A non-s6 image keeps the hardened defaults: Docker --init and noexec /run.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + calls = _mock_subprocess_run_with_entrypoint(monkeypatch, '["/bin/sh","-c"]') + + _make_dummy_env(image="python:3.11") + + run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_calls, "docker run should have been called" + run_args = run_calls[0][0] + + assert "--init" in run_args, "non-s6 image must keep Docker --init" + + tmpfs_vals = [run_args[i + 1] for i, a in enumerate(run_args[:-1]) if a == "--tmpfs"] + run_mounts = [v for v in tmpfs_vals if v.startswith("/run:")] + assert run_mounts, f"no /run tmpfs mount found in {tmpfs_vals}" + assert "noexec" in run_mounts[0], ( + f"/run must stay noexec for non-s6 images, got: {run_mounts[0]}" + ) diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 306f4705c..5a0af2692 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -5,6 +5,7 @@ configurable resource limits (CPU, memory, disk), and optional filesystem persistence via bind mounts. """ +import json import logging import os import re @@ -329,9 +330,15 @@ _BASE_SECURITY_ARGS = [ "--pids-limit", "256", "--tmpfs", "/tmp:rw,nosuid,size=512m", "--tmpfs", "/var/tmp:rw,noexec,nosuid,size=256m", - "--tmpfs", "/run:rw,noexec,nosuid,size=64m", ] +# /run is split out from _BASE_SECURITY_ARGS because s6-overlay images need it +# mounted ``exec``: s6 stage0 later runs ``exec /run/s6/basedir/bin/init``, which +# fails with "Permission denied" (exit 126) on a ``noexec`` mount. For all other +# images we keep the hardened ``noexec`` default. +_RUN_TMPFS_NOEXEC = "--tmpfs", "/run:rw,noexec,nosuid,size=64m" +_RUN_TMPFS_EXEC = "--tmpfs", "/run:rw,exec,nosuid,size=64m" + # Extra caps needed when the container starts as root and an init/entrypoint # must drop privileges (via `s6-setuidgid`, `gosu`, `su`, or similar). # Skipped when --user is passed because the container already starts @@ -342,11 +349,63 @@ _PRIVDROP_CAP_ARGS = [ ] -def _build_security_args(run_as_host_user: bool) -> list[str]: - """Return the security/cap/tmpfs args tailored to the privilege mode.""" +def _build_security_args(run_as_host_user: bool, run_exec: bool = False) -> list[str]: + """Return the security/cap/tmpfs args tailored to the privilege mode. + + ``run_exec`` mounts ``/run`` with ``exec`` instead of the hardened + ``noexec`` default. This is required for s6-overlay images whose ``/init`` + entrypoint execs ``/run/s6/basedir/bin/init`` during startup; see + ``_image_uses_init_entrypoint``. + """ + run_tmpfs = list(_RUN_TMPFS_EXEC if run_exec else _RUN_TMPFS_NOEXEC) + args = list(_BASE_SECURITY_ARGS) + run_tmpfs if run_as_host_user: - return list(_BASE_SECURITY_ARGS) - return list(_BASE_SECURITY_ARGS) + list(_PRIVDROP_CAP_ARGS) + return args + return args + list(_PRIVDROP_CAP_ARGS) + + +def _image_uses_init_entrypoint(docker_exe: str, image: str) -> bool: + """Return True if ``image``'s entrypoint is the s6-overlay ``/init``. + + Such images (e.g. anything built on ``s6-overlay``, including + ``hermes-agent:latest``) already provide their own PID-1 init and execute + ``/run/s6/basedir/bin/init`` during stage0 startup. They are incompatible + with Docker's ``--init`` (two competing PID-1 inits) and with a ``noexec`` + ``/run`` mount. Detection is best-effort: on any inspection failure we + return False and keep the hardened defaults. + """ + try: + result = subprocess.run( + [docker_exe, "image", "inspect", image, + "--format", "{{json .Config.Entrypoint}}"], + capture_output=True, + text=True, + timeout=15, + ) + except (subprocess.SubprocessError, OSError) as e: + logger.debug("Docker: could not inspect entrypoint for %s: %s", image, e) + return False + if result.returncode != 0: + # Image may not be pulled yet; the run will pull it. Defaults are safe + # for non-s6 images, so don't block on this. + logger.debug( + "Docker: image inspect for %s returned %d (stderr=%s)", + image, result.returncode, result.stderr.strip(), + ) + return False + raw = (result.stdout or "").strip() + if not raw or raw == "null": + return False + try: + entrypoint = json.loads(raw) + except (ValueError, TypeError): + return False + if isinstance(entrypoint, str): + entrypoint = [entrypoint] + if not isinstance(entrypoint, list) or not entrypoint: + return False + first = str(entrypoint[0]).strip() + return first in ("/init", "/package/admin/s6-overlay/command/init") def _resolve_host_user_spec() -> Optional[str]: @@ -672,7 +731,28 @@ class DockerEnvironment(BaseEnvironment): ) # Fall back to the full cap set — without --user, an image's # init may still need s6-setuidgid/gosu/su to drop privileges. - security_args = _build_security_args(run_as_host_user and bool(user_args)) + + # Resolve the docker executable once so it works even when + # /usr/local/bin is not in PATH (common on macOS gateway/service). + self._docker_exe = find_docker() or "docker" + + # s6-overlay images (e.g. hermes-agent:latest) already use /init as PID 1 + # and exec /run/s6/basedir/bin/init during startup. For those images we + # must (a) skip Docker's --init (two competing PID-1 inits) and (b) mount + # /run with exec instead of noexec, or s6 stage0 dies with exit 126 + # "Permission denied". Detected once here; defaults are kept on any + # inspection failure. See issue #34628. + image_uses_s6_init = _image_uses_init_entrypoint(self._docker_exe, image) + if image_uses_s6_init: + logger.info( + "Docker: image %s uses /init (s6-overlay) as entrypoint — " + "skipping --init and mounting /run with exec.", + image, + ) + security_args = _build_security_args( + run_as_host_user and bool(user_args), + run_exec=image_uses_s6_init, + ) logger.info(f"Docker volume_args: {volume_args}") # User-supplied extra docker run flags (docker_extra_args in config.yaml). @@ -695,10 +775,6 @@ class DockerEnvironment(BaseEnvironment): ) logger.info(f"Docker run_args: {all_run_args}") - # Resolve the docker executable once so it works even when - # /usr/local/bin is not in PATH (common on macOS gateway/service). - self._docker_exe = find_docker() or "docker" - # Start the container directly via `docker run -d`. container_name = f"hermes-{uuid.uuid4().hex[:8]}" # Labels make hermes-created containers identifiable to: @@ -763,9 +839,13 @@ class DockerEnvironment(BaseEnvironment): reused = True if not reused: + # tini/catatonit as PID 1 reaps zombie children — but s6-overlay + # images already provide their own /init PID 1, so adding --init + # there creates two competing inits and breaks startup (#34628). + init_args = [] if image_uses_s6_init else ["--init"] run_cmd = [ self._docker_exe, "run", "-d", - "--init", # tini/catatonit as PID 1 — reaps zombie children + *init_args, "--name", container_name, *label_args, "-w", cwd,