s6-overlay images (e.g. hermes-agent:latest) use /init as PID 1 and exec /run/s6/basedir/bin/init during stage0 startup. The Docker terminal backend unconditionally added Docker --init and mounted /run as noexec, which broke those images in two ways: --init created a second competing PID-1 init, and the noexec /run made s6 stage0 fail with "exec: /run/s6/basedir/bin/init: Permission denied" (exit 126), so the container died and terminal commands reported a generic "container is not running" error. Detect images whose entrypoint is /init via 'docker image inspect' and, for those images only, skip Docker --init and mount /run with exec. All other images keep the hardened --init + noexec defaults. Detection is best-effort: any inspect failure falls back to the safe defaults.
This commit is contained in:
@ -1517,3 +1517,115 @@ def test_credential_mount_works_when_source_is_valid_file(monkeypatch, tmp_path)
|
||||
assert run_calls, "docker run should have been called"
|
||||
run_args_str = " ".join(run_calls[0][0])
|
||||
assert "token.json" in run_args_str
|
||||
|
||||
|
||||
# ── s6-overlay /init image handling (issue #34628) ────────────────
|
||||
|
||||
|
||||
def _mock_subprocess_run_with_entrypoint(monkeypatch, entrypoint_json):
|
||||
"""Like _mock_subprocess_run, but `docker image inspect` returns the given
|
||||
entrypoint JSON so _image_uses_init_entrypoint can be exercised end-to-end.
|
||||
"""
|
||||
calls = []
|
||||
|
||||
def _run(cmd, **kwargs):
|
||||
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
|
||||
if isinstance(cmd, list) and len(cmd) >= 2:
|
||||
if cmd[1] == "version":
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
|
||||
if cmd[1] == "image" and len(cmd) >= 3 and cmd[2] == "inspect":
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout=entrypoint_json + "\n", stderr="")
|
||||
if cmd[1] == "run":
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
return calls
|
||||
|
||||
|
||||
def test_image_uses_init_entrypoint_detects_s6_init(monkeypatch):
|
||||
"""An image whose entrypoint is /init is detected as an s6-overlay image."""
|
||||
def _run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout='["/init"]', stderr="")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "hermes-agent:latest") is True
|
||||
|
||||
|
||||
def test_image_uses_init_entrypoint_false_for_plain_image(monkeypatch):
|
||||
"""A normal image (no /init entrypoint) is not treated as s6-overlay."""
|
||||
def _run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout='["/bin/sh","-c"]', stderr="")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "python:3.11") is False
|
||||
|
||||
|
||||
def test_image_uses_init_entrypoint_false_for_null_entrypoint(monkeypatch):
|
||||
"""Images with no declared entrypoint (null) keep hardened defaults."""
|
||||
def _run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="null", stderr="")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "alpine") is False
|
||||
|
||||
|
||||
def test_image_uses_init_entrypoint_false_on_inspect_failure(monkeypatch):
|
||||
"""An inspect failure (e.g. image not pulled) is best-effort -> defaults kept."""
|
||||
def _run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="No such image")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "missing:tag") is False
|
||||
|
||||
|
||||
def test_image_uses_init_entrypoint_false_on_exception(monkeypatch):
|
||||
"""A subprocess error never raises out of detection — defaults kept."""
|
||||
def _run(cmd, **kwargs):
|
||||
raise OSError("docker daemon down")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
assert docker_env._image_uses_init_entrypoint("/usr/bin/docker", "x") is False
|
||||
|
||||
|
||||
def test_s6_image_skips_docker_init_and_mounts_run_exec(monkeypatch):
|
||||
"""For an s6-overlay /init image, docker run must omit --init and mount
|
||||
/run with exec (issue #34628)."""
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
calls = _mock_subprocess_run_with_entrypoint(monkeypatch, '["/init"]')
|
||||
|
||||
_make_dummy_env(image="hermes-agent:latest")
|
||||
|
||||
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||
assert run_calls, "docker run should have been called"
|
||||
run_args = run_calls[0][0]
|
||||
|
||||
assert "--init" not in run_args, "s6 /init image must not get Docker --init"
|
||||
|
||||
tmpfs_vals = [run_args[i + 1] for i, a in enumerate(run_args[:-1]) if a == "--tmpfs"]
|
||||
run_mounts = [v for v in tmpfs_vals if v.startswith("/run:")]
|
||||
assert run_mounts, f"no /run tmpfs mount found in {tmpfs_vals}"
|
||||
assert "exec" in run_mounts[0] and "noexec" not in run_mounts[0], (
|
||||
f"/run must be mounted exec for s6 images, got: {run_mounts[0]}"
|
||||
)
|
||||
|
||||
|
||||
def test_plain_image_keeps_docker_init_and_run_noexec(monkeypatch):
|
||||
"""A non-s6 image keeps the hardened defaults: Docker --init and noexec /run."""
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
calls = _mock_subprocess_run_with_entrypoint(monkeypatch, '["/bin/sh","-c"]')
|
||||
|
||||
_make_dummy_env(image="python:3.11")
|
||||
|
||||
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||
assert run_calls, "docker run should have been called"
|
||||
run_args = run_calls[0][0]
|
||||
|
||||
assert "--init" in run_args, "non-s6 image must keep Docker --init"
|
||||
|
||||
tmpfs_vals = [run_args[i + 1] for i, a in enumerate(run_args[:-1]) if a == "--tmpfs"]
|
||||
run_mounts = [v for v in tmpfs_vals if v.startswith("/run:")]
|
||||
assert run_mounts, f"no /run tmpfs mount found in {tmpfs_vals}"
|
||||
assert "noexec" in run_mounts[0], (
|
||||
f"/run must stay noexec for non-s6 images, got: {run_mounts[0]}"
|
||||
)
|
||||
|
||||
@ -5,6 +5,7 @@ configurable resource limits (CPU, memory, disk), and optional filesystem
|
||||
persistence via bind mounts.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
@ -329,9 +330,15 @@ _BASE_SECURITY_ARGS = [
|
||||
"--pids-limit", "256",
|
||||
"--tmpfs", "/tmp:rw,nosuid,size=512m",
|
||||
"--tmpfs", "/var/tmp:rw,noexec,nosuid,size=256m",
|
||||
"--tmpfs", "/run:rw,noexec,nosuid,size=64m",
|
||||
]
|
||||
|
||||
# /run is split out from _BASE_SECURITY_ARGS because s6-overlay images need it
|
||||
# mounted ``exec``: s6 stage0 later runs ``exec /run/s6/basedir/bin/init``, which
|
||||
# fails with "Permission denied" (exit 126) on a ``noexec`` mount. For all other
|
||||
# images we keep the hardened ``noexec`` default.
|
||||
_RUN_TMPFS_NOEXEC = "--tmpfs", "/run:rw,noexec,nosuid,size=64m"
|
||||
_RUN_TMPFS_EXEC = "--tmpfs", "/run:rw,exec,nosuid,size=64m"
|
||||
|
||||
# Extra caps needed when the container starts as root and an init/entrypoint
|
||||
# must drop privileges (via `s6-setuidgid`, `gosu`, `su`, or similar).
|
||||
# Skipped when --user is passed because the container already starts
|
||||
@ -342,11 +349,63 @@ _PRIVDROP_CAP_ARGS = [
|
||||
]
|
||||
|
||||
|
||||
def _build_security_args(run_as_host_user: bool) -> list[str]:
|
||||
"""Return the security/cap/tmpfs args tailored to the privilege mode."""
|
||||
def _build_security_args(run_as_host_user: bool, run_exec: bool = False) -> list[str]:
|
||||
"""Return the security/cap/tmpfs args tailored to the privilege mode.
|
||||
|
||||
``run_exec`` mounts ``/run`` with ``exec`` instead of the hardened
|
||||
``noexec`` default. This is required for s6-overlay images whose ``/init``
|
||||
entrypoint execs ``/run/s6/basedir/bin/init`` during startup; see
|
||||
``_image_uses_init_entrypoint``.
|
||||
"""
|
||||
run_tmpfs = list(_RUN_TMPFS_EXEC if run_exec else _RUN_TMPFS_NOEXEC)
|
||||
args = list(_BASE_SECURITY_ARGS) + run_tmpfs
|
||||
if run_as_host_user:
|
||||
return list(_BASE_SECURITY_ARGS)
|
||||
return list(_BASE_SECURITY_ARGS) + list(_PRIVDROP_CAP_ARGS)
|
||||
return args
|
||||
return args + list(_PRIVDROP_CAP_ARGS)
|
||||
|
||||
|
||||
def _image_uses_init_entrypoint(docker_exe: str, image: str) -> bool:
|
||||
"""Return True if ``image``'s entrypoint is the s6-overlay ``/init``.
|
||||
|
||||
Such images (e.g. anything built on ``s6-overlay``, including
|
||||
``hermes-agent:latest``) already provide their own PID-1 init and execute
|
||||
``/run/s6/basedir/bin/init`` during stage0 startup. They are incompatible
|
||||
with Docker's ``--init`` (two competing PID-1 inits) and with a ``noexec``
|
||||
``/run`` mount. Detection is best-effort: on any inspection failure we
|
||||
return False and keep the hardened defaults.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[docker_exe, "image", "inspect", image,
|
||||
"--format", "{{json .Config.Entrypoint}}"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError) as e:
|
||||
logger.debug("Docker: could not inspect entrypoint for %s: %s", image, e)
|
||||
return False
|
||||
if result.returncode != 0:
|
||||
# Image may not be pulled yet; the run will pull it. Defaults are safe
|
||||
# for non-s6 images, so don't block on this.
|
||||
logger.debug(
|
||||
"Docker: image inspect for %s returned %d (stderr=%s)",
|
||||
image, result.returncode, result.stderr.strip(),
|
||||
)
|
||||
return False
|
||||
raw = (result.stdout or "").strip()
|
||||
if not raw or raw == "null":
|
||||
return False
|
||||
try:
|
||||
entrypoint = json.loads(raw)
|
||||
except (ValueError, TypeError):
|
||||
return False
|
||||
if isinstance(entrypoint, str):
|
||||
entrypoint = [entrypoint]
|
||||
if not isinstance(entrypoint, list) or not entrypoint:
|
||||
return False
|
||||
first = str(entrypoint[0]).strip()
|
||||
return first in ("/init", "/package/admin/s6-overlay/command/init")
|
||||
|
||||
|
||||
def _resolve_host_user_spec() -> Optional[str]:
|
||||
@ -672,7 +731,28 @@ class DockerEnvironment(BaseEnvironment):
|
||||
)
|
||||
# Fall back to the full cap set — without --user, an image's
|
||||
# init may still need s6-setuidgid/gosu/su to drop privileges.
|
||||
security_args = _build_security_args(run_as_host_user and bool(user_args))
|
||||
|
||||
# Resolve the docker executable once so it works even when
|
||||
# /usr/local/bin is not in PATH (common on macOS gateway/service).
|
||||
self._docker_exe = find_docker() or "docker"
|
||||
|
||||
# s6-overlay images (e.g. hermes-agent:latest) already use /init as PID 1
|
||||
# and exec /run/s6/basedir/bin/init during startup. For those images we
|
||||
# must (a) skip Docker's --init (two competing PID-1 inits) and (b) mount
|
||||
# /run with exec instead of noexec, or s6 stage0 dies with exit 126
|
||||
# "Permission denied". Detected once here; defaults are kept on any
|
||||
# inspection failure. See issue #34628.
|
||||
image_uses_s6_init = _image_uses_init_entrypoint(self._docker_exe, image)
|
||||
if image_uses_s6_init:
|
||||
logger.info(
|
||||
"Docker: image %s uses /init (s6-overlay) as entrypoint — "
|
||||
"skipping --init and mounting /run with exec.",
|
||||
image,
|
||||
)
|
||||
security_args = _build_security_args(
|
||||
run_as_host_user and bool(user_args),
|
||||
run_exec=image_uses_s6_init,
|
||||
)
|
||||
|
||||
logger.info(f"Docker volume_args: {volume_args}")
|
||||
# User-supplied extra docker run flags (docker_extra_args in config.yaml).
|
||||
@ -695,10 +775,6 @@ class DockerEnvironment(BaseEnvironment):
|
||||
)
|
||||
logger.info(f"Docker run_args: {all_run_args}")
|
||||
|
||||
# Resolve the docker executable once so it works even when
|
||||
# /usr/local/bin is not in PATH (common on macOS gateway/service).
|
||||
self._docker_exe = find_docker() or "docker"
|
||||
|
||||
# Start the container directly via `docker run -d`.
|
||||
container_name = f"hermes-{uuid.uuid4().hex[:8]}"
|
||||
# Labels make hermes-created containers identifiable to:
|
||||
@ -763,9 +839,13 @@ class DockerEnvironment(BaseEnvironment):
|
||||
reused = True
|
||||
|
||||
if not reused:
|
||||
# tini/catatonit as PID 1 reaps zombie children — but s6-overlay
|
||||
# images already provide their own /init PID 1, so adding --init
|
||||
# there creates two competing inits and breaks startup (#34628).
|
||||
init_args = [] if image_uses_s6_init else ["--init"]
|
||||
run_cmd = [
|
||||
self._docker_exe, "run", "-d",
|
||||
"--init", # tini/catatonit as PID 1 — reaps zombie children
|
||||
*init_args,
|
||||
"--name", container_name,
|
||||
*label_args,
|
||||
"-w", cwd,
|
||||
|
||||
Reference in New Issue
Block a user