From 03ba06ebfbf5e7b1eb8a194a8f94ed081dc890fc Mon Sep 17 00:00:00 2001 From: Ben Barclay Date: Thu, 4 Jun 2026 13:34:23 +1000 Subject: [PATCH] fix(docker): chown gateway install tree on UID remap (salvage #37928) (#38655) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Salvage of #37928 (@sarvesh1327), reduced to the still-needed delta. `/opt/hermes/gateway` is a runtime-writable Python package: on first import the supervised gateway writes `__pycache__` beneath it, and the image does not set PYTHONDONTWRITEBYTECODE. When HERMES_UID/PUID is remapped at boot (e.g. Unraid 99), `usermod -u` only re-chowns the hermes home dir; the build trees under /opt/hermes keep the build-time UID (10000). main already chowns `.venv`, `ui-tui`, and `node_modules` on remap (#38556) but missed `gateway`, so the remapped gateway hits EACCES writing `__pycache__` (#27221). Add `/opt/hermes/gateway` to both chown sites — the Dockerfile build-time `chown -R hermes:hermes` line and the stage2-hook build-tree repair — so it tracks the remapped UID like the sibling trees. Differs from #37928 as submitted: dropped the `uid_gid_remapped` flag and the `|| [ "$uid_gid_remapped" = true ]` chown gate. main's #38556 already solved that half, and more correctly — it probes the actual tree ownership (`venv_owner != actual_hermes_uid`) rather than tracking same-boot remaps, which also catches pre-existing ownership drift and stays idempotent. Keeping #37928's flag would regress that. The salvage is the `gateway`-tree addition only. Verified end-to-end against a real image build: on baseline main a remap to UID 99 leaves `gateway` owned by 10000 and a write as uid 99 fails EACCES; with this change `gateway` is chowned to 99:100 and the write succeeds, while the default-uid (no-remap) path is unchanged. Fixes #27221. Co-authored-by: Sarvesh --- Dockerfile | 5 +- docker/stage2-hook.sh | 5 ++ .../test_dockerfile_node_modules_perms.py | 10 +++- .../test_stage2_hook_install_dir_chown.py | 57 +++++++++++++++++++ 4 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 tests/tools/test_stage2_hook_install_dir_chown.py diff --git a/Dockerfile b/Dockerfile index 149ec0376..ebd7ccac8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -185,13 +185,16 @@ RUN cd web && npm run build && \ # hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time # only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally # not chowned here. +# /opt/hermes/gateway is runtime-writable: Python may create __pycache__ and +# gateway state artifacts beneath the package after services drop privileges, +# especially when the hermes UID is remapped at boot (#27221). # The .venv MUST remain hermes-writable so lazy_deps.py can install # remaining optional platform packages and future pin bumps at first use. # Without this, `uv pip install` fails with EACCES and adapters silently # fail to load. See tools/lazy_deps.py. USER root RUN chmod -R a+rX /opt/hermes && \ - chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules + chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/gateway /opt/hermes/node_modules # Start as root so the s6-overlay stage2 hook can usermod/groupmod and chown # the data volume. Each supervised service then drops to the hermes user via # `s6-setuidgid hermes` in its run script. If HERMES_UID is unset, services diff --git a/docker/stage2-hook.sh b/docker/stage2-hook.sh index 16af485b7..de6b6ce3a 100755 --- a/docker/stage2-hook.sh +++ b/docker/stage2-hook.sh @@ -216,6 +216,10 @@ fi # the source mtime is newer than dist/ or when HERMES_TUI_FORCE_BUILD # is set) and writes to ui-tui/dist/. Without this chown the new # hermes UID can't write the build output (#28851). +# - gateway: Python writes __pycache__ and runtime artifacts beneath the +# gateway package on first import. After a UID remap those source-owned +# paths still belong to the build-time UID (10000) unless repaired here, +# producing EACCES for the supervised gateway (#27221). # - node_modules: root-level dependencies (puppeteer, web tooling) # that runtime code may walk/update. # The set mirrors the build-time `chown -R hermes:hermes` line in the @@ -241,6 +245,7 @@ if [ -n "$venv_owner" ] && [ "$venv_owner" != "$actual_hermes_uid" ]; then chown -R hermes:hermes \ "$INSTALL_DIR/.venv" \ "$INSTALL_DIR/ui-tui" \ + "$INSTALL_DIR/gateway" \ "$INSTALL_DIR/node_modules" \ 2>/dev/null || \ echo "[stage2] Warning: chown of build trees failed (rootless container?) — continuing" diff --git a/tests/tools/test_dockerfile_node_modules_perms.py b/tests/tools/test_dockerfile_node_modules_perms.py index 56243248a..671a8d843 100644 --- a/tests/tools/test_dockerfile_node_modules_perms.py +++ b/tests/tools/test_dockerfile_node_modules_perms.py @@ -29,11 +29,15 @@ def test_dockerfile_chowns_runtime_node_modules_to_hermes_user() -> None: chown_block = "\n".join(chown_lines) - # both runtime-mutable trees must be passed to the chown command. + # Runtime-mutable trees must be passed to the chown command. # /opt/hermes/web is intentionally excluded: it is build-time only, # because HERMES_WEB_DIST points at hermes_cli/web_dist for runtime. - for required_path in ("/opt/hermes/ui-tui", "/opt/hermes/node_modules"): + for required_path in ( + "/opt/hermes/ui-tui", + "/opt/hermes/node_modules", + "/opt/hermes/gateway", + ): assert required_path in chown_block, ( f"{required_path} must be passed to a chown -R hermes:hermes " - f"command in the Dockerfile (see #18800)" + f"command in the Dockerfile (see #18800, #27221)" ) diff --git a/tests/tools/test_stage2_hook_install_dir_chown.py b/tests/tools/test_stage2_hook_install_dir_chown.py new file mode 100644 index 000000000..3e68aac76 --- /dev/null +++ b/tests/tools/test_stage2_hook_install_dir_chown.py @@ -0,0 +1,57 @@ +"""Contract test: stage2-hook repairs ownership of the gateway install tree. + +When HERMES_UID is remapped at container boot, ``usermod -u`` only rewrites +files under the hermes user's home directory ($HERMES_HOME == /opt/data). +Runtime-writable trees under ``/opt/hermes`` must be explicitly chowned to the +new UID before services drop privileges. ``/opt/hermes/gateway`` is one such +tree: Python writes ``__pycache__`` beneath the package on first import, which +fails with EACCES if the tree still belongs to the build-time UID (10000) after +a remap (#27221). +""" +from __future__ import annotations + +import re +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[2] +STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh" + + +@pytest.fixture(scope="module") +def stage2_text() -> str: + if not STAGE2_HOOK.exists(): + pytest.skip("docker/stage2-hook.sh not present in this checkout") + return STAGE2_HOOK.read_text() + + +def _install_dir_chown_block(text: str) -> str: + match = re.search( + r"(chown -R hermes:hermes \\\n" + r"(?:\s+\"\$INSTALL_DIR/[^\"]+\" \\\n)+" + r"\s+2>/dev/null \|\| \\\n" + r"\s+echo \"\[stage2\] Warning: chown of build trees failed.*?\")", + text, + flags=re.DOTALL, + ) + assert match, "stage2-hook.sh must repair ownership of runtime-writable install trees" + return match.group(1) + + +def test_uid_remap_chowns_runtime_writable_gateway_tree(stage2_text: str) -> None: + block = _install_dir_chown_block(stage2_text) + assert '"$INSTALL_DIR/gateway"' in block, ( + "the build-tree ownership repair must chown $INSTALL_DIR/gateway so the " + "gateway runtime can write Python cache artifacts after a UID remap (#27221)" + ) + + +def test_install_dir_chown_keeps_existing_runtime_writable_trees(stage2_text: str) -> None: + block = _install_dir_chown_block(stage2_text) + for required in ( + '"$INSTALL_DIR/.venv"', + '"$INSTALL_DIR/ui-tui"', + '"$INSTALL_DIR/node_modules"', + ): + assert required in block