fix(config): chown ensure_hermes_home dirs to HERMES_UID/GID in Docker (#34107) (#34268)

Fixes #34107. When Hermes runs in Docker with HERMES_UID=1000 /
HERMES_GID=911, the entrypoint chowns the top-level HERMES_HOME once
at startup — but subdirectories created at runtime by
ensure_hermes_home() (especially for profile namespaces under
profiles/<name>/ spawned by kanban workers) were landing as root:root
and blocking subsequent uid-mapped worker invocations with:

  PermissionError: [Errno 13] Permission denied:
    '/opt/data/profiles/charles/logs/curator'

Fix: add _resolve_hermes_uid_gid + _chown_to_hermes_uid helpers that
read the env vars and apply chown after mkdir. Invoke from _secure_dir
which already runs after every directory creation in the home-init path,
so all newly-created subdirs (including the profile namespaces) get the
right ownership.

Safety properties:

- No-op when HERMES_UID/HERMES_GID unset (the dominant non-Docker path)
- No-op on Windows (os.chown doesn't exist; AttributeError swallowed)
- No-op when running as non-root (EPERM swallowed — the entrypoint's
  startup chown -R picks it up on next restart, and in most cases the
  dir was already correctly-owned by the calling user)
- Uses -1 sentinel for missing field so only the set value applies
- Empty-string env vars treated as unset

Adds 14 tests across:
- TestResolveHermesUidGid (7) — env-var parsing
- TestChownToHermesUid (5) — chown helper invariants
- TestSecureDirChown (2) — end-to-end through _secure_dir

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Bartok
2026-05-31 22:27:30 -05:00
committed by GitHub
parent e3b3d4d75e
commit 740fb28d02
2 changed files with 260 additions and 0 deletions

View File

@ -539,6 +539,65 @@ def get_project_root() -> Path:
"""Get the project installation directory."""
return Path(__file__).parent.parent.resolve()
def _resolve_hermes_uid_gid() -> tuple[Optional[int], Optional[int]]:
"""Read the HERMES_UID / HERMES_GID env vars set by Docker deployments.
Docker containers running Hermes commonly set these to map the in-container
user to a host user so volume-mounted state files end up with the right
ownership. The entrypoint chowns the top-level HERMES_HOME once, but
subdirectories created at runtime by ``ensure_hermes_home()`` (especially
for profile namespaces under ``profiles/<name>/``) need the same chown
or they land as ``root:root`` and block subsequent uid-mapped workers
with ``PermissionError [Errno 13]``. See #34107.
Returns ``(uid, gid)`` parsed from the env vars, or ``(None, None)``
when either is missing/invalid. Returns ``(None, None)`` on Windows
too (where chown is a no-op anyway).
"""
if sys.platform == "win32":
return None, None
uid_str = os.environ.get("HERMES_UID", "").strip()
gid_str = os.environ.get("HERMES_GID", "").strip()
try:
uid = int(uid_str) if uid_str else None
except ValueError:
uid = None
try:
gid = int(gid_str) if gid_str else None
except ValueError:
gid = None
return uid, gid
def _chown_to_hermes_uid(path) -> None:
"""Chown ``path`` to ``HERMES_UID:HERMES_GID`` if those env vars are set.
No-op when:
- Either env var is unset/invalid
- The current process isn't root (chown will EPERM — silently ignored)
- On Windows (chown semantics don't apply)
Used by :func:`_secure_dir` to keep ownership consistent across all
directories created by :func:`ensure_hermes_home` on Docker deployments.
See #34107.
"""
uid, gid = _resolve_hermes_uid_gid()
if uid is None and gid is None:
return
try:
# os.chown with -1 means "don't change" for that field.
os.chown(
path,
uid if uid is not None else -1,
gid if gid is not None else -1,
)
except (OSError, AttributeError, NotImplementedError):
# OSError covers EPERM (not running as root) and ENOENT (race),
# both of which are non-fatal — the dir is still created and
# the entrypoint's startup chown -R will fix it on next restart.
pass
def _secure_dir(path):
"""Set directory to owner-only access (0700 by default). No-op on Windows.
@ -551,6 +610,11 @@ def _secure_dir(path):
caddy, etc.) needs to traverse HERMES_HOME to reach a served subdirectory.
The execute-only bit on a directory permits cd-through without exposing
directory listings.
Also applies ``HERMES_UID``/``HERMES_GID``-based ownership when those env
vars are set (#34107 — Docker deployments need this so profile subdirs
created at runtime by kanban workers don't land as root:root and block
subsequent uid-mapped workers).
"""
if is_managed():
return
@ -563,6 +627,7 @@ def _secure_dir(path):
os.chmod(path, mode)
except (OSError, NotImplementedError):
pass
_chown_to_hermes_uid(path)
def _is_container() -> bool:

View File

@ -0,0 +1,195 @@
"""Regression tests for #34107 — Docker UID/GID handling in ensure_hermes_home.
When Hermes runs in Docker with ``HERMES_UID=1000`` / ``HERMES_GID=911``,
the entrypoint chowns the top-level ``HERMES_HOME`` once at startup. But
subdirectories created at runtime by ``ensure_hermes_home()`` — especially
for profile namespaces under ``profiles/<name>/`` spawned by kanban
workers — were landing as ``root:root`` and blocking subsequent
uid-mapped worker invocations with ``PermissionError [Errno 13]``.
The fix is a ``_chown_to_hermes_uid`` helper that reads the env vars and
applies chown after ``mkdir``, invoked from ``_secure_dir`` (which already
runs after every directory creation in the home-init path).
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
from unittest.mock import patch
import pytest
# ---------------------------------------------------------------------------
# _resolve_hermes_uid_gid
# ---------------------------------------------------------------------------
class TestResolveHermesUidGid:
def test_returns_parsed_values_when_both_set(self, monkeypatch):
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid == 1000
assert gid == 911
def test_returns_none_when_unset(self, monkeypatch):
monkeypatch.delenv("HERMES_UID", raising=False)
monkeypatch.delenv("HERMES_GID", raising=False)
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid is None
assert gid is None
def test_uid_only_returns_gid_none(self, monkeypatch):
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.delenv("HERMES_GID", raising=False)
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid == 1000
assert gid is None
def test_invalid_uid_returns_none_for_that_field(self, monkeypatch):
monkeypatch.setenv("HERMES_UID", "not-a-number")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid is None
assert gid == 911
def test_empty_string_treated_as_unset(self, monkeypatch):
monkeypatch.setenv("HERMES_UID", "")
monkeypatch.setenv("HERMES_GID", "")
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid is None
assert gid is None
def test_whitespace_padded_values(self, monkeypatch):
monkeypatch.setenv("HERMES_UID", " 1000 ")
monkeypatch.setenv("HERMES_GID", " 911")
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid == 1000
assert gid == 911
@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific")
def test_windows_returns_none_none(self, monkeypatch):
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli.config import _resolve_hermes_uid_gid
uid, gid = _resolve_hermes_uid_gid()
assert uid is None
assert gid is None
# ---------------------------------------------------------------------------
# _chown_to_hermes_uid
# ---------------------------------------------------------------------------
class TestChownToHermesUid:
def test_calls_os_chown_when_both_set(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
with patch.object(cfg.os, "chown") as mock_chown:
cfg._chown_to_hermes_uid(d)
mock_chown.assert_called_once_with(d, 1000, 911)
def test_uses_minus_one_for_missing_field(self, tmp_path, monkeypatch):
"""When only one env var is set, the other field passes -1 to
os.chown which means 'do not change' on POSIX."""
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.delenv("HERMES_GID", raising=False)
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
with patch.object(cfg.os, "chown") as mock_chown:
cfg._chown_to_hermes_uid(d)
mock_chown.assert_called_once_with(d, 1000, -1)
def test_no_op_when_neither_set(self, tmp_path, monkeypatch):
monkeypatch.delenv("HERMES_UID", raising=False)
monkeypatch.delenv("HERMES_GID", raising=False)
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
with patch.object(cfg.os, "chown") as mock_chown:
cfg._chown_to_hermes_uid(d)
mock_chown.assert_not_called()
def test_eperm_is_silently_swallowed(self, tmp_path, monkeypatch):
"""When running as non-root, os.chown raises EPERM. That's fine —
the entrypoint's startup chown -R will pick it up on restart, and
in most cases the dir was already correctly-owned by the calling
user anyway."""
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
def _raises_eperm(*args, **kwargs):
raise PermissionError("operation not permitted")
with patch.object(cfg.os, "chown", side_effect=_raises_eperm):
# Must not raise — the catch is non-fatal.
cfg._chown_to_hermes_uid(d)
def test_attributeerror_swallowed_for_windows_compat(self, tmp_path, monkeypatch):
"""os.chown doesn't exist on Windows. Catching AttributeError keeps
the helper portable."""
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
with patch.object(cfg.os, "chown", side_effect=AttributeError("no chown on this platform")):
cfg._chown_to_hermes_uid(d) # must not raise
# ---------------------------------------------------------------------------
# End-to-end: _secure_dir now also chowns
# ---------------------------------------------------------------------------
class TestSecureDirChown:
@pytest.mark.skipif(sys.platform == "win32", reason="chown is no-op on Windows")
def test_secure_dir_invokes_chown_when_env_set(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_UID", "1000")
monkeypatch.setenv("HERMES_GID", "911")
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
with patch.object(cfg.os, "chown") as mock_chown:
cfg._secure_dir(d)
mock_chown.assert_called_once_with(d, 1000, 911)
@pytest.mark.skipif(sys.platform == "win32", reason="chown is no-op on Windows")
def test_secure_dir_no_chown_when_env_unset(self, tmp_path, monkeypatch):
monkeypatch.delenv("HERMES_UID", raising=False)
monkeypatch.delenv("HERMES_GID", raising=False)
from hermes_cli import config as cfg
d = tmp_path / "subdir"
d.mkdir()
with patch.object(cfg.os, "chown") as mock_chown:
cfg._secure_dir(d)
mock_chown.assert_not_called()