diff --git a/agent/file_safety.py b/agent/file_safety.py index e317b3439..62a5f363d 100644 --- a/agent/file_safety.py +++ b/agent/file_safety.py @@ -451,3 +451,113 @@ def get_cross_profile_warning(path: str) -> Optional[str]: f"``cross_profile=True``. (Defense-in-depth — not a security " f"boundary; the terminal tool can still bypass.)" ) + + +# --------------------------------------------------------------------------- +# Sandbox-mirror write guard (#32049) +# +# Non-local terminal backends (Docker, Daytona, etc.) bind a sandbox-local +# directory to the container's ``$HOME``. The on-disk layout looks like +# +# /profiles//sandboxes///home/.hermes/... +# +# When the agent (running host-side) speculates that authoritative profile +# state lives at one of those sandbox-mirror paths, the write lands on the +# mirror — never read by the host process — while the host file is left +# untouched. The agent reports success, the user sees no change, and on +# disk two divergent copies accumulate. See #32049 for evidence. +# +# This guard is path-shape-only: it detects the +# ``…/sandboxes///home/.hermes/…`` segment and warns +# regardless of which Hermes profile is active. It does NOT cover the +# inner-container case where the bind mount strips the ``sandboxes/`` prefix +# (the agent's view inside the container is plain ``/root/.hermes/...``); +# that case needs a separate dispatch-layer or host-side ``profile_state`` +# tool. +# --------------------------------------------------------------------------- + + +def _find_sandbox_mirror_segments(parts: tuple) -> Optional[int]: + """Return the index of the inner ``.hermes`` part in a sandbox-mirror path. + + Matches ``…/sandboxes///home/.hermes/…`` and returns the + index where the inner Hermes-state portion starts. Returns ``None`` for + paths that do not contain the sandbox-mirror shape. + """ + for i, part in enumerate(parts): + if part != "sandboxes": + continue + # Need at least: sandboxes / / / home / .hermes / + if i + 5 >= len(parts): + continue + if parts[i + 3] == "home" and parts[i + 4] == ".hermes": + return i + 4 + return None + + +def classify_sandbox_mirror_target(path: str) -> Optional[dict]: + """Classify a write target as a sandbox-mirror of authoritative Hermes state. + + Returns ``None`` when the path does not match the sandbox-mirror shape. + Otherwise returns a dict with: + + * ``target_path``: the resolved path string + * ``mirror_root``: the ``…/sandboxes///home/.hermes`` + prefix (so callers can show users which sandbox owns the mirror) + * ``inner_path``: the portion under the mirror's ``.hermes`` (what the + agent likely meant to address on the host) + + Detection is path-shape-only — does not require any Hermes resolver to + succeed, so it works correctly even when called from contexts where + HERMES_HOME resolution would be ambiguous. + """ + try: + target = Path(os.path.expanduser(str(path))).resolve() + except (OSError, RuntimeError): + return None + + parts = target.parts + inner_idx = _find_sandbox_mirror_segments(parts) + if inner_idx is None: + return None + + mirror_root = str(Path(*parts[: inner_idx + 1])) + inner_path = str(Path(*parts[inner_idx + 1 :])) if inner_idx + 1 < len(parts) else "" + + return { + "target_path": str(target), + "mirror_root": mirror_root, + "inner_path": inner_path, + } + + +def get_sandbox_mirror_warning(path: str) -> Optional[str]: + """Return a model-facing warning when ``path`` lands in a sandbox mirror. + + Returns ``None`` when the path is not a sandbox-mirror target. Caller + is expected to surface the warning to the agent as a tool-result + error. The bypass kwarg (``cross_profile=True``) is shared with the + cross-profile guard: both are soft "I know what I'm doing" overrides + a user can authorise. + + Defense-in-depth, NOT a security boundary: the terminal tool runs as + the same OS user and can write the mirror path directly. The guard + exists to surface the misclassification before the silent-success + + divergent-copy footgun in #32049 fires. + """ + info = classify_sandbox_mirror_target(path) + if info is None: + return None + return ( + f"Sandbox-mirror write blocked by soft guard: {info['target_path']} " + f"sits under {info['mirror_root']!r}, which is a per-task mirror " + f"created by a non-local terminal backend (docker/daytona/etc.). " + f"Writes here land on a copy that the host Hermes process never " + f"reads — the authoritative file is likely {info['inner_path']!r} " + f"under the real HERMES_HOME. Use the host-side tool for " + f"authoritative state (e.g. ``memory`` for memories), or address " + f"the host path directly. To bypass this guard after explicit " + f"user direction, retry the call with ``cross_profile=True``. " + f"(Defense-in-depth — not a security boundary; the terminal tool " + f"can still bypass.)" + ) diff --git a/tests/agent/test_file_safety_sandbox_mirror.py b/tests/agent/test_file_safety_sandbox_mirror.py new file mode 100644 index 000000000..695997206 --- /dev/null +++ b/tests/agent/test_file_safety_sandbox_mirror.py @@ -0,0 +1,224 @@ +"""Tests for the sandbox-mirror write guard in agent/file_safety. + +The guard fires when a tool tries to write into the per-task mirror +directory created by a non-local terminal backend (Docker, Daytona, etc.). +Those paths look like ``…/sandboxes///home/.hermes/…`` and +they accumulate divergent copies of authoritative profile state (SOUL.md, +config.yaml, memories/*.md) because the host Hermes process never reads +them. Soft guard — defense in depth, NOT a security boundary. + +Reference: #32049 — under ``terminal.backend: docker``, the agent's +``write_file`` / ``patch`` calls landed on the sandbox mirror of SOUL.md +while the host process kept loading the untouched authoritative file. +The agent reported success; the rule never took effect. +""" +from __future__ import annotations + +from pathlib import Path + +import pytest + + +# --------------------------------------------------------------------------- +# classify_sandbox_mirror_target — pure path-shape detection +# --------------------------------------------------------------------------- + + +class TestClassifySandboxMirrorTarget: + def test_docker_mirror_soul_md_classified(self, tmp_path): + """The exact path shape reported in #32049.""" + from agent.file_safety import classify_sandbox_mirror_target + + target = ( + tmp_path + / "profiles" / "group1" + / "sandboxes" / "docker" / "default" / "home" / ".hermes" + / "profiles" / "group1" / "SOUL.md" + ) + target.parent.mkdir(parents=True) + target.write_text("# mirror copy\n") + + result = classify_sandbox_mirror_target(str(target)) + assert result is not None + assert result["target_path"] == str(target.resolve()) + assert result["mirror_root"].endswith( + "sandboxes/docker/default/home/.hermes" + ) + assert result["inner_path"] == "profiles/group1/SOUL.md" + + @pytest.mark.parametrize( + "backend,inner", + [ + ("docker", "profiles/coder/memories/MEMORY.md"), + ("daytona", "profiles/default/cron/jobs.json"), + ("podman", ".env"), + ], + ) + def test_other_backends_and_inner_files_match(self, tmp_path, backend, inner): + """The detector is backend-agnostic — sandbox-mirror shape is what matters.""" + from agent.file_safety import classify_sandbox_mirror_target + + target = ( + tmp_path + / "sandboxes" / backend / "task-42" / "home" / ".hermes" + / Path(inner) + ) + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("x") + + result = classify_sandbox_mirror_target(str(target)) + assert result is not None + assert result["inner_path"] == inner + assert backend in result["mirror_root"] + + def test_path_outside_sandbox_returns_none(self, tmp_path): + """A plain Hermes path is not a mirror.""" + from agent.file_safety import classify_sandbox_mirror_target + + target = tmp_path / ".hermes" / "profiles" / "group1" / "SOUL.md" + target.parent.mkdir(parents=True) + target.write_text("# real SOUL\n") + + assert classify_sandbox_mirror_target(str(target)) is None + + def test_sandboxes_segment_without_home_hermes_returns_none(self, tmp_path): + """A ``sandboxes/`` directory unrelated to Hermes-state mirroring (e.g. + the sandbox workspace itself) is not flagged.""" + from agent.file_safety import classify_sandbox_mirror_target + + target = ( + tmp_path + / "sandboxes" / "docker" / "task-42" / "workspace" / "main.py" + ) + target.parent.mkdir(parents=True) + target.write_text("print('hi')\n") + + assert classify_sandbox_mirror_target(str(target)) is None + + def test_sandboxes_segment_with_home_but_no_hermes_returns_none(self, tmp_path): + """``sandboxes///home/anything-not-hermes`` is not a mirror.""" + from agent.file_safety import classify_sandbox_mirror_target + + target = ( + tmp_path + / "sandboxes" / "docker" / "task-42" / "home" / ".bashrc" + ) + target.parent.mkdir(parents=True) + target.write_text("alias ll='ls -la'\n") + + assert classify_sandbox_mirror_target(str(target)) is None + + def test_truncated_sandbox_path_returns_none(self, tmp_path): + """``…/sandboxes//`` without ``home/.hermes/`` is not a mirror.""" + from agent.file_safety import classify_sandbox_mirror_target + + target = tmp_path / "sandboxes" / "docker" / "task-42" + target.mkdir(parents=True) + + assert classify_sandbox_mirror_target(str(target)) is None + + def test_non_existent_path_still_classifies_by_shape(self, tmp_path): + """Detection is path-shape only — it must not require the file to exist + (the agent is about to CREATE the mirror file, that's the bug).""" + from agent.file_safety import classify_sandbox_mirror_target + + target = ( + tmp_path + / "profiles" / "group1" + / "sandboxes" / "docker" / "default" / "home" / ".hermes" + / "profiles" / "group1" / "SOUL.md" + ) + # Parent directory exists so .resolve() doesn't strip the tail + # under strict mode, but the file itself does NOT exist. + target.parent.mkdir(parents=True) + assert not target.exists() + + result = classify_sandbox_mirror_target(str(target)) + assert result is not None + assert result["inner_path"] == "profiles/group1/SOUL.md" + + +# --------------------------------------------------------------------------- +# get_sandbox_mirror_warning — the model-facing string +# --------------------------------------------------------------------------- + + +class TestGetSandboxMirrorWarning: + def test_non_mirror_returns_none(self, tmp_path): + from agent.file_safety import get_sandbox_mirror_warning + + target = tmp_path / ".hermes" / "profiles" / "group1" / "SOUL.md" + target.parent.mkdir(parents=True) + target.write_text("# real SOUL\n") + + assert get_sandbox_mirror_warning(str(target)) is None + + def test_mirror_warning_names_mirror_root_and_inner_path(self, tmp_path): + from agent.file_safety import get_sandbox_mirror_warning + + target = ( + tmp_path + / "profiles" / "group1" + / "sandboxes" / "docker" / "default" / "home" / ".hermes" + / "profiles" / "group1" / "SOUL.md" + ) + target.parent.mkdir(parents=True) + target.write_text("# mirror copy\n") + + warn = get_sandbox_mirror_warning(str(target)) + assert warn is not None + # Must name the mirror root so the user can locate the sandbox. + assert "sandboxes/docker/default/home/.hermes" in warn + # Must hint at what the agent likely meant. + assert "profiles/group1/SOUL.md" in warn + # Must name the bypass kwarg shared with the cross-profile guard. + assert "cross_profile=True" in warn + + def test_warning_is_defense_in_depth_not_boundary(self, tmp_path): + from agent.file_safety import get_sandbox_mirror_warning + + target = ( + tmp_path + / "sandboxes" / "docker" / "t" / "home" / ".hermes" + / "profiles" / "g" / "SOUL.md" + ) + target.parent.mkdir(parents=True) + target.write_text("x") + + warn = get_sandbox_mirror_warning(str(target)) + # Must self-document as defense-in-depth so future reviewers + # don't promote it to a hard block (matches the existing + # cross-profile guard's contract). + assert "not a security boundary" in warn.lower() + + +# --------------------------------------------------------------------------- +# Independence from cross-profile classifier +# --------------------------------------------------------------------------- + + +class TestSandboxMirrorIsOrthogonalToCrossProfile: + """The sandbox-mirror guard must fire even when the inner path is + in-profile from the host's view — the bug is the mirror, not the + profile mismatch.""" + + def test_same_profile_mirror_still_flagged(self, tmp_path, monkeypatch): + import agent.file_safety as fs + monkeypatch.setattr(fs, "_hermes_root_path", lambda: tmp_path) + monkeypatch.setattr(fs, "_hermes_home_path", lambda: tmp_path / "profiles" / "group1") + + target = ( + tmp_path + / "profiles" / "group1" + / "sandboxes" / "docker" / "default" / "home" / ".hermes" + / "profiles" / "group1" / "SOUL.md" + ) + target.parent.mkdir(parents=True) + target.write_text("x") + + # cross-profile classifier: active profile == target's inner-mirror + # profile name; on the existing detector the path's parts[2] is + # ``sandboxes``, not a scoped area, so it returns None. + assert fs.classify_cross_profile_target(str(target)) is None + # sandbox-mirror classifier: fires unconditionally on the shape. + assert fs.classify_sandbox_mirror_target(str(target)) is not None diff --git a/tools/file_tools.py b/tools/file_tools.py index a16103afc..abf3e8bba 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -290,20 +290,32 @@ def _check_sensitive_path(filepath: str, task_id: str = "default") -> str | None def _check_cross_profile_path(filepath: str, task_id: str = "default") -> str | None: - """Return a cross-profile warning string when ``filepath`` lands in - another Hermes profile's skills/plugins/cron/memories directory. + """Return a soft-guard warning when ``filepath`` lands in another Hermes + profile's scoped area or a sandbox-mirror of authoritative profile state. - Returns ``None`` when the write is in-scope (same profile) or outside - Hermes scope entirely. Soft guard — the agent can override by passing - ``cross_profile=True`` to its write tool after explicit user direction. + Two detectors run in order: - Defense-in-depth, NOT a security boundary — the terminal tool runs - as the same OS user and can write any of these paths directly. - See ``agent/file_safety.classify_cross_profile_target`` for the - detection rules. + * cross-profile (#TBD) — writes that hit another profile's + ``skills/plugins/cron/memories`` directory. + * sandbox-mirror (#32049) — writes that hit the + ``…/sandboxes///home/.hermes/…`` mirror created by a + non-local terminal backend (Docker, Daytona, etc.), where the host + Hermes process never reads the mirror and the authoritative file is + left untouched. + + Returns ``None`` when the write is in-scope or outside Hermes scope. + Both detectors are soft guards — the agent can override either by + passing ``cross_profile=True`` to its write tool after explicit user + direction. Defense-in-depth, NOT a security boundary — the terminal + tool runs as the same OS user and can write any of these paths + directly. See ``agent/file_safety.classify_cross_profile_target`` and + ``classify_sandbox_mirror_target`` for the detection rules. """ try: - from agent.file_safety import get_cross_profile_warning + from agent.file_safety import ( + get_cross_profile_warning, + get_sandbox_mirror_warning, + ) except Exception: # Fail open on import error — the existing sensitive-path guard # plus the write_denied list still apply. @@ -317,7 +329,10 @@ def _check_cross_profile_path(filepath: str, task_id: str = "default") -> str | except (OSError, ValueError): resolved = filepath - return get_cross_profile_warning(resolved) + warning = get_cross_profile_warning(resolved) + if warning is not None: + return warning + return get_sandbox_mirror_warning(resolved) def _is_expected_write_exception(exc: Exception) -> bool: