fix(file-safety): extend sandbox-mirror guard to cover inner-container path (#32049) (#32407)

* fix(file-safety): extend sandbox-mirror guard to cover inner-container path (#32049)

Brian's shape-based guard (#32213) catches paths that still carry the
full sandboxes/<backend>/<task>/home/.hermes/… prefix on the host side.
The inner-container case is not covered: when file tools execute inside
Docker the bind-mount strips that prefix, so the guard receives plain
/root/.hermes/… and passes through. The root:root ownership on the
divergent SOUL.md in #32049 confirms this is the primary failure mode.

Add a ContextVar (_CONTAINER_HERMES_MIRROR) set by DockerEnvironment
when persistent=True. classify_container_mirror_target / get_container_
mirror_warning detect any write whose resolved path falls under that
prefix, using the same warning format and cross_profile=True bypass
contract as the existing guards. Chain the new guard in
_check_cross_profile_path after the two existing detectors.

* fix(file-safety): derive Docker mirror guard from task

---------

Co-authored-by: Ben <ben@nousresearch.com>
This commit is contained in:
whyhkzk
2026-06-02 12:03:37 +08:00
committed by GitHub
parent a5aecf26fa
commit 1495f0cc38
3 changed files with 237 additions and 6 deletions

View File

@ -561,3 +561,80 @@ def get_sandbox_mirror_warning(path: str) -> Optional[str]:
f"(Defense-in-depth — not a security boundary; the terminal tool "
f"can still bypass.)"
)
# ---------------------------------------------------------------------------
# Container-context mirror guard (inner-container case — #32049 follow-up)
#
# Brian's shape-based detector (#32213) catches paths that still carry the
# full ``…/sandboxes/<backend>/<task>/home/.hermes/…`` prefix on the host.
# But when file tools execute *inside* the container the bind-mount strips
# that prefix: the agent sees plain ``/root/.hermes/…``. The root:root
# ownership on the divergent SOUL.md in #32049 confirms this is the primary
# failure mode.
#
# Fix: file_tools passes the active Docker mirror prefix when the terminal
# backend is docker + persistent. This catches the very first file-tool call,
# before a DockerEnvironment object necessarily exists.
# ---------------------------------------------------------------------------
def classify_container_mirror_target(
path: str,
mirror_prefix: str | None = None,
) -> Optional[dict]:
"""Classify a write target as a container-side sandbox mirror.
``mirror_prefix`` must be supplied by the caller after it has established
that file tools are executing in a container whose home is a sandbox
mirror. Returns ``None`` when no such context is active or the path is not
under the mirror prefix. Otherwise returns:
* ``target_path``: resolved path string
* ``mirror_root``: the declared container mirror prefix
* ``inner_path``: portion under the mirror root (what the agent
likely meant to address in the host HERMES_HOME)
"""
if not mirror_prefix:
return None
try:
target = Path(os.path.expanduser(str(path))).resolve()
mirror = Path(os.path.expanduser(mirror_prefix)).resolve()
inner = target.relative_to(mirror)
except (OSError, RuntimeError, ValueError):
return None
return {
"target_path": str(target),
"mirror_root": str(mirror),
"inner_path": inner.as_posix(),
}
def get_container_mirror_warning(
path: str,
mirror_prefix: str | None = None,
) -> Optional[str]:
"""Return a model-facing warning when *path* lands in the container's
sandbox mirror of authoritative Hermes state.
The caller supplies ``mirror_prefix`` only when the current file-tool
backend is known to execute inside a Docker sandbox. Same contract as
``get_cross_profile_warning``: soft guard, returns ``None`` for
non-mirror paths, caller surfaces as a tool-result error. Bypass via
``cross_profile=True`` after explicit user direction.
"""
info = classify_container_mirror_target(path, mirror_prefix)
if info is None:
return None
return (
f"Sandbox-mirror write blocked by soft guard: {info['target_path']} "
f"sits under {info['mirror_root']!r}, which is the container's "
f"bind-mounted home — a per-task mirror that the host Hermes "
f"process never reads. The authoritative file is "
f"{info['inner_path']!r} under the real HERMES_HOME. Use the "
f"host-side tool for authoritative state (e.g. ``memory`` for "
f"memories), or address the host path directly. To bypass after "
f"explicit user direction, retry with ``cross_profile=True``. "
f"(Defense-in-depth — not a security boundary; the terminal tool "
f"can still bypass.)"
)

View File

@ -0,0 +1,106 @@
"""Tests for the container-context sandbox-mirror guard (#32049 follow-up).
Brian's shape-based guard (#32213) catches paths that carry the full
``…/sandboxes/<backend>/<task>/home/.hermes/…`` prefix. This covers the
complementary inner-container case: when file tools execute inside Docker,
the bind-mount strips that prefix and the guard sees plain ``/root/.hermes/…``.
The root:root ownership on the divergent SOUL.md in #32049 confirms this
is the primary failure mode.
"""
from __future__ import annotations
import pytest
class TestClassifyContainerMirrorTarget:
def test_returns_none_without_context(self):
"""No Docker context — /root/.hermes/… must not be flagged."""
from agent.file_safety import classify_container_mirror_target
assert classify_container_mirror_target("/root/.hermes/profiles/group1/SOUL.md") is None
def test_catches_soul_md_with_context(self):
"""Primary failure mode from #32049: agent writes SOUL.md via container path."""
from agent.file_safety import classify_container_mirror_target
result = classify_container_mirror_target(
"/root/.hermes/profiles/group1/SOUL.md",
mirror_prefix="/root/.hermes",
)
assert result is not None
assert result["mirror_root"].replace("\\", "/").endswith("root/.hermes")
assert result["inner_path"] == "profiles/group1/SOUL.md"
@pytest.mark.parametrize("inner", [
"SOUL.md",
"memories/MEMORY.md",
])
def test_catches_authoritative_profile_files(self, inner):
from agent.file_safety import classify_container_mirror_target
result = classify_container_mirror_target(
f"/root/.hermes/{inner}",
mirror_prefix="/root/.hermes",
)
assert result is not None
assert result["inner_path"] == inner
def test_non_hermes_path_not_flagged(self):
"""/root/workspace/… is not .hermes state and must not be blocked."""
from agent.file_safety import classify_container_mirror_target
assert (
classify_container_mirror_target(
"/root/workspace/main.py",
mirror_prefix="/root/.hermes",
)
is None
)
class TestGetContainerMirrorWarning:
def test_warning_names_inner_path_and_bypass(self):
from agent.file_safety import get_container_mirror_warning
warn = get_container_mirror_warning(
"/root/.hermes/profiles/group1/SOUL.md",
mirror_prefix="/root/.hermes",
)
assert warn is not None
assert "profiles/group1/SOUL.md" in warn
assert "cross_profile=True" in warn
class TestOrthogonality:
"""Container-context guard catches what the shape-based guard (#32213) misses."""
def test_inner_container_path_caught_by_context_guard(self):
"""No sandboxes/ segment — shape guard passes, context guard blocks."""
from agent.file_safety import classify_container_mirror_target
path = "/root/.hermes/profiles/group1/SOUL.md"
assert classify_container_mirror_target(path) is None # no context
assert classify_container_mirror_target(path, mirror_prefix="/root/.hermes") is not None
class TestFileToolIntegration:
"""file_tools must catch the mirror path before creating DockerEnvironment."""
def test_guard_uses_current_docker_config_before_env_exists(self, monkeypatch):
import tools.file_tools as file_tools
monkeypatch.setattr(
file_tools,
"_get_container_mirror_prefix_for_task",
lambda task_id: "/root/.hermes",
)
warning = file_tools._check_cross_profile_path(
"/root/.hermes/profiles/group1/SOUL.md",
task_id="new-task",
)
assert warning is not None
assert "Sandbox-mirror write blocked" in warning
assert "profiles/group1/SOUL.md" in warning

View File

@ -289,11 +289,46 @@ def _check_sensitive_path(filepath: str, task_id: str = "default") -> str | None
return None
def _get_container_mirror_prefix_for_task(task_id: str = "default") -> str | None:
"""Return the container-side Hermes mirror prefix for Docker file tools."""
try:
from tools.terminal_tool import (
_active_environments,
_env_lock,
_get_env_config,
_resolve_container_task_id,
)
container_key = _resolve_container_task_id(task_id)
except Exception:
return None
try:
with _env_lock:
env = _active_environments.get(container_key) or _active_environments.get(task_id)
if env is not None:
if env.__class__.__name__ == "DockerEnvironment" and bool(
getattr(env, "_persistent", False)
):
return "/root/.hermes"
return None
config = _get_env_config()
except Exception:
return None
if config.get("env_type") == "docker" and config.get("container_persistent", True):
return "/root/.hermes"
return None
def _check_cross_profile_path(filepath: str, task_id: str = "default") -> str | None:
"""Return a soft-guard warning when ``filepath`` lands in another Hermes
profile's scoped area or a sandbox-mirror of authoritative profile state.
profile's scoped area, a host-side sandbox-mirror of authoritative profile
state, or the Docker container's sandbox mirror of Hermes state.
Two detectors run in order:
Three detectors run in order:
* cross-profile (#TBD) — writes that hit another profile's
``skills/plugins/cron/memories`` directory.
@ -302,17 +337,22 @@ def _check_cross_profile_path(filepath: str, task_id: str = "default") -> str |
non-local terminal backend (Docker, Daytona, etc.), where the host
Hermes process never reads the mirror and the authoritative file is
left untouched.
* container-mirror (#32049 follow-up) — writes from inside a Docker
container whose bind-mounted home strips the ``sandboxes/`` prefix, so
the agent sees a plain ``/root/.hermes/…`` path.
Returns ``None`` when the write is in-scope or outside Hermes scope.
Both detectors are soft guards — the agent can override either by
All detectors are soft guards — the agent can override any by
passing ``cross_profile=True`` to its write tool after explicit user
direction. Defense-in-depth, NOT a security boundary — the terminal
tool runs as the same OS user and can write any of these paths
directly. See ``agent/file_safety.classify_cross_profile_target`` and
``classify_sandbox_mirror_target`` for the detection rules.
directly. See ``agent/file_safety.classify_cross_profile_target``,
``classify_sandbox_mirror_target`` and ``classify_container_mirror_target``
for the detection rules.
"""
try:
from agent.file_safety import (
get_container_mirror_warning,
get_cross_profile_warning,
get_sandbox_mirror_warning,
)
@ -332,7 +372,15 @@ def _check_cross_profile_path(filepath: str, task_id: str = "default") -> str |
warning = get_cross_profile_warning(resolved)
if warning is not None:
return warning
return get_sandbox_mirror_warning(resolved)
warning = get_sandbox_mirror_warning(resolved)
if warning is not None:
return warning
return get_container_mirror_warning(
resolved,
mirror_prefix=_get_container_mirror_prefix_for_task(task_id),
)
def _is_expected_write_exception(exc: Exception) -> bool: