fix(file-tools): anchor relative-path resolution to absolute base; report resolved path (#35399)

Relative paths in write_file/patch could resolve against the agent PROCESS cwd
instead of the terminal's working directory. In a git-worktree session with a
stale TERMINAL_CWD='.' (a relative base), early edits silently landed in the
MAIN checkout, verified there, and reported success — while the agent inspected
the worktree and saw nothing, misreading it as the patch tool no-op'ing.

- _resolve_base_dir(): resolution base is now ALWAYS absolute. A relative
  TERMINAL_CWD is anchored to the process cwd once, deterministically, instead
  of being left to resolve()-time cwd. Live terminal cwd stays authoritative.
- write_file/patch pass the resolved absolute path to the shell FileOps layer
  so the tool layer and shell layer can't disagree about which file is edited.
- Responses now report the absolute resolved_path and files_modified, so a
  wrong-cwd mismatch is visible on the first call.
- _path_resolution_warning(): emits a _warning when a relative path resolves
  OUTSIDE the live terminal cwd (e.g. a worktree session writing into main).

Validation: 11 new unit tests + 43 live E2E assertions (worktree routing,
mid-session cd, V4A patches, divergence warning, absolute paths, consecutive
patches); 466 existing file/path/terminal tests green.
This commit is contained in:
Teknium
2026-05-30 07:55:36 -07:00
committed by GitHub
parent 0c6e133c04
commit 96643b4a52
2 changed files with 300 additions and 10 deletions

View File

@ -0,0 +1,197 @@
"""Regression tests for file-tool path resolution base correctness.
The bug (observed in a worktree dev session, May 2026): when the resolution
base for a relative path is itself RELATIVE — e.g. ``TERMINAL_CWD="."`` from a
stale config — ``_resolve_path_for_task`` resolved the path against the agent's
PROCESS cwd instead of the intended workspace. In a git-worktree session this
silently routed ``patch``/``write_file`` edits into the *main* checkout: the
write landed, self-verified, and reported success — against the wrong file.
The agent then grepped the worktree, saw nothing, and concluded the patch tool
had silently no-op'd. It hadn't; it wrote to the wrong place.
Core invariant these tests pin:
The resolution base for a relative path MUST always be absolute. A relative
``TERMINAL_CWD`` (``.``, ``./sub``, ``..``) must be anchored deterministically,
never left to resolve against whatever the process cwd happens to be.
"""
import os
from pathlib import Path
import pytest
import tools.file_tools as ft
@pytest.fixture
def _isolated_cwd(tmp_path, monkeypatch):
"""Two checkouts: workspace (intended) + decoy (process cwd)."""
workspace = tmp_path / "workspace"
decoy = tmp_path / "decoy"
workspace.mkdir()
decoy.mkdir()
(workspace / "target.py").write_text("WORKSPACE_ORIGINAL\n")
(decoy / "target.py").write_text("DECOY_ORIGINAL\n")
# Process cwd = decoy, analogous to "main repo" while the terminal is in
# the worktree.
monkeypatch.chdir(decoy)
# No live-terminal-cwd tracking recorded yet (fresh-session condition).
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
return workspace, decoy
def test_relative_terminal_cwd_anchors_to_absolute_not_process_cwd(_isolated_cwd, monkeypatch):
"""TERMINAL_CWD='.' must NOT silently mean 'the agent process cwd'.
A relative base is meaningless as a resolution anchor. The resolver must
make it absolute deterministically. We assert the resolved path is
absolute and stable regardless of where os.getcwd() points.
"""
workspace, decoy = _isolated_cwd
# Poison config: literal relative '.'
monkeypatch.setenv("TERMINAL_CWD", ".")
resolved = ft._resolve_path_for_task("target.py", task_id="default")
assert resolved.is_absolute(), f"resolution base leaked a relative path: {resolved}"
# The exact anchor for a bare '.' is the process cwd resolved to absolute —
# that is acceptable as long as it is ABSOLUTE and stable. The bug was that
# a relative base produced surprising results; the fix is that the base is
# always absolutised. (We do not require it to point at the workspace here —
# that's what live-cwd tracking is for; see the next test.)
assert str(resolved) == str((Path(os.getcwd()) / "target.py").resolve())
def test_live_tracking_cwd_wins_over_relative_terminal_cwd(_isolated_cwd, monkeypatch):
"""When the terminal reports its absolute cwd, that is authoritative.
This is the real-world fix: the terminal's tracked absolute cwd (the
worktree) must override a stale relative TERMINAL_CWD so edits land where
the agent is actually working.
"""
workspace, decoy = _isolated_cwd
monkeypatch.setenv("TERMINAL_CWD", ".")
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
resolved = ft._resolve_path_for_task("target.py", task_id="default")
assert resolved == (workspace / "target.py")
def test_absolute_terminal_cwd_used_verbatim(_isolated_cwd, monkeypatch):
"""An absolute TERMINAL_CWD is the resolution base (no live tracking)."""
workspace, decoy = _isolated_cwd
monkeypatch.setenv("TERMINAL_CWD", str(workspace))
resolved = ft._resolve_path_for_task("target.py", task_id="default")
assert resolved == (workspace / "target.py")
def test_absolute_input_path_ignores_base(_isolated_cwd, monkeypatch):
"""An absolute input path is never re-anchored."""
workspace, decoy = _isolated_cwd
monkeypatch.setenv("TERMINAL_CWD", ".")
abs_target = str(workspace / "target.py")
resolved = ft._resolve_path_for_task(abs_target, task_id="default")
assert resolved == Path(abs_target).resolve()
def test_resolution_base_always_absolute_no_terminal_cwd(_isolated_cwd, monkeypatch):
"""With TERMINAL_CWD unset, the base falls back to an ABSOLUTE process cwd."""
workspace, decoy = _isolated_cwd
monkeypatch.delenv("TERMINAL_CWD", raising=False)
resolved = ft._resolve_path_for_task("target.py", task_id="default")
assert resolved.is_absolute()
assert str(resolved) == str((Path(os.getcwd()) / "target.py").resolve())
# ── B-(ii): workspace-divergence warning ────────────────────────────────────
def test_warning_fires_when_relative_path_escapes_workspace(_isolated_cwd, monkeypatch):
"""Relative path resolving outside the live workspace must warn."""
workspace, decoy = _isolated_cwd
# Live cwd = workspace, but the relative path resolves to decoy (process cwd)
# because TERMINAL_CWD is the poison '.'. Simulate by pointing live tracking
# at workspace while the resolved path is under decoy.
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
resolved_in_decoy = decoy / "target.py"
warn = ft._path_resolution_warning("target.py", resolved_in_decoy, task_id="default")
assert warn is not None
assert "OUTSIDE the active workspace" in warn
assert str(decoy) in warn
assert str(workspace) in warn
def test_no_warning_when_relative_path_inside_workspace(_isolated_cwd, monkeypatch):
workspace, decoy = _isolated_cwd
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
resolved_in_workspace = workspace / "target.py"
warn = ft._path_resolution_warning("target.py", resolved_in_workspace, task_id="default")
assert warn is None
def test_no_warning_for_absolute_input(_isolated_cwd, monkeypatch):
workspace, decoy = _isolated_cwd
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
warn = ft._path_resolution_warning(str(decoy / "target.py"), decoy / "target.py", task_id="default")
assert warn is None
def test_no_warning_when_no_live_cwd(_isolated_cwd, monkeypatch):
workspace, decoy = _isolated_cwd
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
warn = ft._path_resolution_warning("target.py", decoy / "target.py", task_id="default")
assert warn is None
# ── Fix A: write_file / patch report the resolved ABSOLUTE path ──────────────
def test_write_file_reports_resolved_absolute_path(_isolated_cwd, monkeypatch):
"""write_file_tool must put the absolute on-disk path in files_modified."""
workspace, decoy = _isolated_cwd
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
import json
out = json.loads(ft.write_file_tool("newfile.txt", "hello\n", task_id="t1"))
expected = str((workspace / "newfile.txt").resolve())
assert out.get("resolved_path") == expected
assert out.get("files_modified") == [expected]
assert (workspace / "newfile.txt").read_text() == "hello\n"
def test_patch_reports_resolved_absolute_path(_isolated_cwd, monkeypatch):
"""patch_tool (replace mode) must put the absolute on-disk path in files_modified."""
workspace, decoy = _isolated_cwd
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace))
import json
out = json.loads(ft.patch_tool(
mode="replace", path="target.py",
old_string="WORKSPACE_ORIGINAL", new_string="WORKSPACE_PATCHED",
task_id="t1",
))
expected = str((workspace / "target.py").resolve())
assert not out.get("error"), out
assert out.get("resolved_path") == expected
assert out.get("files_modified") == [expected]
assert "WORKSPACE_PATCHED" in (workspace / "target.py").read_text()
# And the decoy copy is untouched.
assert (decoy / "target.py").read_text() == "DECOY_ORIGINAL\n"

View File

@ -116,15 +116,80 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
return None
def _resolve_base_dir(task_id: str = "default") -> Path:
"""Return the ABSOLUTE base directory for resolving relative paths.
Resolution order:
1. The task's live terminal cwd (the directory the agent is actually
working in — e.g. a git worktree). Authoritative when known.
2. ``$TERMINAL_CWD`` from config/env.
3. The process cwd.
The returned base is ALWAYS absolute. This is the core invariant that
prevents the worktree-cwd divergence bug: a relative ``TERMINAL_CWD``
(commonly the literal ``"."`` from a stale config) is meaningless as a
resolution anchor — left to ``Path.resolve()`` it silently resolves
against whatever the agent PROCESS cwd happens to be (e.g. the main repo
while the terminal is in a worktree), routing edits to the wrong checkout.
Anchoring a relative base against the process cwd here makes the resolution
deterministic and inspectable rather than dependent on resolve()-time cwd.
"""
live = _get_live_tracking_cwd(task_id)
if live:
base = Path(live).expanduser()
else:
raw = os.environ.get("TERMINAL_CWD")
base = Path(raw).expanduser() if raw else Path(os.getcwd())
if not base.is_absolute():
# A relative base (".", "./sub", "..") is anchored to the process cwd
# once, here, so the result no longer depends on cwd at resolve() time.
base = Path(os.getcwd()) / base
return base.resolve()
def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
"""Resolve *filepath* against the task's live terminal cwd when possible."""
"""Resolve *filepath* against the task's absolute base directory.
See :func:`_resolve_base_dir` for how the base is chosen. Absolute input
paths are returned resolved-but-unanchored.
"""
p = Path(filepath).expanduser()
if not p.is_absolute():
base = _get_live_tracking_cwd(task_id) or os.environ.get(
"TERMINAL_CWD", os.getcwd()
)
p = Path(base) / p
return p.resolve()
if p.is_absolute():
return p.resolve()
return (_resolve_base_dir(task_id) / p).resolve()
def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "default") -> str | None:
"""Warn when a relative path resolved OUTSIDE the task's workspace root.
Surfaces the worktree-cwd divergence the moment it would matter: if the
agent passes a relative path but it resolves under a directory that is not
the live terminal cwd (i.e. the edit is about to land in a different
checkout than the one the agent is working in), return a message naming the
absolute target. ``None`` when the path is absolute, the base is unknown,
or the resolved path is correctly under the workspace root.
"""
try:
if Path(filepath).expanduser().is_absolute():
return None
live = _get_live_tracking_cwd(task_id)
if not live:
return None # No authoritative workspace root to compare against.
root = Path(live).expanduser().resolve()
# Is `resolved` inside `root`?
try:
resolved.relative_to(root)
return None # Inside the workspace — expected.
except ValueError:
return (
f"Relative path {filepath!r} resolved to {str(resolved)!r}, which is "
f"OUTSIDE the active workspace ({str(root)!r}). The edit will land in "
f"a different directory than the terminal's cwd. If this is not "
f"intended (e.g. a git-worktree session writing into the main "
f"checkout), pass an absolute path under the workspace instead."
)
except Exception:
return None
def _is_blocked_device_path(path: str) -> bool:
@ -930,12 +995,21 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
# fire — its message names the sibling subagent.
cross_warning = file_state.check_stale(task_id, _resolved)
stale_warning = _check_file_staleness(path, task_id)
# Workspace-divergence warning: relative path resolving outside the
# terminal's cwd (the worktree-cwd bug). Lowest priority of the three.
cwd_warning = _path_resolution_warning(path, Path(_resolved), task_id)
file_ops = _get_file_ops(task_id)
result = file_ops.write_file(path, content)
result = file_ops.write_file(_resolved, content)
result_dict = result.to_dict()
effective_warning = cross_warning or stale_warning
effective_warning = cross_warning or stale_warning or cwd_warning
if effective_warning:
result_dict["_warning"] = effective_warning
# Always report the ABSOLUTE path actually written, so a wrong-cwd
# mismatch is visible in the response instead of silently routing
# the edit to the wrong checkout.
result_dict["resolved_path"] = _resolved
if not result_dict.get("error"):
result_dict["files_modified"] = [_resolved]
# Refresh stamps after the successful write so consecutive
# writes by this task don't trigger false staleness warnings.
_update_read_timestamp(path, task_id)
@ -1027,6 +1101,10 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
_path_to_resolved[_p] = _r
_cross = file_state.check_stale(task_id, _r) if _r else None
_sw = _cross or _check_file_staleness(_p, task_id)
if not _sw and _r:
# Workspace-divergence warning (worktree-cwd bug): relative
# path resolving outside the terminal's cwd.
_sw = _path_resolution_warning(_p, Path(_r), task_id)
if _sw:
stale_warnings.append(_sw)
@ -1037,7 +1115,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
return tool_error("path required")
if old_string is None or new_string is None:
return tool_error("old_string and new_string required")
result = file_ops.patch_replace(path, old_string, new_string, replace_all)
# Pass the resolved ABSOLUTE path to the shell layer so it
# operates on the exact file the tool layer resolved — the
# shell's own cwd may differ (worktree-cwd bug), and a relative
# path would let the two layers disagree about which file is
# being edited.
_replace_target = _path_to_resolved.get(path) or path
result = file_ops.patch_replace(_replace_target, old_string, new_string, replace_all)
elif mode == "patch":
if not patch:
return tool_error("patch content required")
@ -1048,9 +1132,18 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
result_dict = result.to_dict()
if stale_warnings:
result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
# Report the ABSOLUTE path(s) actually patched so a wrong-cwd
# mismatch (e.g. a worktree session editing the main checkout) is
# visible in the response instead of silently landing elsewhere.
_resolved_modified = [
_path_to_resolved.get(_p) or _p for _p in _paths_to_check
]
# Refresh stored timestamps for all successfully-patched paths so
# consecutive edits by this task don't trigger false warnings.
if not result_dict.get("error"):
result_dict["files_modified"] = _resolved_modified
if len(_resolved_modified) == 1:
result_dict["resolved_path"] = _resolved_modified[0]
for _p in _paths_to_check:
_update_read_timestamp(_p, task_id)
_r = _path_to_resolved.get(_p)