From 96643b4a52b118477b07c838e30eb8ae7372062c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 30 May 2026 07:55:36 -0700 Subject: [PATCH] fix(file-tools): anchor relative-path resolution to absolute base; report resolved path (#35399) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relative paths in write_file/patch could resolve against the agent PROCESS cwd instead of the terminal's working directory. In a git-worktree session with a stale TERMINAL_CWD='.' (a relative base), early edits silently landed in the MAIN checkout, verified there, and reported success — while the agent inspected the worktree and saw nothing, misreading it as the patch tool no-op'ing. - _resolve_base_dir(): resolution base is now ALWAYS absolute. A relative TERMINAL_CWD is anchored to the process cwd once, deterministically, instead of being left to resolve()-time cwd. Live terminal cwd stays authoritative. - write_file/patch pass the resolved absolute path to the shell FileOps layer so the tool layer and shell layer can't disagree about which file is edited. - Responses now report the absolute resolved_path and files_modified, so a wrong-cwd mismatch is visible on the first call. - _path_resolution_warning(): emits a _warning when a relative path resolves OUTSIDE the live terminal cwd (e.g. a worktree session writing into main). Validation: 11 new unit tests + 43 live E2E assertions (worktree routing, mid-session cd, V4A patches, divergence warning, absolute paths, consecutive patches); 466 existing file/path/terminal tests green. --- tests/tools/test_file_tools_cwd_resolution.py | 197 ++++++++++++++++++ tools/file_tools.py | 113 +++++++++- 2 files changed, 300 insertions(+), 10 deletions(-) create mode 100644 tests/tools/test_file_tools_cwd_resolution.py diff --git a/tests/tools/test_file_tools_cwd_resolution.py b/tests/tools/test_file_tools_cwd_resolution.py new file mode 100644 index 000000000..6bb7c1bf3 --- /dev/null +++ b/tests/tools/test_file_tools_cwd_resolution.py @@ -0,0 +1,197 @@ +"""Regression tests for file-tool path resolution base correctness. + +The bug (observed in a worktree dev session, May 2026): when the resolution +base for a relative path is itself RELATIVE — e.g. ``TERMINAL_CWD="."`` from a +stale config — ``_resolve_path_for_task`` resolved the path against the agent's +PROCESS cwd instead of the intended workspace. In a git-worktree session this +silently routed ``patch``/``write_file`` edits into the *main* checkout: the +write landed, self-verified, and reported success — against the wrong file. +The agent then grepped the worktree, saw nothing, and concluded the patch tool +had silently no-op'd. It hadn't; it wrote to the wrong place. + +Core invariant these tests pin: + The resolution base for a relative path MUST always be absolute. A relative + ``TERMINAL_CWD`` (``.``, ``./sub``, ``..``) must be anchored deterministically, + never left to resolve against whatever the process cwd happens to be. +""" + +import os +from pathlib import Path + +import pytest + +import tools.file_tools as ft + + +@pytest.fixture +def _isolated_cwd(tmp_path, monkeypatch): + """Two checkouts: workspace (intended) + decoy (process cwd).""" + workspace = tmp_path / "workspace" + decoy = tmp_path / "decoy" + workspace.mkdir() + decoy.mkdir() + (workspace / "target.py").write_text("WORKSPACE_ORIGINAL\n") + (decoy / "target.py").write_text("DECOY_ORIGINAL\n") + # Process cwd = decoy, analogous to "main repo" while the terminal is in + # the worktree. + monkeypatch.chdir(decoy) + # No live-terminal-cwd tracking recorded yet (fresh-session condition). + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None) + return workspace, decoy + + +def test_relative_terminal_cwd_anchors_to_absolute_not_process_cwd(_isolated_cwd, monkeypatch): + """TERMINAL_CWD='.' must NOT silently mean 'the agent process cwd'. + + A relative base is meaningless as a resolution anchor. The resolver must + make it absolute deterministically. We assert the resolved path is + absolute and stable regardless of where os.getcwd() points. + """ + workspace, decoy = _isolated_cwd + # Poison config: literal relative '.' + monkeypatch.setenv("TERMINAL_CWD", ".") + + resolved = ft._resolve_path_for_task("target.py", task_id="default") + + assert resolved.is_absolute(), f"resolution base leaked a relative path: {resolved}" + # The exact anchor for a bare '.' is the process cwd resolved to absolute — + # that is acceptable as long as it is ABSOLUTE and stable. The bug was that + # a relative base produced surprising results; the fix is that the base is + # always absolutised. (We do not require it to point at the workspace here — + # that's what live-cwd tracking is for; see the next test.) + assert str(resolved) == str((Path(os.getcwd()) / "target.py").resolve()) + + +def test_live_tracking_cwd_wins_over_relative_terminal_cwd(_isolated_cwd, monkeypatch): + """When the terminal reports its absolute cwd, that is authoritative. + + This is the real-world fix: the terminal's tracked absolute cwd (the + worktree) must override a stale relative TERMINAL_CWD so edits land where + the agent is actually working. + """ + workspace, decoy = _isolated_cwd + monkeypatch.setenv("TERMINAL_CWD", ".") + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace)) + + resolved = ft._resolve_path_for_task("target.py", task_id="default") + + assert resolved == (workspace / "target.py") + + +def test_absolute_terminal_cwd_used_verbatim(_isolated_cwd, monkeypatch): + """An absolute TERMINAL_CWD is the resolution base (no live tracking).""" + workspace, decoy = _isolated_cwd + monkeypatch.setenv("TERMINAL_CWD", str(workspace)) + + resolved = ft._resolve_path_for_task("target.py", task_id="default") + + assert resolved == (workspace / "target.py") + + +def test_absolute_input_path_ignores_base(_isolated_cwd, monkeypatch): + """An absolute input path is never re-anchored.""" + workspace, decoy = _isolated_cwd + monkeypatch.setenv("TERMINAL_CWD", ".") + abs_target = str(workspace / "target.py") + + resolved = ft._resolve_path_for_task(abs_target, task_id="default") + + assert resolved == Path(abs_target).resolve() + + +def test_resolution_base_always_absolute_no_terminal_cwd(_isolated_cwd, monkeypatch): + """With TERMINAL_CWD unset, the base falls back to an ABSOLUTE process cwd.""" + workspace, decoy = _isolated_cwd + monkeypatch.delenv("TERMINAL_CWD", raising=False) + + resolved = ft._resolve_path_for_task("target.py", task_id="default") + + assert resolved.is_absolute() + assert str(resolved) == str((Path(os.getcwd()) / "target.py").resolve()) + + +# ── B-(ii): workspace-divergence warning ──────────────────────────────────── + + +def test_warning_fires_when_relative_path_escapes_workspace(_isolated_cwd, monkeypatch): + """Relative path resolving outside the live workspace must warn.""" + workspace, decoy = _isolated_cwd + # Live cwd = workspace, but the relative path resolves to decoy (process cwd) + # because TERMINAL_CWD is the poison '.'. Simulate by pointing live tracking + # at workspace while the resolved path is under decoy. + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace)) + resolved_in_decoy = decoy / "target.py" + + warn = ft._path_resolution_warning("target.py", resolved_in_decoy, task_id="default") + + assert warn is not None + assert "OUTSIDE the active workspace" in warn + assert str(decoy) in warn + assert str(workspace) in warn + + +def test_no_warning_when_relative_path_inside_workspace(_isolated_cwd, monkeypatch): + workspace, decoy = _isolated_cwd + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace)) + resolved_in_workspace = workspace / "target.py" + + warn = ft._path_resolution_warning("target.py", resolved_in_workspace, task_id="default") + + assert warn is None + + +def test_no_warning_for_absolute_input(_isolated_cwd, monkeypatch): + workspace, decoy = _isolated_cwd + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace)) + + warn = ft._path_resolution_warning(str(decoy / "target.py"), decoy / "target.py", task_id="default") + + assert warn is None + + +def test_no_warning_when_no_live_cwd(_isolated_cwd, monkeypatch): + workspace, decoy = _isolated_cwd + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None) + + warn = ft._path_resolution_warning("target.py", decoy / "target.py", task_id="default") + + assert warn is None + + +# ── Fix A: write_file / patch report the resolved ABSOLUTE path ────────────── + + +def test_write_file_reports_resolved_absolute_path(_isolated_cwd, monkeypatch): + """write_file_tool must put the absolute on-disk path in files_modified.""" + workspace, decoy = _isolated_cwd + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace)) + + import json + out = json.loads(ft.write_file_tool("newfile.txt", "hello\n", task_id="t1")) + + expected = str((workspace / "newfile.txt").resolve()) + assert out.get("resolved_path") == expected + assert out.get("files_modified") == [expected] + assert (workspace / "newfile.txt").read_text() == "hello\n" + + +def test_patch_reports_resolved_absolute_path(_isolated_cwd, monkeypatch): + """patch_tool (replace mode) must put the absolute on-disk path in files_modified.""" + workspace, decoy = _isolated_cwd + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(workspace)) + + import json + out = json.loads(ft.patch_tool( + mode="replace", path="target.py", + old_string="WORKSPACE_ORIGINAL", new_string="WORKSPACE_PATCHED", + task_id="t1", + )) + + expected = str((workspace / "target.py").resolve()) + assert not out.get("error"), out + assert out.get("resolved_path") == expected + assert out.get("files_modified") == [expected] + assert "WORKSPACE_PATCHED" in (workspace / "target.py").read_text() + # And the decoy copy is untouched. + assert (decoy / "target.py").read_text() == "DECOY_ORIGINAL\n" + diff --git a/tools/file_tools.py b/tools/file_tools.py index 54a089fc9..6ea6ff0a3 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -116,15 +116,80 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None: return None +def _resolve_base_dir(task_id: str = "default") -> Path: + """Return the ABSOLUTE base directory for resolving relative paths. + + Resolution order: + 1. The task's live terminal cwd (the directory the agent is actually + working in — e.g. a git worktree). Authoritative when known. + 2. ``$TERMINAL_CWD`` from config/env. + 3. The process cwd. + + The returned base is ALWAYS absolute. This is the core invariant that + prevents the worktree-cwd divergence bug: a relative ``TERMINAL_CWD`` + (commonly the literal ``"."`` from a stale config) is meaningless as a + resolution anchor — left to ``Path.resolve()`` it silently resolves + against whatever the agent PROCESS cwd happens to be (e.g. the main repo + while the terminal is in a worktree), routing edits to the wrong checkout. + Anchoring a relative base against the process cwd here makes the resolution + deterministic and inspectable rather than dependent on resolve()-time cwd. + """ + live = _get_live_tracking_cwd(task_id) + if live: + base = Path(live).expanduser() + else: + raw = os.environ.get("TERMINAL_CWD") + base = Path(raw).expanduser() if raw else Path(os.getcwd()) + if not base.is_absolute(): + # A relative base (".", "./sub", "..") is anchored to the process cwd + # once, here, so the result no longer depends on cwd at resolve() time. + base = Path(os.getcwd()) / base + return base.resolve() + + def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path: - """Resolve *filepath* against the task's live terminal cwd when possible.""" + """Resolve *filepath* against the task's absolute base directory. + + See :func:`_resolve_base_dir` for how the base is chosen. Absolute input + paths are returned resolved-but-unanchored. + """ p = Path(filepath).expanduser() - if not p.is_absolute(): - base = _get_live_tracking_cwd(task_id) or os.environ.get( - "TERMINAL_CWD", os.getcwd() - ) - p = Path(base) / p - return p.resolve() + if p.is_absolute(): + return p.resolve() + return (_resolve_base_dir(task_id) / p).resolve() + + +def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "default") -> str | None: + """Warn when a relative path resolved OUTSIDE the task's workspace root. + + Surfaces the worktree-cwd divergence the moment it would matter: if the + agent passes a relative path but it resolves under a directory that is not + the live terminal cwd (i.e. the edit is about to land in a different + checkout than the one the agent is working in), return a message naming the + absolute target. ``None`` when the path is absolute, the base is unknown, + or the resolved path is correctly under the workspace root. + """ + try: + if Path(filepath).expanduser().is_absolute(): + return None + live = _get_live_tracking_cwd(task_id) + if not live: + return None # No authoritative workspace root to compare against. + root = Path(live).expanduser().resolve() + # Is `resolved` inside `root`? + try: + resolved.relative_to(root) + return None # Inside the workspace — expected. + except ValueError: + return ( + f"Relative path {filepath!r} resolved to {str(resolved)!r}, which is " + f"OUTSIDE the active workspace ({str(root)!r}). The edit will land in " + f"a different directory than the terminal's cwd. If this is not " + f"intended (e.g. a git-worktree session writing into the main " + f"checkout), pass an absolute path under the workspace instead." + ) + except Exception: + return None def _is_blocked_device_path(path: str) -> bool: @@ -930,12 +995,21 @@ def write_file_tool(path: str, content: str, task_id: str = "default", # fire — its message names the sibling subagent. cross_warning = file_state.check_stale(task_id, _resolved) stale_warning = _check_file_staleness(path, task_id) + # Workspace-divergence warning: relative path resolving outside the + # terminal's cwd (the worktree-cwd bug). Lowest priority of the three. + cwd_warning = _path_resolution_warning(path, Path(_resolved), task_id) file_ops = _get_file_ops(task_id) - result = file_ops.write_file(path, content) + result = file_ops.write_file(_resolved, content) result_dict = result.to_dict() - effective_warning = cross_warning or stale_warning + effective_warning = cross_warning or stale_warning or cwd_warning if effective_warning: result_dict["_warning"] = effective_warning + # Always report the ABSOLUTE path actually written, so a wrong-cwd + # mismatch is visible in the response instead of silently routing + # the edit to the wrong checkout. + result_dict["resolved_path"] = _resolved + if not result_dict.get("error"): + result_dict["files_modified"] = [_resolved] # Refresh stamps after the successful write so consecutive # writes by this task don't trigger false staleness warnings. _update_read_timestamp(path, task_id) @@ -1027,6 +1101,10 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, _path_to_resolved[_p] = _r _cross = file_state.check_stale(task_id, _r) if _r else None _sw = _cross or _check_file_staleness(_p, task_id) + if not _sw and _r: + # Workspace-divergence warning (worktree-cwd bug): relative + # path resolving outside the terminal's cwd. + _sw = _path_resolution_warning(_p, Path(_r), task_id) if _sw: stale_warnings.append(_sw) @@ -1037,7 +1115,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, return tool_error("path required") if old_string is None or new_string is None: return tool_error("old_string and new_string required") - result = file_ops.patch_replace(path, old_string, new_string, replace_all) + # Pass the resolved ABSOLUTE path to the shell layer so it + # operates on the exact file the tool layer resolved — the + # shell's own cwd may differ (worktree-cwd bug), and a relative + # path would let the two layers disagree about which file is + # being edited. + _replace_target = _path_to_resolved.get(path) or path + result = file_ops.patch_replace(_replace_target, old_string, new_string, replace_all) elif mode == "patch": if not patch: return tool_error("patch content required") @@ -1048,9 +1132,18 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, result_dict = result.to_dict() if stale_warnings: result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings) + # Report the ABSOLUTE path(s) actually patched so a wrong-cwd + # mismatch (e.g. a worktree session editing the main checkout) is + # visible in the response instead of silently landing elsewhere. + _resolved_modified = [ + _path_to_resolved.get(_p) or _p for _p in _paths_to_check + ] # Refresh stored timestamps for all successfully-patched paths so # consecutive edits by this task don't trigger false warnings. if not result_dict.get("error"): + result_dict["files_modified"] = _resolved_modified + if len(_resolved_modified) == 1: + result_dict["resolved_path"] = _resolved_modified[0] for _p in _paths_to_check: _update_read_timestamp(_p, task_id) _r = _path_to_resolved.get(_p)