perf(read_file): compact line-number gutter — ~14% fewer tokens per read (#35368)

read_file's gutter used a fixed-width zero/space-padded prefix (" 1|content"). The padding is pure token overhead: measured with cl100k on real Hermes source, the padded gutter costs ~48% more tokens than bare content and ~16% more than a compact "<n>|content" gutter, because the leading spaces tokenize into extra tokens on every line. Switched the default to the compact "<n>|content" form. An A/B (Sonnet 4.6 via OpenRouter, 2 passes, 4-task battery, every claim verified against ground truth) showed: - padded : 4/4 PASS both passes - compact : 4/4 PASS both passes ← keeps line-referencing + patch - none : 3/4 PASS both passes ← dropping numbers entirely made the model hand-count lines and answer off-by-one (33 vs 34) So we keep the line numbers (the model genuinely uses them to reference lines) but drop the wasteful padding — capturing ~14% of the read-token cost with zero measured accuracy change. Dropping numbers entirely (the larger 33% saving) is rejected: it regresses line-referencing. patch/fuzzy_match never consumed the gutter (they match old_string text and compute char offsets internally), so editing is unaffected. No downstream parser keys on the fixed-width columns. HERMES_READ_GUTTER= padded restores the legacy format for anyone relying on alignment. Tests: updated the 3 format assertions to the compact gutter; added an env-override test for the legacy padded format. 209 file-tool tests green.
2026-05-30 07:01:22 -07:00
parent 5f84c9144a
commit ea6eaabd8f
3 changed files with 34 additions and 9 deletions
--- a/tests/tools/test_file_operations.py
+++ b/tests/tools/test_file_operations.py
@ -345,15 +345,23 @@ class TestShellFileOpsHelpers:
    def test_add_line_numbers(self, file_ops):
        content = "line one\nline two\nline three"
        result = file_ops._add_line_numbers(content)
-        assert "     1|line one" in result
-        assert "     2|line two" in result
-        assert "     3|line three" in result
+        # Compact gutter: "<n>|content" (no fixed-width padding).
+        assert "1|line one" in result
+        assert "2|line two" in result
+        assert "3|line three" in result

    def test_add_line_numbers_with_offset(self, file_ops):
        content = "continued\nmore"
        result = file_ops._add_line_numbers(content, start_line=50)
-        assert "    50|continued" in result
-        assert "    51|more" in result
+        assert "50|continued" in result
+        assert "51|more" in result
+
+    def test_add_line_numbers_padded_env_override(self, file_ops, monkeypatch):
+        # Legacy fixed-width format available via HERMES_READ_GUTTER=padded.
+        monkeypatch.setenv("HERMES_READ_GUTTER", "padded")
+        result = file_ops._add_line_numbers("line one\nline two")
+        assert "     1|line one" in result
+        assert "     2|line two" in result

    def test_add_line_numbers_truncates_long_lines(self, file_ops):
        long_line = "x" * (MAX_LINE_LENGTH + 100)
@ -405,7 +413,7 @@ class TestShellFileOpsHelpers:
        assert "HERMES_FENCE" not in result.content
        assert "\x1b]" not in result.content
        assert "\x07" not in result.content
-        assert "     1|print('ok')" in result.content
+        assert "1|print('ok')" in result.content

    def test_read_file_raw_strips_leaked_terminal_fence_markers(self, mock_env):
        leaked = (
--- a/tests/tools/test_file_operations_edge_cases.py
+++ b/tests/tools/test_file_operations_edge_cases.py
@ -292,7 +292,7 @@ class TestPaginationBounds:
            result = ops.read_file("notes.txt", offset=0, limit=0)

        assert result.error is None
-        assert "     1|line1" in result.content
+        assert "1|line1" in result.content
        sed_commands = [cmd for cmd in commands if cmd.startswith("sed -n")]
        assert sed_commands == ["sed -n '1,1p' 'notes.txt'"]

--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@ -702,8 +702,25 @@ class ShellFileOperations(FileOperations):
        return ext in IMAGE_EXTENSIONS
    
    def _add_line_numbers(self, content: str, start_line: int = 1) -> str:
-        """Add line numbers to content in LINE_NUM|CONTENT format."""
+        """Add line numbers to content in ``LINE_NUM|CONTENT`` format.
+
+        The gutter uses a compact ``<n>|`` prefix (e.g. ``34|foo``) rather
+        than a fixed-width zero/space-padded one (``    34|foo``). The
+        padding was pure token overhead: on dense source the padded gutter
+        cost ~48% more tokens than the bare content and ~16% more than the
+        compact form, because the leading spaces + zero-padding tokenize
+        into extra tokens on every single line. An A/B (Sonnet 4.6, 2
+        passes) showed the compact gutter matches the padded gutter on
+        line-reference / patch / value-lookup / structure tasks (4/4 both),
+        while dropping line numbers entirely regressed line-referencing
+        (the model hand-counted and was off-by-one, 3/4) — so we keep the
+        numbers, just not the padding. ``HERMES_READ_GUTTER=padded``
+        restores the legacy fixed-width format for anyone who relied on
+        column alignment.
+        """
+        import os as _os
        from tools.tool_output_limits import get_max_line_length
+        padded = (_os.environ.get("HERMES_READ_GUTTER") or "").lower() == "padded"
        max_line_length = get_max_line_length()
        lines = content.split('\n')
        numbered = []
@ -711,7 +728,7 @@ class ShellFileOperations(FileOperations):
            # Truncate long lines
            if len(line) > max_line_length:
                line = line[:max_line_length] + "... [truncated]"
-            numbered.append(f"{i:6d}|{line}")
+            numbered.append(f"{i:6d}|{line}" if padded else f"{i}|{line}")
        return '\n'.join(numbered)
    
    def _expand_path(self, path: str) -> str: