Adds a user-chosen compression boundary to the existing /compress command. /compress here [N] summarizes everything except the most recent N exchanges (default 2), which are preserved verbatim — letting the user pick the compression boundary instead of relying on the automatic token-budget heuristic. Inspired by Claude Code's Rewind 'Summarize up to here' action (v2.1.139, Week 20, May 2026): https://code.claude.com/docs/en/whats-new/2026-w20 - hermes_cli/partial_compress.py: pure split/parse helpers + seam-alternation guard (shared by CLI and gateway). - cli.py / gateway/run.py: route 'here [N]' / '--keep N' to partial compression; compress only the head, re-append the verbatim tail through the seam guard. - Preserves message-flow role alternation (seam guard merges any illegal user->user / assistant->assistant adjacency). - Reuses the existing _compress_context session-rotation/lock machinery — no changes to the compression core. - Bare /compress (full) and /compress <focus> behavior unchanged. Tests: 12 helper unit tests + 5 CLI integration tests + E2E (interleaved tool-call transcript, degenerate/multimodal seams, real handler path).
120 lines
4.5 KiB
Python
120 lines
4.5 KiB
Python
"""Tests for /compress here [N] — boundary-aware partial compression.
|
|
|
|
Verifies the CLI handler (_manual_compress) splits the history, compresses
|
|
only the head, and re-appends the verbatim tail. Inspired by Claude Code's
|
|
Rewind "Summarize up to here" action (v2.1.139, May 2026).
|
|
"""
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from tests.cli.test_cli_init import _make_cli
|
|
|
|
|
|
def _make_history() -> list[dict[str, str]]:
|
|
# 8 messages = 4 exchanges.
|
|
h: list[dict[str, str]] = []
|
|
for i in range(4):
|
|
h.append({"role": "user", "content": f"u{i}"})
|
|
h.append({"role": "assistant", "content": f"a{i}"})
|
|
return h
|
|
|
|
|
|
def _wire_agent(shell, compressed_head):
|
|
shell.agent = MagicMock()
|
|
shell.agent.compression_enabled = True
|
|
shell.agent._cached_system_prompt = ""
|
|
shell.agent.session_id = None
|
|
shell.agent.tools = None
|
|
shell.agent._compress_context.return_value = (compressed_head, "")
|
|
|
|
|
|
def test_compress_here_compresses_head_only(capsys):
|
|
"""/compress here 2 passes only the head to _compress_context."""
|
|
shell = _make_cli()
|
|
history = _make_history()
|
|
shell.conversation_history = history
|
|
# Pretend compression collapses the head into a single summary message.
|
|
summary = [{"role": "user", "content": "[summary of earlier turns]"}]
|
|
_wire_agent(shell, summary)
|
|
|
|
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
|
|
shell._manual_compress("/compress here 2")
|
|
|
|
# _compress_context should have been called with the HEAD only
|
|
# (everything before the last 2 user-starts = first 4 messages).
|
|
shell.agent._compress_context.assert_called_once()
|
|
call = shell.agent._compress_context.call_args
|
|
passed_head = call.args[0]
|
|
assert passed_head == history[:4]
|
|
# focus_topic must be None in partial mode (modes are exclusive).
|
|
assert call.kwargs.get("focus_topic") is None
|
|
|
|
|
|
def test_compress_here_reappends_verbatim_tail(capsys):
|
|
"""The most recent exchanges are preserved verbatim after the summary."""
|
|
shell = _make_cli()
|
|
history = _make_history()
|
|
shell.conversation_history = history
|
|
# Head compresses to an assistant-role summary so the seam
|
|
# (assistant -> user tail) is already valid — tail rides along whole.
|
|
summary = [{"role": "assistant", "content": "[summary]"}]
|
|
_wire_agent(shell, summary)
|
|
|
|
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
|
|
shell._manual_compress("/compress here 2")
|
|
|
|
# Result = compressed head + verbatim tail (last 2 exchanges).
|
|
assert shell.conversation_history == summary + history[4:]
|
|
# Tail boundary keeps role alternation valid (tail starts on user).
|
|
assert history[4]["role"] == "user"
|
|
# No consecutive same-role user/assistant messages anywhere.
|
|
roles = [m["role"] for m in shell.conversation_history
|
|
if m["role"] in ("user", "assistant")]
|
|
assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1))
|
|
|
|
|
|
def test_compress_here_banner_mentions_summarizing_up_to_here(capsys):
|
|
shell = _make_cli()
|
|
history = _make_history()
|
|
shell.conversation_history = history
|
|
_wire_agent(shell, [{"role": "user", "content": "[summary]"}])
|
|
|
|
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
|
|
shell._manual_compress("/compress here")
|
|
|
|
out = capsys.readouterr().out
|
|
assert "Summarizing up to here" in out
|
|
assert "verbatim" in out
|
|
|
|
|
|
def test_bare_compress_still_full(capsys):
|
|
"""/compress with no args compresses the whole history (full mode)."""
|
|
shell = _make_cli()
|
|
history = _make_history()
|
|
shell.conversation_history = history
|
|
_wire_agent(shell, list(history))
|
|
|
|
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
|
|
shell._manual_compress("/compress")
|
|
|
|
call = shell.agent._compress_context.call_args
|
|
# Full mode passes the entire history as the head.
|
|
assert call.args[0] == history
|
|
out = capsys.readouterr().out
|
|
assert "Summarizing up to here" not in out
|
|
|
|
|
|
def test_focus_still_works(capsys):
|
|
"""/compress <focus> keeps the existing focus behavior."""
|
|
shell = _make_cli()
|
|
history = _make_history()
|
|
shell.conversation_history = history
|
|
_wire_agent(shell, list(history))
|
|
|
|
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
|
|
shell._manual_compress("/compress database schema")
|
|
|
|
call = shell.agent._compress_context.call_args
|
|
assert call.args[0] == history
|
|
assert call.kwargs.get("focus_topic") == "database schema"
|