From bcc83010006c7059ee4d0be63fe74afc74867625 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 29 May 2026 17:49:15 -0700 Subject: [PATCH] =?UTF-8?q?Inspired=20by=20Claude=20Code:=20/compress=20he?= =?UTF-8?q?re=20[N]=20=E2=80=94=20boundary-aware=20'summarize=20up=20to=20?= =?UTF-8?q?here'=20(#35048)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a user-chosen compression boundary to the existing /compress command. /compress here [N] summarizes everything except the most recent N exchanges (default 2), which are preserved verbatim — letting the user pick the compression boundary instead of relying on the automatic token-budget heuristic. Inspired by Claude Code's Rewind 'Summarize up to here' action (v2.1.139, Week 20, May 2026): https://code.claude.com/docs/en/whats-new/2026-w20 - hermes_cli/partial_compress.py: pure split/parse helpers + seam-alternation guard (shared by CLI and gateway). - cli.py / gateway/run.py: route 'here [N]' / '--keep N' to partial compression; compress only the head, re-append the verbatim tail through the seam guard. - Preserves message-flow role alternation (seam guard merges any illegal user->user / assistant->assistant adjacency). - Reuses the existing _compress_context session-rotation/lock machinery — no changes to the compression core. - Bare /compress (full) and /compress <focus> behavior unchanged. Tests: 19 helper unit tests + 5 CLI integration tests + E2E (interleaved tool-call transcript, degenerate/multimodal seams, real handler path). 
--- cli.py | 70 +++++-- gateway/run.py | 39 +++- hermes_cli/commands.py | 4 +- hermes_cli/partial_compress.py | 235 +++++++++++++++++++++++ tests/cli/test_compress_here.py | 119 ++++++++++++ tests/cli/test_partial_compress.py | 198 +++++++++++++++++++ website/docs/reference/slash-commands.md | 4 +- 7 files changed, 650 insertions(+), 19 deletions(-) create mode 100644 hermes_cli/partial_compress.py create mode 100644 tests/cli/test_compress_here.py create mode 100644 tests/cli/test_partial_compress.py diff --git a/cli.py b/cli.py index 2bc64c9c5..23035297f 100644 --- a/cli.py +++ b/cli.py @@ -9947,10 +9947,20 @@ class HermesCLI: def _manual_compress(self, cmd_original: str = ""): """Manually trigger context compression on the current conversation. - Accepts an optional focus topic: ``/compress `` guides the - summariser to preserve information related to *focus* while being - more aggressive about discarding everything else. Inspired by - Claude Code's ``/compact `` feature. + Two modes: + + * ``/compress []`` — compress the *whole* history. An + optional focus topic guides the summariser to preserve + information related to *focus* while being more aggressive + about discarding everything else. Inspired by Claude Code's + ``/compact `` feature. + * ``/compress here [N]`` — boundary-aware compression. Summarize + everything *except* the most recent ``N`` exchanges (default + 2), which are preserved verbatim. Inspired by Claude Code's + Rewind "Summarize up to here" action (v2.1.139, May 2026, + https://code.claude.com/docs/en/whats-new/2026-w20). Lets the + user pick the compression boundary instead of leaving it to + the automatic token-budget heuristic. """ if not self.conversation_history or len(self.conversation_history) < 4: print("(._.) Not enough conversation to compress (need at least 4 messages).") @@ -9964,12 +9974,21 @@ class HermesCLI: print("(._.) Compression is disabled in config.") return - # Extract optional focus topic from the command (e.g. 
"/compress database schema") - focus_topic = "" + from hermes_cli.partial_compress import ( + parse_partial_compress_args, + rejoin_compressed_head_and_tail, + split_history_for_partial_compress, + ) + + # Args after the command word (e.g. "/compress here 3" -> "here 3"). + raw_args = "" if cmd_original: - parts = cmd_original.strip().split(None, 1) - if len(parts) > 1: - focus_topic = parts[1].strip() + _parts = cmd_original.strip().split(None, 1) + if len(_parts) > 1: + raw_args = _parts[1].strip() + + partial, keep_last, focus_topic = parse_partial_compress_args(raw_args) + focus_topic = focus_topic or "" original_count = len(self.conversation_history) with self._busy_command("Compressing context..."): @@ -9977,6 +9996,22 @@ class HermesCLI: from agent.model_metadata import estimate_request_tokens_rough from agent.manual_compression_feedback import summarize_manual_compression original_history = list(self.conversation_history) + + # Boundary-aware split: only the head is summarized; the + # most recent `keep_last` exchanges ride along verbatim. + tail: list = [] + head = original_history + if partial: + head, tail = split_history_for_partial_compress( + original_history, keep_last + ) + if not tail: + # Split degenerated (everything would be kept, or + # no head left to compress). Fall back to full + # compression so the user still gets an action. 
+ partial = False + head = original_history + # Include system prompt + tool schemas in the estimate — # a transcript-only number understates real request pressure # and can even appear to grow after compression because a @@ -9988,7 +10023,11 @@ class HermesCLI: system_prompt=_sys_prompt, tools=_tools, ) - if focus_topic: + if partial: + print(f"🗜️ Summarizing up to here: compressing {len(head)} of " + f"{original_count} messages (~{approx_tokens:,} tokens), " + f"keeping last {keep_last} exchange(s) verbatim...") + elif focus_topic: print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), " f"focus: \"{focus_topic}\"...") else: @@ -10001,12 +10040,21 @@ class HermesCLI: # which already contain the agent identity — resulting in the # identity block appearing twice (issue #15281). compressed, _ = self.agent._compress_context( - original_history, + head, None, approx_tokens=approx_tokens, focus_topic=focus_topic or None, force=True, ) + # Re-append the verbatim tail after the compressed head. + # The split guarantees `tail` begins on a user turn, so the + # compressed-head -> tail boundary is normally valid + # (the head's compressed output ends on assistant/tool). + # rejoin_compressed_head_and_tail() additionally guards the + # seam against any illegal user->user / assistant->assistant + # adjacency, defending provider role-alternation rules. + if partial and tail: + compressed = rejoin_compressed_head_and_tail(compressed, tail) self.conversation_history = compressed # _compress_context ends the old session and creates a new child # session on the agent (run_agent.py::_compress_context). Sync the diff --git a/gateway/run.py b/gateway/run.py index bb618e185..5cdc5894c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -12449,6 +12449,12 @@ class GatewayRunner: Accepts an optional focus topic: ``/compress `` guides the summariser to preserve information related to *focus* while being more aggressive about discarding everything else. 
+ + Also accepts the boundary-aware form ``/compress here [N]``: + summarize everything except the most recent ``N`` exchanges + (default 2), kept verbatim. Inspired by Claude Code's Rewind + "Summarize up to here" action (v2.1.139, May 2026, + https://code.claude.com/docs/en/whats-new/2026-w20). """ source = event.source session_entry = self.session_store.get_or_create_session(source) @@ -12457,8 +12463,15 @@ class GatewayRunner: if not history or len(history) < 4: return t("gateway.compress.not_enough") - # Extract optional focus topic from command args - focus_topic = (event.get_command_args() or "").strip() or None + # Parse args: either a focus topic (full compress) or the + # boundary-aware "here [N]" form (partial compress). + from hermes_cli.partial_compress import ( + parse_partial_compress_args, + rejoin_compressed_head_and_tail, + split_history_for_partial_compress, + ) + _raw_args = (event.get_command_args() or "").strip() + partial, keep_last, focus_topic = parse_partial_compress_args(_raw_args) try: from run_agent import AIAgent @@ -12479,6 +12492,19 @@ class GatewayRunner: if m.get("role") in {"user", "assistant"} and m.get("content") ] + # Boundary-aware split: only the head is summarized; the most + # recent `keep_last` exchanges are preserved verbatim. The + # split snaps the tail to a user-turn start so the rejoined + # transcript keeps role alternation valid. + tail: list = [] + head = msgs + if partial: + head, tail = split_history_for_partial_compress(msgs, keep_last) + if not tail: + # Degenerate split — fall back to full compression. 
+ partial = False + head = msgs + tmp_agent = AIAgent( **runtime_kwargs, model=model, @@ -12502,15 +12528,20 @@ class GatewayRunner: ) compressor = tmp_agent.context_compressor - if not compressor.has_content_to_compress(msgs): + if not compressor.has_content_to_compress(head): return t("gateway.compress.nothing_to_do") loop = asyncio.get_running_loop() compressed, _ = await loop.run_in_executor( None, - lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True) + lambda: tmp_agent._compress_context(head, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True) ) + # Re-append the verbatim tail after the compressed head, + # guarding the seam against illegal role adjacency. + if partial and tail: + compressed = rejoin_compressed_head_and_tail(compressed, tail) + # _compress_context already calls end_session() on the old session # (preserving its full transcript in SQLite) and creates a new # session_id for the continuation. 
Write the compressed messages diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index dc81ff7e8..a2db37be2 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -85,8 +85,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="", cli_only=True), CommandDef("branch", "Branch the current session (explore a different path)", "Session", aliases=("fork",), args_hint="[name]"), - CommandDef("compress", "Manually compress conversation context", "Session", - args_hint="[focus topic]"), + CommandDef("compress", "Compress conversation context (add 'here [N]' to keep recent N turns)", "Session", + args_hint="[here [N] | focus topic]"), CommandDef("rollback", "List or restore filesystem checkpoints", "Session", args_hint="[number]"), CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session", diff --git a/hermes_cli/partial_compress.py b/hermes_cli/partial_compress.py new file mode 100644 index 000000000..dc1115d9f --- /dev/null +++ b/hermes_cli/partial_compress.py @@ -0,0 +1,235 @@ +"""Boundary-aware partial compression — "summarize up to here". + +Inspired by Claude Code's Rewind menu "Summarize up to here" action +(v2.1.139–v2.1.142, Week 20, May 2026): +https://code.claude.com/docs/en/whats-new/2026-w20 + +Hermes already has ``/compress`` (full-history compaction) and an +automatic token-budget tail-protection heuristic inside +``ContextCompressor``. What was missing is *user-chosen* boundary +control: "fold everything before this point into a summary, but keep +my most recent N exchanges exactly as they are." That is the value of +the Claude Code feature — the user decides the compression boundary +instead of leaving it to the token-budget heuristic. + +This module owns the pure, side-effect-free split logic so both the +CLI (``cli.py::_manual_compress``) and the gateway +(``gateway/run.py::_handle_compress_command``) share one +implementation. 
The slash-command surfaces handle compression of the +*head* via the existing ``_compress_context`` pipeline (preserving all +the session-rotation / lock / memory-notify machinery) and then +re-append the verbatim *tail* returned here. + +Design notes / invariants honored: + +* **Role alternation.** The compressed head ends with summary/handoff + content (assistant- or user-role, possibly a trailing todo snapshot). + The verbatim tail must begin with a ``user`` message so the rejoined + history keeps the user↔assistant alternation that providers validate. + :func:`split_history_for_partial_compress` snaps the tail to a ``user`` + turn; :func:`rejoin_compressed_head_and_tail` repairs any same-role seam. + +* **No silent context mutation.** This is a manual, user-invoked + action. It rotates the session exactly like ``/compress`` does (via + the caller), so the prompt-cache reset is explicit and expected, not + silent. + +* **Conservative defaults.** ``keep_last`` counts *exchanges* (a user + turn plus its following assistant/tool turns), defaulting to 2. The + split never compresses if doing so would leave nothing in the head. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Tuple + +#: Default number of recent exchanges to preserve verbatim when the user +#: runs ``/compress here`` without an explicit count. +DEFAULT_KEEP_LAST = 2 + +#: Hard ceiling so a fat-fingered ``/compress here 9999`` doesn't turn +#: into a no-op surprise — clamp instead. +MAX_KEEP_LAST = 100 + + +def parse_partial_compress_args( + raw_args: str, +) -> Tuple[bool, int, Optional[str]]: + """Parse the argument string after ``/compress``. 
+ + Recognizes the boundary-aware forms: + + * ``here`` → partial compress, keep ``DEFAULT_KEEP_LAST`` + * ``here 4`` → partial compress, keep 4 exchanges + * ``--keep 4`` → partial compress, keep 4 exchanges + * ``up to here`` → alias for ``here`` (matches Claude Code's + menu label "Summarize up to here") + + Anything else is treated as a focus topic for the existing full + ``/compress <focus>`` behavior. + + Returns ``(partial, keep_last, focus_topic)``: + + * ``partial`` — True when a boundary-aware form was requested. + * ``keep_last`` — exchanges to preserve verbatim (only meaningful + when ``partial`` is True). + * ``focus_topic`` — focus string for full compression, or None. + Always None when ``partial`` is True (the two modes are exclusive; + a focused partial compress is not a documented Claude Code + behavior and would muddy the UX). + """ + text = (raw_args or "").strip() + if not text: + return False, DEFAULT_KEEP_LAST, None + + lowered = text.lower() + + # Normalize the "up to here" alias to "here". + if lowered.startswith("up to here"): + lowered = lowered[len("up to ") :] + text = text[len("up to ") :] + + tokens = lowered.split() + + # Form: here [N] + if tokens and tokens[0] == "here": + keep = DEFAULT_KEEP_LAST + if len(tokens) >= 2: + keep = _coerce_keep(tokens[1]) + return True, keep, None + + # Form: --keep N (or --keep=N) + if tokens and tokens[0] in ("--keep", "-k") and len(tokens) >= 2: + return True, _coerce_keep(tokens[1]), None + if tokens and tokens[0].startswith("--keep="): + return True, _coerce_keep(tokens[0].split("=", 1)[1]), None + + # Otherwise: full compression with this as the focus topic. 
+ return False, DEFAULT_KEEP_LAST, text or None + + +def _coerce_keep(value: str) -> int: + """Parse a keep-count token, clamping to [1, MAX_KEEP_LAST].""" + try: + n = int(value) + except (TypeError, ValueError): + return DEFAULT_KEEP_LAST + if n < 1: + return 1 + if n > MAX_KEEP_LAST: + return MAX_KEEP_LAST + return n + + +def split_history_for_partial_compress( + history: List[Dict[str, Any]], + keep_last: int, +) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + """Split ``history`` into ``(head, tail)`` for partial compression. + + ``head`` is the earlier portion that will be summarized; ``tail`` is + the most recent ``keep_last`` exchanges, preserved verbatim. + + An *exchange* is counted by ``user``-role messages: keeping N + exchanges means keeping everything from the Nth-most-recent ``user`` + message onward. This guarantees the tail starts on a ``user`` turn, + so when the caller rejoins ``compressed_head + tail`` the + user↔assistant alternation stays valid (the compressed head's + trailing content is followed by a fresh user turn). + + Returns ``(head, tail)``. If the split would leave the head empty + (not enough history to compress meaningfully), returns + ``(history, [])`` — signaling the caller to fall back to full + compression or report "nothing to do". + """ + if keep_last < 1: + keep_last = 1 + + n = len(history) + if n == 0: + return [], [] + + # Walk backwards collecting the indices of the most recent `keep_last` + # user-message starts. The tail begins at the earliest such index. + user_starts: List[int] = [] + for idx in range(n - 1, -1, -1): + if history[idx].get("role") == "user": + user_starts.append(idx) + if len(user_starts) >= keep_last: + break + + if not user_starts: + # No user turns at all (degenerate) — nothing sensible to keep + # as a "recent exchange"; treat as full compression. 
+ return list(history), [] + + boundary = user_starts[-1] # earliest of the kept user starts + + head = history[:boundary] + tail = history[boundary:] + + # If everything is in the tail (nothing left to compress), signal the + # caller to fall back to full compression rather than producing a + # no-op that rotates the session for no benefit. + if not head: + return list(history), [] + + return head, tail + + +def rejoin_compressed_head_and_tail( + compressed_head: List[Dict[str, Any]], + tail: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: + """Concatenate a compressed head with the verbatim tail, defending + the seam against an illegal user→user / assistant→assistant adjacency. + + In normal operation the compressed head ends with the head's own + protected verbatim tail (the ``ContextCompressor`` always preserves a + recent window), which terminates on an ``assistant``/``tool`` turn — + so ``assistant → user`` at the seam is already valid. But the head + compressor's exact output shape is not contractually guaranteed (a + plugin context engine could return something that ends on a ``user`` + turn, or a degenerate single-summary message). Rather than trust the + seam, this helper inspects the boundary and, if the last head message + and the first tail message share a ``user``/``assistant`` role, folds + the tail's first message content onto the head's last message so the + rejoined list never violates provider role-alternation rules. + + ``tool`` messages are left alone — consecutive ``tool`` entries are + the one legal repetition (parallel tool results). + """ + if not tail: + return list(compressed_head) + if not compressed_head: + return list(tail) + + head = list(compressed_head) + rest = list(tail) + + last = head[-1] + first = rest[0] + last_role = last.get("role") + first_role = first.get("role") + + if last_role == first_role and last_role in ("user", "assistant"): + # Illegal adjacency. 
Merge the tail's first message text into the + # head's last message so alternation is preserved. Only string + # contents are merged inline; structured/multimodal contents are + # left untouched and a bridging turn is inserted instead (see the + # else branch below), so nothing from the tail is dropped. + last_content = last.get("content") + first_content = first.get("content") + if isinstance(last_content, str) and isinstance(first_content, str): + merged = dict(last) + merged["content"] = f"{last_content}\n\n{first_content}" + head[-1] = merged + rest = rest[1:] + else: + # Can't safely string-merge multimodal content. Insert a + # minimal bridging turn so the seam alternates rather than + # losing data. + bridge_role = "assistant" if first_role == "user" else "user" + head.append({"role": bridge_role, "content": ""}) + + return head + rest diff --git a/tests/cli/test_compress_here.py b/tests/cli/test_compress_here.py new file mode 100644 index 000000000..115a12539 --- /dev/null +++ b/tests/cli/test_compress_here.py @@ -0,0 +1,119 @@ +"""Tests for /compress here [N] — boundary-aware partial compression. + +Verifies the CLI handler (_manual_compress) splits the history, compresses +only the head, and re-appends the verbatim tail. Inspired by Claude Code's +Rewind "Summarize up to here" action (v2.1.139, May 2026). +""" + +from unittest.mock import MagicMock, patch + +from tests.cli.test_cli_init import _make_cli + + +def _make_history() -> list[dict[str, str]]: + # 8 messages = 4 exchanges. 
+ h: list[dict[str, str]] = [] + for i in range(4): + h.append({"role": "user", "content": f"u{i}"}) + h.append({"role": "assistant", "content": f"a{i}"}) + return h + + +def _wire_agent(shell, compressed_head): + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent.session_id = None + shell.agent.tools = None + shell.agent._compress_context.return_value = (compressed_head, "") + + +def test_compress_here_compresses_head_only(capsys): + """/compress here 2 passes only the head to _compress_context.""" + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + # Pretend compression collapses the head into a single summary message. + summary = [{"role": "user", "content": "[summary of earlier turns]"}] + _wire_agent(shell, summary) + + with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100): + shell._manual_compress("/compress here 2") + + # _compress_context should have been called with the HEAD only + # (everything before the last 2 user-starts = first 4 messages). + shell.agent._compress_context.assert_called_once() + call = shell.agent._compress_context.call_args + passed_head = call.args[0] + assert passed_head == history[:4] + # focus_topic must be None in partial mode (modes are exclusive). + assert call.kwargs.get("focus_topic") is None + + +def test_compress_here_reappends_verbatim_tail(capsys): + """The most recent exchanges are preserved verbatim after the summary.""" + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + # Head compresses to an assistant-role summary so the seam + # (assistant -> user tail) is already valid — tail rides along whole. 
+ summary = [{"role": "assistant", "content": "[summary]"}] + _wire_agent(shell, summary) + + with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100): + shell._manual_compress("/compress here 2") + + # Result = compressed head + verbatim tail (last 2 exchanges). + assert shell.conversation_history == summary + history[4:] + # Tail boundary keeps role alternation valid (tail starts on user). + assert history[4]["role"] == "user" + # No consecutive same-role user/assistant messages anywhere. + roles = [m["role"] for m in shell.conversation_history + if m["role"] in ("user", "assistant")] + assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1)) + + +def test_compress_here_banner_mentions_summarizing_up_to_here(capsys): + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + _wire_agent(shell, [{"role": "user", "content": "[summary]"}]) + + with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100): + shell._manual_compress("/compress here") + + out = capsys.readouterr().out + assert "Summarizing up to here" in out + assert "verbatim" in out + + +def test_bare_compress_still_full(capsys): + """/compress with no args compresses the whole history (full mode).""" + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + _wire_agent(shell, list(history)) + + with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100): + shell._manual_compress("/compress") + + call = shell.agent._compress_context.call_args + # Full mode passes the entire history as the head. 
+ assert call.args[0] == history + out = capsys.readouterr().out + assert "Summarizing up to here" not in out + + +def test_focus_still_works(capsys): + """/compress keeps the existing focus behavior.""" + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + _wire_agent(shell, list(history)) + + with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100): + shell._manual_compress("/compress database schema") + + call = shell.agent._compress_context.call_args + assert call.args[0] == history + assert call.kwargs.get("focus_topic") == "database schema" diff --git a/tests/cli/test_partial_compress.py b/tests/cli/test_partial_compress.py new file mode 100644 index 000000000..a6cc30ff3 --- /dev/null +++ b/tests/cli/test_partial_compress.py @@ -0,0 +1,198 @@ +"""Tests for hermes_cli.partial_compress — the pure split/parse helpers +behind ``/compress here [N]`` (boundary-aware "summarize up to here"). + +Inspired by Claude Code's Rewind "Summarize up to here" action. 
+""" + +from hermes_cli.partial_compress import ( + DEFAULT_KEEP_LAST, + MAX_KEEP_LAST, + parse_partial_compress_args, + rejoin_compressed_head_and_tail, + split_history_for_partial_compress, +) + + +def _history(n_pairs: int) -> list[dict[str, str]]: + """Build n_pairs of (user, assistant) exchanges.""" + h: list[dict[str, str]] = [] + for i in range(n_pairs): + h.append({"role": "user", "content": f"u{i}"}) + h.append({"role": "assistant", "content": f"a{i}"}) + return h + + +# ── parse_partial_compress_args ────────────────────────────────────── + + +def test_empty_args_is_full_compress(): + partial, keep, focus = parse_partial_compress_args("") + assert partial is False + assert keep == DEFAULT_KEEP_LAST + assert focus is None + + +def test_here_defaults_keep_last(): + partial, keep, focus = parse_partial_compress_args("here") + assert partial is True + assert keep == DEFAULT_KEEP_LAST + assert focus is None + + +def test_here_with_count(): + partial, keep, focus = parse_partial_compress_args("here 4") + assert partial is True + assert keep == 4 + assert focus is None + + +def test_up_to_here_alias(): + partial, keep, focus = parse_partial_compress_args("up to here 3") + assert partial is True + assert keep == 3 + assert focus is None + + +def test_keep_flag_forms(): + for arg in ("--keep 5", "-k 5", "--keep=5"): + partial, keep, focus = parse_partial_compress_args(arg) + assert partial is True, arg + assert keep == 5, arg + assert focus is None, arg + + +def test_focus_topic_when_not_boundary_form(): + partial, keep, focus = parse_partial_compress_args("database schema") + assert partial is False + assert focus == "database schema" + + +def test_here_count_clamped_low_and_high(): + _, keep_low, _ = parse_partial_compress_args("here 0") + assert keep_low == 1 + _, keep_high, _ = parse_partial_compress_args(f"here {MAX_KEEP_LAST + 50}") + assert keep_high == MAX_KEEP_LAST + + +def test_here_garbage_count_falls_back_to_default(): + partial, keep, focus = 
parse_partial_compress_args("here lots") + assert partial is True + assert keep == DEFAULT_KEEP_LAST + + +# ── split_history_for_partial_compress ─────────────────────────────── + + +def test_split_keeps_last_n_exchanges(): + h = _history(5) # 10 messages: u0 a0 u1 a1 u2 a2 u3 a3 u4 a4 + head, tail = split_history_for_partial_compress(h, keep_last=2) + # Keep last 2 user-starts → tail begins at u3 (index 6). + assert tail == h[6:] + assert head == h[:6] + # Tail must begin on a user turn (role-alternation safety). + assert tail[0]["role"] == "user" + + +def test_split_default_keep(): + h = _history(4) # 8 messages + head, tail = split_history_for_partial_compress(h, keep_last=DEFAULT_KEEP_LAST) + assert tail[0]["role"] == "user" + assert head + tail == h + assert len(head) > 0 + + +def test_split_tail_always_starts_on_user(): + # Tool messages interleaved — tail must still snap to a user turn. + h = [ + {"role": "user", "content": "u0"}, + {"role": "assistant", "content": "a0"}, + {"role": "user", "content": "u1"}, + {"role": "assistant", "content": "a1"}, + {"role": "tool", "content": "t1"}, + {"role": "assistant", "content": "a1b"}, + {"role": "user", "content": "u2"}, + {"role": "assistant", "content": "a2"}, + ] + head, tail = split_history_for_partial_compress(h, keep_last=1) + assert tail[0]["role"] == "user" + assert tail[0]["content"] == "u2" + assert head + tail == h + + +def test_split_degenerate_returns_no_tail(): + # keep_last larger than the number of exchanges → nothing to compress. + h = _history(2) # 4 messages, 2 user turns + head, tail = split_history_for_partial_compress(h, keep_last=5) + # Boundary lands at the first user turn → head empty → signal full. 
+ assert tail == [] + assert head == h + + +def test_split_empty_history(): + head, tail = split_history_for_partial_compress([], keep_last=2) + assert head == [] + assert tail == [] + + +def test_split_rejoin_preserves_all_messages(): + h = _history(6) + head, tail = split_history_for_partial_compress(h, keep_last=3) + assert head + tail == h + + +# ── rejoin_compressed_head_and_tail (seam-alternation guard) ───────── + + +def _roles(msgs): + return [m["role"] for m in msgs if m["role"] in ("user", "assistant")] + + +def _no_consecutive_dupes(msgs): + r = _roles(msgs) + return all(r[i] != r[i + 1] for i in range(len(r) - 1)) + + +def test_rejoin_valid_seam_assistant_then_user(): + # Normal case: head ends on assistant, tail starts on user → valid. + head = [{"role": "user", "content": "[summary]"}, + {"role": "assistant", "content": "ack"}] + tail = [{"role": "user", "content": "next"}, + {"role": "assistant", "content": "reply"}] + out = rejoin_compressed_head_and_tail(head, tail) + assert out == head + tail + assert _no_consecutive_dupes(out) + + +def test_rejoin_user_user_seam_merges(): + # Degenerate head ending on a user summary; tail starts on user. + head = [{"role": "user", "content": "[summary of head]"}] + tail = [{"role": "user", "content": "latest question"}, + {"role": "assistant", "content": "answer"}] + out = rejoin_compressed_head_and_tail(head, tail) + assert _no_consecutive_dupes(out), out + # The two user messages were merged into one. 
+ assert out[0]["content"] == "[summary of head]\n\nlatest question" + assert out[1] == {"role": "assistant", "content": "answer"} + + +def test_rejoin_assistant_assistant_seam_merges(): + head = [{"role": "user", "content": "q"}, + {"role": "assistant", "content": "head end"}] + tail = [{"role": "assistant", "content": "tail start"}, + {"role": "user", "content": "u"}] + out = rejoin_compressed_head_and_tail(head, tail) + assert _no_consecutive_dupes(out), out + assert out[-2]["content"] == "head end\n\ntail start" + + +def test_rejoin_empty_tail_returns_head(): + head = [{"role": "user", "content": "x"}] + assert rejoin_compressed_head_and_tail(head, []) == head + + +def test_rejoin_tool_seam_left_alone(): + # tool->tool is the one legal repetition; don't merge. + head = [{"role": "user", "content": "q"}, {"role": "tool", "content": "t1"}] + tail = [{"role": "user", "content": "u"}] + out = rejoin_compressed_head_and_tail(head, tail) + assert out == head + tail diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 776d53089..3c8d7bbc1 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -43,7 +43,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/retry` | Retry the last message (resend to agent) | | `/undo` | Remove the last user/assistant exchange | | `/title` | Set a title for the current session (usage: /title My Session Name) | -| `/compress [focus topic]` | Manually compress conversation context (flush memories + summarize). Optional focus topic narrows what the summary preserves. | +| `/compress [here [N] \| focus topic]` | Manually compress conversation context (flush memories + summarize). `/compress here [N]` summarizes everything except the most recent N exchanges (default 2), kept verbatim — pick your own compression boundary. A focus topic narrows what a full summary preserves. 
| | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) | | `/snapshot [create\|restore <id>\|prune]` (alias: `/snap`) | Create or restore state snapshots of Hermes config/state. `create [label]` saves a snapshot, `restore <id>` reverts to it, `prune [N]` removes old snapshots, or list all with no args. | | `/stop` | Kill all running background processes | @@ -206,7 +206,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/retry` | Retry the last message. | | `/undo` | Remove the last exchange. | | `/sethome` (alias: `/set-home`) | Mark the current chat as the platform home channel for deliveries. | -| `/compress [focus topic]` | Manually compress conversation context. Optional focus topic narrows what the summary preserves. | +| `/compress [here [N] \| focus topic]` | Manually compress conversation context. `/compress here [N]` keeps the most recent N exchanges (default 2) verbatim and summarizes the rest. A focus topic narrows what a full summary preserves. | | `/topic [off\|help\|session-id]` | **Telegram DM only.** Manage user-managed multi-session topic mode. `/topic` enables it or shows status; `/topic off` disables it and clears bindings; `/topic help` shows usage; `/topic <session-id>` inside a topic restores a previous session. See [Multi-session DM mode](/user-guide/messaging/telegram#multi-session-dm-mode-topic). | | `/title [name]` | Set or show the session title. | | `/resume [name]` | Resume a previously named session. |