From 42bbd221e8e38a0c8213cff9e2d16a640d0d8760 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 30 May 2026 07:16:48 -0700 Subject: [PATCH] fix(compressor): strip stale handoff prefix on resume; reconcile #26290+#32787 (#35344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A handoff persisted under an older SUMMARY_PREFIX can be inherited into a resumed lineage. _strip_summary_prefix only matched the current/legacy literal, so on re-compaction the old 'resume exactly from Active Task' directive stayed embedded in the body and kept hijacking replies to new, unrelated user messages. - Add _HISTORICAL_SUMMARY_PREFIXES (pre-#35344 prefix) and strip/recognize them in _strip_summary_prefix + _is_context_summary_content so resumed stale handoffs are re-normalized to the current latest-message-wins prefix. - Reconcile the overlapping Active Task template edits from the salvaged #26290 (reverse-signal cancellation) and #32787 (capture open questions / decisions, don't write None too eagerly) — both intents kept. - Regression coverage in tests/agent/test_resume_stale_active_task.py. - AUTHOR_MAP entries for both salvaged contributors. --- agent/context_compressor.py | 35 ++++- scripts/release.py | 2 + tests/agent/test_resume_stale_active_task.py | 141 +++++++++++++++++++ 3 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 tests/agent/test_resume_stale_active_task.py diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 4f1b91894..079c4b0b5 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -61,6 +61,26 @@ SUMMARY_PREFIX = ( ) LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:" +# Handoff prefixes that shipped in earlier releases. 
@staticmethod
def _strip_summary_prefix(summary: str) -> str:
    """Return the summary body with every leading handoff prefix removed.

    Recognized prefixes are the current ``SUMMARY_PREFIX``, the
    ``LEGACY_SUMMARY_PREFIX`` and each entry of
    ``_HISTORICAL_SUMMARY_PREFIXES``; all are matched literally at the
    start of the whitespace-stripped text.

    Prefixes are removed repeatedly rather than once. A handoff persisted
    under an older prefix can be inherited into a resumed lineage (#35344),
    and a handoff that was already re-prefixed without the old prefix being
    removed carries two prefixes back to back. Stripping only the outer one
    would leave the stale directive of the inner one embedded in the body.

    Args:
        summary: Handoff text, with or without a prefix. ``None`` or an
            empty string is treated as empty text.

    Returns:
        The body with surrounding whitespace removed and no recognized
        prefix at its start.
    """
    text = (summary or "").strip()
    known_prefixes = (
        SUMMARY_PREFIX,
        LEGACY_SUMMARY_PREFIX,
        *_HISTORICAL_SUMMARY_PREFIXES,
    )
    while True:
        for prefix in known_prefixes:
            # Skip empty entries: they match every string without
            # shortening it, so the loop could never terminate.
            if prefix and text.startswith(prefix):
                text = text[len(prefix):].lstrip()
                break
        else:
            # No known prefix leads the text any more.
            return text
+ +The structural fix lives in ``SUMMARY_PREFIX``: the handoff is framed as +reference-only and the latest user message explicitly *wins* on conflict, with +named reverse-signal verbs. Two invariants guard the resume path specifically: + + 1. A handoff persisted under the OLD (conflicting) prefix is re-normalized to + the CURRENT prefix when it is re-compacted on a resumed lineage — so a + pre-fix stale handoff cannot keep its "resume exactly" directive forever. + + 2. The current handoff prefix contains an unambiguous "latest message wins / + discard stale Active Task" rule, so an unrelated new ask is privileged over + the inherited ``## Active Task``. + +These are content/structural assertions (no live model call) — they pin the +mechanism that makes the stale task historical rather than active. +""" + +from agent.context_compressor import ( + SUMMARY_PREFIX, + LEGACY_SUMMARY_PREFIX, + ContextCompressor, +) + + +# The conflicting prefix that shipped before the #35344 fix. A handoff +# persisted in a resumed lineage could carry this verbatim. +_OLD_CONFLICTING_PREFIX = ( + "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted " + "into the summary below. This is a handoff from a previous context " + "window — treat it as background reference, NOT as active instructions. " + "Do NOT answer questions or fulfill requests mentioned in this summary; " + "they were already addressed. " + "Your current task is identified in the '## Active Task' section of the " + "summary — resume exactly from there. " + "Respond ONLY to the latest user message " + "that appears AFTER this summary. The current session state (files, " + "config, etc.) 
def test_latest_message_wins_over_inherited_active_task():
    """Core #35344 contract: the current handoff prefix must state outright
    that the latest user message outranks a stale ``## Active Task``."""
    prefix_text = SUMMARY_PREFIX.lower()

    # Both sides of the conflict have to be named in the prefix.
    for required_phrase in ("latest user message", "## active task"):
        assert required_phrase in prefix_text

    # The resolution rule must be spelled out rather than implied.
    assert any(verb in prefix_text for verb in ("wins", "supersede"))
    assert "discard" in prefix_text
def test_legacy_prefix_handoff_also_renormalized():
    """A handoff in the oldest ``[CONTEXT SUMMARY]:`` format is upgraded to
    the current prefix as well, with its body kept intact."""
    legacy_handoff = f"{LEGACY_SUMMARY_PREFIX} ## Active Task\nUser asked: 'task A'"

    upgraded = ContextCompressor._with_summary_prefix(legacy_handoff)

    # The current prefix leads the result and the legacy marker is gone.
    assert upgraded.startswith(SUMMARY_PREFIX)
    assert LEGACY_SUMMARY_PREFIX not in upgraded
    # The task description from the body survives the upgrade.
    assert "task A" in upgraded
def test_historical_prefixed_handoff_detected_and_stripped():
    """A handoff written under the old conflicting prefix must still be
    detected as a context summary, and the body returned by detection must
    no longer contain the old 'resume exactly' directive."""
    stale_handoff = f"{_OLD_CONFLICTING_PREFIX}\n## Active Task\nUser asked: 'task A'"
    conversation = [
        {"role": "system", "content": "system prompt"},
        {"role": "user", "content": stale_handoff},
        {"role": "assistant", "content": "ok"},
        {"role": "user", "content": "Unrelated task B"},
    ]

    # Search everything after the system prompt.
    found_index, found_body = ContextCompressor._find_latest_context_summary(
        conversation, 1, len(conversation)
    )

    assert found_index == 1
    assert "task A" in found_body
    assert "resume exactly" not in found_body.lower()