fix(compressor): strip stale handoff prefix on resume; reconcile #26290+#32787 (#35344)

A handoff persisted under an older SUMMARY_PREFIX can be inherited into a
resumed lineage. _strip_summary_prefix only matched the current/legacy
literal, so on re-compaction the old 'resume exactly from Active Task'
directive stayed embedded in the body and kept hijacking replies to new,
unrelated user messages.

- Add _HISTORICAL_SUMMARY_PREFIXES (pre-#35344 prefix) and strip/recognize
  them in _strip_summary_prefix + _is_context_summary_content so resumed
  stale handoffs are re-normalized to the current latest-message-wins prefix.
- Reconcile the overlapping Active Task template edits from the salvaged
  #26290 (reverse-signal cancellation) and #32787 (capture open questions /
  decisions, don't write None too eagerly) — both intents kept.
- Regression coverage in tests/agent/test_resume_stale_active_task.py.
- AUTHOR_MAP entries for both salvaged contributors.
This commit is contained in:
Teknium
2026-05-30 07:16:48 -07:00
parent 56b8dccf25
commit 42bbd221e8
3 changed files with 175 additions and 3 deletions

View File

@ -61,6 +61,26 @@ SUMMARY_PREFIX = (
) )
# Oldest handoff marker; still recognized when reading persisted summaries.
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
# Handoff prefixes that shipped in earlier releases. A summary persisted under
# one of these can be inherited into a resumed lineage (#35344); when it is
# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
# stale directive it carried (e.g. "resume exactly from Active Task") survives
# embedded in the body and keeps hijacking replies. Keep newest-first; entries
# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
_HISTORICAL_SUMMARY_PREFIXES = (
# Pre-#35344: contained the self-contradicting "resume exactly" directive.
"[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:",
)
# Minimum tokens for the summary output # Minimum tokens for the summary output
_MIN_SUMMARY_TOKENS = 2000 _MIN_SUMMARY_TOKENS = 2000
# Proportion of compressed content to allocate for summary # Proportion of compressed content to allocate for summary
@ -1496,9 +1516,16 @@ The user has requested that this compaction PRIORITISE preserving all informatio
@staticmethod
def _strip_summary_prefix(summary: str) -> str:
    """Return the summary body without its leading handoff prefix.

    The current prefix, the legacy prefix, and every entry of
    ``_HISTORICAL_SUMMARY_PREFIXES`` are recognized. Historical prefixes
    must be stripped too: a handoff persisted under an older prefix can be
    inherited into a resumed lineage (#35344), and if only the current
    prefix were re-prepended without removing the old one, the stale
    directive it carried would stay embedded in the body.

    Args:
        summary: Raw summary text; a falsy value is treated as ``""``.

    Returns:
        The whitespace-stripped text with the first matching prefix (and
        any whitespace following it) removed, or the stripped text
        unchanged when no known prefix matches. At most one prefix is
        removed per call.
    """
    text = (summary or "").strip()
    # Candidates are tried in order: current wording first, then the legacy
    # marker, then the frozen historical wordings.
    for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
        if text.startswith(prefix):
            return text[len(prefix):].lstrip()
    return text
@ -1512,7 +1539,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
@staticmethod
def _is_context_summary_content(content: Any) -> bool:
    """Report whether ``content`` begins with a known handoff prefix.

    The text is obtained via ``_content_text_for_contains`` and leading
    whitespace is ignored. The current prefix, the legacy prefix, and every
    entry of ``_HISTORICAL_SUMMARY_PREFIXES`` all count as a match, so a
    handoff written by an older release is still recognized.

    Args:
        content: Message content in whatever form
            ``_content_text_for_contains`` accepts.

    Returns:
        ``True`` when the text starts with any known prefix, else ``False``.
    """
    text = _content_text_for_contains(content).lstrip()
    # ``str.startswith`` accepts a tuple of candidates, so a single call
    # covers the current, legacy, and historical prefixes.
    known_prefixes = (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES)
    return text.startswith(known_prefixes)
@classmethod @classmethod
def _find_latest_context_summary( def _find_latest_context_summary(

View File

@ -45,6 +45,8 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides # Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = { AUTHOR_MAP = {
"zhipengli@thebrainly.ai": "a1245582339",
"mathijs.vd.hurk@gmail.com": "mathijsvandenhurk",
"drpelagik@gmail.com": "SeaXen", "drpelagik@gmail.com": "SeaXen",
"lengr@users.noreply.github.com": "LengR", "lengr@users.noreply.github.com": "LengR",
"metalclaudbot@gmail.com": "HashClawAI", "metalclaudbot@gmail.com": "HashClawAI",

View File

@ -0,0 +1,141 @@
"""Regression coverage for #35344: a resumed session must not let a stale
``## Active Task`` from an inherited compaction handoff hijack the reply to a
new, unrelated user message.

The failure mode (real report): a lineage was compacted, producing a handoff
whose ``## Active Task`` described task A. The lineage was resumed later and
the user asked about an unrelated task B. The model answered with A because
the handoff's resume directive outranked the fresh ask.

The structural fix lives in ``SUMMARY_PREFIX``: the handoff is framed as
reference-only and the latest user message explicitly *wins* on conflict, with
named reverse-signal verbs. Two invariants guard the resume path specifically:

1. A handoff persisted under the OLD (conflicting) prefix is re-normalized to
   the CURRENT prefix when it is re-compacted on a resumed lineage — so a
   pre-fix stale handoff cannot keep its "resume exactly" directive forever.
2. The current handoff prefix contains an unambiguous "latest message wins /
   discard stale Active Task" rule, so an unrelated new ask is privileged over
   the inherited ``## Active Task``.

These are content/structural assertions (no live model call) — they pin the
mechanism that makes the stale task historical rather than active.
"""
from agent.context_compressor import (
SUMMARY_PREFIX,
LEGACY_SUMMARY_PREFIX,
ContextCompressor,
)
# The conflicting prefix that shipped before the #35344 fix. A handoff
# persisted in a resumed lineage could carry this verbatim.
_OLD_CONFLICTING_PREFIX = (
"[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
)
def test_latest_message_wins_over_inherited_active_task():
    """Pin the core #35344 contract on the current prefix.

    The handoff wording has to name both the latest user message and the
    ``## Active Task`` section, state outright which one prevails, and tell
    the model to discard the stale task.
    """
    prefix_text = SUMMARY_PREFIX.lower()
    for required_phrase in ("latest user message", "## active task"):
        assert required_phrase in prefix_text
    # The conflict rule has to be spelled out, not left implicit.
    assert any(verb in prefix_text for verb in ("wins", "supersede"))
    assert "discard" in prefix_text
def test_no_resume_exactly_directive_can_hijack():
    """The current prefix must no longer carry the "resume exactly"
    wording that caused the hijack."""
    current_prefix = SUMMARY_PREFIX.lower()
    assert "resume exactly" not in current_prefix
def test_resumed_stale_handoff_gets_renormalized_to_current_prefix():
    """Re-normalizing an old-prefix handoff upgrades it to the current prefix.

    The fixture mimics a handoff saved before the fix and inherited into a
    resumed lineage. After ``_with_summary_prefix`` the body must survive
    while the "resume exactly" directive must not.
    """
    inherited_body = (
        "## Active Task\n"
        "User asked: 'Migrate the billing module to Stripe'\n\n"
        "## Goal\nMigrate billing.\n"
    )
    inherited_handoff = f"{_OLD_CONFLICTING_PREFIX}\n{inherited_body}"

    # Guard against a broken fixture: it must really contain the old directive.
    assert "resume exactly" in inherited_handoff.lower()

    upgraded = ContextCompressor._with_summary_prefix(inherited_handoff)
    upgraded_lower = upgraded.lower()

    # Body content is kept intact...
    assert "Migrate the billing module to Stripe" in upgraded
    # ...whereas the conflicting directive is gone and the current
    # latest-message-wins framing has taken its place.
    assert "resume exactly" not in upgraded_lower
    assert upgraded.startswith(SUMMARY_PREFIX)
    assert "wins" in upgraded_lower
def test_legacy_prefix_handoff_also_renormalized():
    """A handoff in the oldest ``[CONTEXT SUMMARY]:`` format, as may sit in
    a long-lived resumed lineage, receives the same upgrade."""
    legacy_handoff = f"{LEGACY_SUMMARY_PREFIX} ## Active Task\nUser asked: 'task A'"

    upgraded = ContextCompressor._with_summary_prefix(legacy_handoff)

    assert upgraded.startswith(SUMMARY_PREFIX)
    assert LEGACY_SUMMARY_PREFIX not in upgraded
    assert "task A" in upgraded
def test_inherited_handoff_detected_in_resumed_protected_head():
    """A handoff sitting in the protected head of a resumed lineage is found.

    On resume the handoff commonly sits right after the system prompt.
    ``_find_latest_context_summary`` has to detect it there so re-compaction
    rehydrates state from it instead of serializing it as a fresh user turn
    (which is what let the stale Active Task read as live intent).
    """
    messages = [
        {"role": "system", "content": "system prompt"},
        {"role": "user", "content": f"{SUMMARY_PREFIX}\n## Active Task\nUser asked: 'task A'"},
        {"role": "assistant", "content": "ok"},
        {"role": "user", "content": "Unrelated task B: what's the capital of France?"},
    ]

    # Scan everything that follows the system prompt.
    search_end = len(messages)
    found_at, summary_body = ContextCompressor._find_latest_context_summary(
        messages, 1, search_end
    )

    assert found_at == 1, "handoff in protected head must be found"
    assert "task A" in summary_body
    # The returned body has the prefix removed: it is treated as state, not
    # as a standalone instruction message.
    assert not summary_body.startswith(SUMMARY_PREFIX)
def test_historical_prefixed_handoff_detected_and_stripped():
    """A pre-fix handoff is both recognized and stripped on detection.

    A handoff carrying the old conflicting prefix, inherited into a resumed
    lineage, must still count as a context summary AND lose its old
    directive; otherwise re-compaction would serialize the stale
    'resume exactly' text as a fresh turn.
    """
    messages = [
        {"role": "system", "content": "system prompt"},
        {"role": "user", "content": f"{_OLD_CONFLICTING_PREFIX}\n## Active Task\nUser asked: 'task A'"},
        {"role": "assistant", "content": "ok"},
        {"role": "user", "content": "Unrelated task B"},
    ]

    search_end = len(messages)
    found_at, summary_body = ContextCompressor._find_latest_context_summary(
        messages, 1, search_end
    )

    assert found_at == 1
    assert "task A" in summary_body
    assert "resume exactly" not in summary_body.lower()