fix(compression): notify gateway users when summary generation fails
When auxiliary compression's summary LLM call fails (e.g. model 404,
auxiliary model misconfigured), the compressor still drops the selected
turns and inserts a static fallback placeholder — the dropped context
is unrecoverable.
Previously, the only signal of this failure was a WARNING in agent.log.
Gateway users (Telegram/Discord/etc.) had no way to know that context had
been lost: the existing _emit_warning path requires a status_callback,
but the gateway hygiene path uses a temporary _hyg_agent created with
quiet_mode=True and no callback wired up.
Changes:
- ContextCompressor: track _last_summary_fallback_used and
_last_summary_dropped_count on each compress() call. Both fields are
cleared at the start of compress() and on session reset.
- gateway/run.py hygiene: after auto-compress, inspect the temp
agent's compressor; if fallback was used, send a visible ⚠️ warning
to the user via the platform adapter (TG/Discord/etc.) including
dropped count and the underlying error.
- gateway/run.py /compress: append the same warning to the manual
compress reply so users running /compress see the failure too.
Acceptance:
- Summary success: no user-visible warning (unchanged).
- Summary failure on gateway hygiene: user receives a TG/Discord
message with dropped count + error + remediation hint.
- Summary failure on /compress: warning appended to the command reply.
- CLI status_callback / _emit_warning path is untouched.
- Test coverage: two new tests verify the tracking fields are set on
failure and cleared on subsequent success.
This commit is contained in:
@ -338,6 +338,8 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@ -441,6 +443,11 @@ class ContextCompressor(ContextEngine):
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
# When summary generation fails and a static fallback is inserted,
|
||||
# record how many turns were unrecoverably dropped so callers
|
||||
# (gateway hygiene, /compress) can surface a visible warning.
|
||||
self._last_summary_dropped_count: int = 0
|
||||
self._last_summary_fallback_used: bool = False
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@ -1196,6 +1203,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
related to this topic and be more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
"""
|
||||
# Reset per-call summary failure state — callers inspect these fields
|
||||
# after compress() returns to decide whether to surface a warning.
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self.protect_first_n + 3 + 1
|
||||
@ -1274,6 +1285,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = n_dropped
|
||||
self._last_summary_fallback_used = True
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} conversation turns were "
|
||||
|
||||
@ -4800,6 +4800,34 @@ class GatewayRunner:
|
||||
"compression",
|
||||
f"{_new_tokens:,}",
|
||||
)
|
||||
|
||||
# If summary generation failed, the
|
||||
# compressor inserted a static fallback
|
||||
# placeholder and the dropped turns are
|
||||
# gone for good. Surface a visible
|
||||
# warning to the gateway user — agent.log
|
||||
# alone is invisible on TG/Discord/etc.
|
||||
_comp = getattr(_hyg_agent, "context_compressor", None)
|
||||
if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
|
||||
_dropped = getattr(_comp, "_last_summary_dropped_count", 0)
|
||||
_err = getattr(_comp, "_last_summary_error", None) or "unknown error"
|
||||
_warn_msg = (
|
||||
"⚠️ Context compression summary failed "
|
||||
f"({_err}). {_dropped} historical message(s) "
|
||||
"were removed and replaced with a placeholder. "
|
||||
"Earlier context is no longer recoverable. "
|
||||
"Consider /reset for a clean session, or check "
|
||||
"your auxiliary.compression model configuration."
|
||||
)
|
||||
try:
|
||||
_adapter = self.adapters.get(source.platform)
|
||||
if _adapter and source.chat_id:
|
||||
await _adapter.send(source.chat_id, _warn_msg)
|
||||
except Exception as _werr:
|
||||
logger.warning(
|
||||
"Failed to deliver compression-failure warning to user: %s",
|
||||
_werr,
|
||||
)
|
||||
finally:
|
||||
self._cleanup_agent_resources(_hyg_agent)
|
||||
|
||||
@ -7343,6 +7371,12 @@ class GatewayRunner:
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
# Detect summary-generation failure so we can surface a
|
||||
# visible warning to the user even on the manual /compress
|
||||
# path (otherwise the failure is silently logged).
|
||||
_summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
|
||||
_dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
|
||||
_summary_err = getattr(compressor, "_last_summary_error", None)
|
||||
finally:
|
||||
self._cleanup_agent_resources(tmp_agent)
|
||||
lines = [f"🗜️ {summary['headline']}"]
|
||||
@ -7351,6 +7385,13 @@ class GatewayRunner:
|
||||
lines.append(summary["token_line"])
|
||||
if summary["note"]:
|
||||
lines.append(summary["note"])
|
||||
if _summary_failed:
|
||||
lines.append(
|
||||
f"⚠️ Summary generation failed ({_summary_err or 'unknown error'}). "
|
||||
f"{_dropped_count} historical message(s) were removed and replaced "
|
||||
"with a placeholder; earlier context is no longer recoverable. "
|
||||
"Consider checking your auxiliary.compression model configuration."
|
||||
)
|
||||
return "\n".join(lines)
|
||||
except Exception as e:
|
||||
logger.warning("Manual compress failed: %s", e)
|
||||
|
||||
@ -242,6 +242,72 @@ class TestSummaryFailureCooldown:
|
||||
assert mock_call.call_count == 1
|
||||
|
||||
|
||||
class TestSummaryFailureTrackingForGatewayWarning:
    """Summary-failure bookkeeping that gateway hygiene and /compress rely on.

    When the summary LLM call fails, ``compress()`` must record the fallback
    flag and the dropped-turn count so callers can surface a visible warning
    instead of silently dropping context.
    """

    @staticmethod
    def _make_compressor():
        """Build a quiet compressor while the context-length lookup is patched.

        The patch is only active during construction, matching how the
        compressor resolves its context length in ``__init__``.
        """
        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            return ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)

    @staticmethod
    def _make_messages():
        """Return a fresh history: one system message plus seven alternating turns."""
        return [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "msg 1"},
            {"role": "assistant", "content": "msg 2"},
            {"role": "user", "content": "msg 3"},
            {"role": "assistant", "content": "msg 4"},
            {"role": "user", "content": "msg 5"},
            {"role": "assistant", "content": "msg 6"},
            {"role": "user", "content": "msg 7"},
        ]

    def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
        """A failing summary call sets the fallback flag, dropped count and error."""
        c = self._make_compressor()
        msgs = self._make_messages()

        # Simulate the summary LLM call failing — covers the 404 /
        # model-not-found case (auxiliary compression model misconfigured).
        with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
            result = c.compress(msgs)

        assert c._last_summary_fallback_used is True
        assert c._last_summary_dropped_count > 0
        assert c._last_summary_error is not None
        # Result must still be well-formed (fallback summary present).
        assert any(
            isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
            for m in result
        )

    def test_compress_clears_fallback_flag_on_subsequent_success(self):
        """A successful compress() after a failed one resets the tracking fields."""
        mock_response = MagicMock()
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "summary text"

        c = self._make_compressor()
        msgs = self._make_messages()

        # First call fails, second succeeds — state must reset on second compress.
        with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
            c.compress(msgs)
        assert c._last_summary_fallback_used is True
        # Precondition: without a non-zero count here, the ``== 0`` check
        # below would pass even if compress() never reset the field.
        assert c._last_summary_dropped_count > 0

        # Reset cooldown to allow retry on second compress
        c._summary_failure_cooldown_until = 0.0
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
            c.compress(msgs)
        assert c._last_summary_fallback_used is False
        assert c._last_summary_dropped_count == 0
|
||||
|
||||
|
||||
class TestSummaryPrefixNormalization:
|
||||
def test_legacy_prefix_is_replaced(self):
|
||||
summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")
|
||||
|
||||
Reference in New Issue
Block a user