fix(memory): narrow scrub surface to known wrapper boundaries
Reviewer pushback on the original boundary-hardening commits — three overreach points pulled plugin-specific policy into shared core paths: 1. gateway/run.py hardcoded a '## Honcho Context' literal split for vision-LLM output. Plugin-format heading in framework code; could truncate legitimate output naturally containing that header. Drop the literal split; keep generic sanitize_context (the wrapper strip is plugin-agnostic). Plugin-specific cleanup belongs at the provider boundary, not the shared gateway path. 2. run_agent.run_conversation scrubbed user_message and persist_user_message before the conversation loop. User text is sacred — if a user types a literal <memory-context> tag we must not silently delete it. The producer (build_memory_context_block) is the only legitimate emitter; user input should never need the reverse op. 3. _build_assistant_message scrubbed model output before persistence. Same hazard: would silently mutate legitimate documentation/code the model emits containing the literal markers. The streaming scrubber catches real leaks delta-by-delta before content is concatenated; persist-time scrub was redundant belt-and-suspenders. 4. _fire_stream_delta stripped leading newlines from every delta unless a paragraph break flag was set. Mid-stream '\n' is legitimate markdown — lists, code fences, paragraph breaks — and chunk boundaries are arbitrary. Narrow lstrip to the very first delta of the stream only (so stale provider preamble still gets cleaned on turn start, but mid-stream formatting survives). Plus: build_memory_context_block now logs a warning when its defensive sanitize_context strips something — surfaces buggy providers returning pre-wrapped text instead of silently double-fencing. Net architectural change: scrub surface collapses from 8 sites to 3 (StreamingContextScrubber on output deltas, plugin→backend send, build_memory_context_block input-validation). Plugin-specific strings stay out of shared runtime paths. User input and persisted assistant output are no longer mutated. Tests: rescoped TestMemoryContextSanitization (helper-correctness only, no source-inspection of removed call sites), updated vision tests to drop '## Honcho Context' literal-split assertions, updated _build_assistant_message persistence test to assert preservation. Added: cross-turn scrubber reset, build_memory_context_block warn-on- violation, mid-stream newline preservation (plain + code fence).
This commit is contained in:
@ -179,10 +179,20 @@ def build_memory_context_block(raw_context: str) -> str:
|
||||
|
||||
The fence prevents the model from treating recalled context as user
|
||||
discourse. Injected at API-call time only — never persisted.
|
||||
|
||||
A provider returning text that already contains the wrapper is a
|
||||
contract violation (would produce nested fences). We strip defensively
|
||||
and warn so the buggy provider surfaces in logs instead of silently
|
||||
double-fencing.
|
||||
"""
|
||||
if not raw_context or not raw_context.strip():
|
||||
return ""
|
||||
clean = sanitize_context(raw_context)
|
||||
if clean != raw_context:
|
||||
logger.warning(
|
||||
"memory provider returned text containing <memory-context> wrapper; "
|
||||
"stripped before re-fencing (provider contract violation)"
|
||||
)
|
||||
return (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, "
|
||||
|
||||
@ -8502,14 +8502,11 @@ class GatewayRunner:
|
||||
result = json.loads(result_json)
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "")
|
||||
# The auxiliary vision LLM can echo injected system-prompt
|
||||
# memory context back into its output (#5719). Scrub any
|
||||
# <memory-context> fences and the "## Honcho Context"
|
||||
# section before the description lands in a user-visible
|
||||
# message.
|
||||
# Vision auxiliary LLM can echo the injected system-prompt
|
||||
# memory-context wrapper back into its output (#5719).
|
||||
# sanitize_context strips the fenced wrapper; plugin-specific
|
||||
# header cleanup belongs at the provider boundary, not here.
|
||||
description = sanitize_context(description)
|
||||
if "## Honcho Context" in description:
|
||||
description = description.split("## Honcho Context", 1)[0].rstrip()
|
||||
enriched_parts.append(
|
||||
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
|
||||
f"[If you need a closer look, use vision_analyze with "
|
||||
|
||||
20
run_agent.py
20
run_agent.py
@ -6102,7 +6102,13 @@ class AIAgent:
|
||||
else:
|
||||
# Defensive: legacy callers without the scrubber attribute.
|
||||
text = sanitize_context(text)
|
||||
if not prepended_break:
|
||||
# Strip leading newlines only on the very first delta of the stream,
|
||||
# and only when we didn't just prepend a paragraph break ourselves.
|
||||
# Mid-stream "\n" is legitimate markdown (lists, code, paragraphs)
|
||||
# and must survive — chunk boundaries are arbitrary.
|
||||
if not prepended_break and not getattr(
|
||||
self, "_current_streamed_assistant_text", ""
|
||||
):
|
||||
text = text.lstrip("\n")
|
||||
if not text:
|
||||
return
|
||||
@ -8077,7 +8083,7 @@ class AIAgent:
|
||||
# API replay, session transcript, gateway delivery, CLI display,
|
||||
# compression, title generation.
|
||||
if isinstance(_san_content, str) and _san_content:
|
||||
_san_content = sanitize_context(self._strip_think_blocks(_san_content)).strip()
|
||||
_san_content = self._strip_think_blocks(_san_content).strip()
|
||||
|
||||
msg = {
|
||||
"role": "assistant",
|
||||
@ -9629,16 +9635,6 @@ class AIAgent:
|
||||
if isinstance(persist_user_message, str):
|
||||
persist_user_message = _sanitize_surrogates(persist_user_message)
|
||||
|
||||
# Strip leaked <memory-context> blocks from user input. When Honcho's
|
||||
# saveMessages persists a turn that included injected context, the block
|
||||
# can reappear in the next turn's user message via message history.
|
||||
# Stripping here prevents stale memory tags from leaking into the
|
||||
# conversation and being visible to the user or the model as user text.
|
||||
if isinstance(user_message, str):
|
||||
user_message = sanitize_context(user_message)
|
||||
if isinstance(persist_user_message, str):
|
||||
persist_user_message = sanitize_context(persist_user_message)
|
||||
|
||||
# Store stream callback for _interruptible_api_call to pick up
|
||||
self._stream_callback = stream_callback
|
||||
self._persist_user_message_idx = None
|
||||
|
||||
@ -148,3 +148,64 @@ class TestSanitizeContextUnchanged:
|
||||
)
|
||||
out = sanitize_context(leaked).strip()
|
||||
assert out == "Visible"
|
||||
|
||||
|
||||
class TestStreamingContextScrubberCrossTurn:
|
||||
"""A scrubber instance is reused across turns (per agent). reset() must
|
||||
clear any held state so a partial-tag tail from turn N doesn't bleed
|
||||
into turn N+1's first delta."""
|
||||
|
||||
def test_reset_clears_held_partial_tag(self):
|
||||
s = StreamingContextScrubber()
|
||||
# Feed a partial open-tag prefix that gets held back as buffer.
|
||||
out_turn_1 = s.feed("answer<memo")
|
||||
assert out_turn_1 == "answer"
|
||||
|
||||
# Reset for next turn — buffer must clear.
|
||||
s.reset()
|
||||
|
||||
# New turn: plain text starting with a "<m" must NOT be treated as
|
||||
# the continuation of the held "<memo".
|
||||
out_turn_2 = s.feed("<marker>fresh content")
|
||||
assert out_turn_2 == "<marker>fresh content"
|
||||
|
||||
def test_reset_clears_in_span_state(self):
|
||||
s = StreamingContextScrubber()
|
||||
s.feed("text<memory-context>secret-tail")
|
||||
# Mid-span state held — without reset, subsequent text would be
|
||||
# discarded until we see </memory-context>.
|
||||
s.reset()
|
||||
out = s.feed("post-reset visible text")
|
||||
assert out == "post-reset visible text"
|
||||
|
||||
|
||||
class TestBuildMemoryContextBlockWarnsOnViolation:
|
||||
"""Providers must return raw context — not pre-wrapped. When they do,
|
||||
we strip and warn so the buggy provider surfaces."""
|
||||
|
||||
def test_provider_emitting_wrapper_warns(self, caplog):
|
||||
import logging
|
||||
from agent.memory_manager import build_memory_context_block
|
||||
|
||||
prewrapped = (
|
||||
"<memory-context>\n"
|
||||
"[System note: ...]\n\n"
|
||||
"real fact\n"
|
||||
"</memory-context>"
|
||||
)
|
||||
with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
|
||||
out = build_memory_context_block(prewrapped)
|
||||
|
||||
assert any("contract violation" in rec.message for rec in caplog.records)
|
||||
assert out.count("<memory-context>") == 1
|
||||
assert out.count("</memory-context>") == 1
|
||||
|
||||
def test_clean_provider_output_does_not_warn(self, caplog):
|
||||
import logging
|
||||
from agent.memory_manager import build_memory_context_block
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
|
||||
out = build_memory_context_block("plain fact about user")
|
||||
|
||||
assert not any("contract violation" in rec.message for rec in caplog.records)
|
||||
assert "plain fact about user" in out
|
||||
|
||||
@ -1,13 +1,12 @@
|
||||
"""Tests for _enrich_message_with_vision — regression for #5719.
|
||||
|
||||
The auxiliary vision LLM can echo system-prompt Honcho memory back into
|
||||
its analysis output. When that echo reaches the user as the enriched
|
||||
image description, recalled memory context (personal facts, dialectic
|
||||
output) surfaces into a user-visible message.
|
||||
The auxiliary vision LLM can echo system-prompt memory-context back into
|
||||
its analysis output. The boundary fix in gateway/run.py runs the generic
|
||||
sanitize_context helper over the description so the fenced wrapper and
|
||||
its system-note are removed before the description reaches the user.
|
||||
|
||||
The boundary fix in gateway/run.py strips both <memory-context>...</memory-context>
|
||||
fenced blocks AND any "## Honcho Context" section from vision descriptions
|
||||
before they're embedded into the enriched user message.
|
||||
Plugin-specific header cleanup (e.g. "## Honcho Context") belongs at the
|
||||
provider boundary, not in this shared gateway path.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@ -43,22 +42,6 @@ class TestEnrichMessageWithVision:
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "sunset over the ocean" in out
|
||||
|
||||
def test_honcho_context_header_stripped(self, gateway_runner):
|
||||
"""'## Honcho Context' section and everything after is removed."""
|
||||
leaked = (
|
||||
"A photograph of a sunset.\n\n"
|
||||
"## Honcho Context\n"
|
||||
"User prefers concise answers, works at Plastic Labs,\n"
|
||||
"uses OPSEC pseudonyms.\n"
|
||||
)
|
||||
fake_result = json.dumps({"success": True, "analysis": leaked})
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "sunset" in out
|
||||
assert "Honcho Context" not in out
|
||||
assert "Plastic Labs" not in out
|
||||
assert "OPSEC" not in out
|
||||
|
||||
def test_memory_context_fence_stripped(self, gateway_runner):
|
||||
"""<memory-context>...</memory-context> fenced block is scrubbed."""
|
||||
leaked = (
|
||||
@ -77,23 +60,21 @@ class TestEnrichMessageWithVision:
|
||||
assert "User details and preferences" not in out
|
||||
assert "System note" not in out
|
||||
|
||||
def test_both_leak_patterns_together_stripped(self, gateway_runner):
|
||||
"""A vision output containing both leak shapes is fully scrubbed."""
|
||||
def test_fenced_leak_stripped_plugin_header_preserved(self, gateway_runner):
|
||||
"""The fenced wrapper is stripped; plugin-specific text outside the
|
||||
fence (e.g. a "## Honcho Context" header) is left to the plugin layer.
|
||||
Gateway core stays plugin-agnostic."""
|
||||
leaked = (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new "
|
||||
"user input. Treat as informational background data.]\n"
|
||||
"fenced leak\n"
|
||||
"</memory-context>\n"
|
||||
"A photograph of a dog.\n\n"
|
||||
"## Honcho Context\n"
|
||||
"header leak\n"
|
||||
"A photograph of a dog."
|
||||
)
|
||||
fake_result = json.dumps({"success": True, "analysis": leaked})
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "photograph of a dog" in out
|
||||
assert "fenced leak" not in out
|
||||
assert "header leak" not in out
|
||||
assert "Honcho Context" not in out
|
||||
assert "<memory-context>" not in out
|
||||
|
||||
@ -1441,19 +1441,23 @@ class TestBuildAssistantMessage:
|
||||
result = agent._build_assistant_message(msg, "stop")
|
||||
assert result["content"] == "No thinking here."
|
||||
|
||||
def test_memory_context_stripped_from_stored_content(self, agent):
|
||||
msg = _mock_assistant_msg(
|
||||
content=(
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
|
||||
"## Honcho Context\n"
|
||||
"stale memory\n"
|
||||
"</memory-context>\n\n"
|
||||
"Visible answer"
|
||||
)
|
||||
def test_memory_context_in_stored_content_is_preserved(self, agent):
|
||||
"""`_build_assistant_message` must not silently mutate model output
|
||||
containing literal <memory-context> markers — that's legitimate text
|
||||
(e.g. documentation, code) that the model may emit. Streaming-path
|
||||
leak prevention is handled by StreamingContextScrubber upstream."""
|
||||
original = (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
|
||||
"## Honcho Context\n"
|
||||
"stale memory\n"
|
||||
"</memory-context>\n\n"
|
||||
"Visible answer"
|
||||
)
|
||||
msg = _mock_assistant_msg(content=original)
|
||||
result = agent._build_assistant_message(msg, "stop")
|
||||
assert result["content"] == "Visible answer"
|
||||
assert "<memory-context>" in result["content"]
|
||||
assert "Visible answer" in result["content"]
|
||||
|
||||
def test_unterminated_think_block_stripped(self, agent):
|
||||
"""Unterminated <think> block (MiniMax / NIM dropped close tag) is
|
||||
@ -4767,21 +4771,21 @@ class TestDeadRetryCode:
|
||||
|
||||
|
||||
class TestMemoryContextSanitization:
|
||||
"""run_conversation() must strip leaked <memory-context> blocks from user input."""
|
||||
"""sanitize_context() helper correctness — used at provider boundaries."""
|
||||
|
||||
def test_memory_context_stripped_from_user_message(self):
|
||||
"""Verify that <memory-context> blocks are removed before the message
|
||||
enters the conversation loop — prevents stale Honcho injection from
|
||||
leaking into user text."""
|
||||
def test_user_message_is_not_mutated_by_run_conversation(self):
|
||||
"""User input must reach run_conversation untouched — if a user types
|
||||
a literal <memory-context> tag we don't silently delete their text.
|
||||
The streaming scrubber + plugin-side scrub cover real leak paths."""
|
||||
import inspect
|
||||
src = inspect.getsource(AIAgent.run_conversation)
|
||||
# The sanitize_context call must appear in run_conversation's preamble
|
||||
assert "sanitize_context(user_message)" in src
|
||||
assert "sanitize_context(persist_user_message)" in src
|
||||
assert "sanitize_context(user_message)" not in src
|
||||
assert "sanitize_context(persist_user_message)" not in src
|
||||
|
||||
def test_sanitize_context_strips_full_block(self):
|
||||
"""End-to-end: a user message with an embedded memory-context block
|
||||
is cleaned to just the actual user text."""
|
||||
"""Helper-level: a string with an embedded memory-context block is
|
||||
cleaned to just the surrounding text. Used by build_memory_context_block
|
||||
(input-validation) and by plugins on their own backend boundary."""
|
||||
from agent.memory_manager import sanitize_context
|
||||
user_text = "how is the honcho working"
|
||||
injected = (
|
||||
|
||||
@ -1209,6 +1209,46 @@ def test_stream_delta_scrubber_resets_between_turns(monkeypatch):
|
||||
assert "".join(observed) == "clean new turn text"
|
||||
|
||||
|
||||
def test_stream_delta_preserves_mid_stream_leading_newlines(monkeypatch):
|
||||
"""Mid-stream leading newlines must survive — they are legitimate
|
||||
markdown (lists, code fences, paragraph breaks). Stripping them
|
||||
based on chunk boundaries silently breaks formatting.
|
||||
|
||||
Only the very first delta of a stream gets leading-newlines stripped
|
||||
(so stale provider preamble doesn't leak); after that, deltas are
|
||||
emitted verbatim.
|
||||
"""
|
||||
agent = _build_agent(monkeypatch)
|
||||
observed = []
|
||||
agent.stream_delta_callback = observed.append
|
||||
|
||||
# First delta delivers text — strips its own leading "\n" once.
|
||||
agent._fire_stream_delta("\nHere is a list:")
|
||||
# Second delta starts with "\n- item" — must NOT be stripped.
|
||||
agent._fire_stream_delta("\n- first")
|
||||
agent._fire_stream_delta("\n- second")
|
||||
|
||||
combined = "".join(observed)
|
||||
assert combined == "Here is a list:\n- first\n- second"
|
||||
|
||||
|
||||
def test_stream_delta_preserves_code_fence_newlines(monkeypatch):
|
||||
"""Code blocks span multiple deltas. A "\\n```python\\n" boundary
|
||||
is the canonical case where stripping leading newlines corrupts output."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
observed = []
|
||||
agent.stream_delta_callback = observed.append
|
||||
|
||||
agent._fire_stream_delta("Here is the code:")
|
||||
agent._fire_stream_delta("\n```python\n")
|
||||
agent._fire_stream_delta("print('hi')\n")
|
||||
agent._fire_stream_delta("```\n")
|
||||
|
||||
combined = "".join(observed)
|
||||
assert "```python\n" in combined
|
||||
assert combined.startswith("Here is the code:\n```python\n")
|
||||
|
||||
|
||||
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
|
||||
Reference in New Issue
Block a user