A streamed preamble ("Let me search...") finalized at a tool boundary was
routed through _try_fresh_final, which unconditionally set
_final_response_sent=True even though it is a NON-final segment. The
gateway then read that flag as "final delivered" and suppressed the
genuine final answer produced on the next API call, so the user silently
got nothing. Only reproduces with fresh_final_after_seconds > 0.
- _try_fresh_final / _send_or_edit take is_turn_final; the segment-break
call site passes is_turn_final=got_done so only the turn-final answer
marks final-delivered.
- _reset_segment_state clears the final-delivery flags at every tool
boundary as defense-in-depth against any future premature setter.
- Failing-first regression + happy-path no-duplicate test.
411 lines · 18 KiB · Python
"""Regression tests for the fresh-final-for-long-lived-previews path.

Ported from openclaw/openclaw#72038. When a streamed preview has been
visible long enough that the platform's edit timestamp would be
noticeably stale by completion time, the stream consumer delivers the
final reply as a brand-new message and best-effort deletes the old
preview. This makes Telegram's visible timestamp reflect completion
time instead of first-token time.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
|
|
|
|
|
|
def _make_adapter(*, supports_delete: bool = True) -> MagicMock:
|
|
"""Build a minimal MagicMock adapter wired for send/edit/delete."""
|
|
adapter = MagicMock()
|
|
adapter.REQUIRES_EDIT_FINALIZE = False
|
|
adapter.MAX_MESSAGE_LENGTH = 4096
|
|
adapter.send = AsyncMock(return_value=SimpleNamespace(
|
|
success=True, message_id="initial_preview",
|
|
))
|
|
adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
|
|
success=True, message_id="initial_preview",
|
|
))
|
|
if supports_delete:
|
|
adapter.delete_message = AsyncMock(return_value=True)
|
|
else:
|
|
# Adapter without the optional delete_message method — fresh-final
|
|
# should still work, it just leaves the stale preview in place.
|
|
del adapter.delete_message # type: ignore[attr-defined]
|
|
return adapter
|
|
|
|
|
|
class TestFreshFinalForLongLivedPreviews:
    """openclaw#72038 port — send fresh final when preview is old.

    Each test drives ``GatewayStreamConsumer._send_or_edit`` directly
    against a mock adapter, backdating ``_message_created_ts`` where a
    stale preview is needed, and then inspects which adapter calls were
    made.
    """

    @pytest.mark.asyncio
    async def test_disabled_by_default_still_edits_in_place(self):
        """``fresh_final_after_seconds=0`` preserves the legacy edit path."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=0.0),
        )
        await consumer._send_or_edit("hello")
        # Pretend the preview has been visible for a long time.
        consumer._message_created_ts = 0.0  # far in the past
        await consumer._send_or_edit("hello world", finalize=True)
        # Should edit, not send a fresh message.
        assert adapter.send.call_count == 1  # only the initial send
        adapter.edit_message.assert_called_once()

    @pytest.mark.asyncio
    async def test_short_lived_preview_edits_in_place(self):
        """Finalizing a preview younger than the threshold → normal edit."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
        )
        await consumer._send_or_edit("hello")
        # Preview is "new" — leave _message_created_ts at its real value.
        await consumer._send_or_edit("hello world", finalize=True)
        assert adapter.send.call_count == 1
        adapter.edit_message.assert_called_once()

    @pytest.mark.asyncio
    async def test_long_lived_preview_sends_fresh_final(self):
        """Finalizing a preview older than the threshold → fresh send."""
        adapter = _make_adapter()
        # First send creates the preview; second is the fresh final.
        adapter.send.side_effect = [
            SimpleNamespace(success=True, message_id="initial_preview"),
            SimpleNamespace(success=True, message_id="fresh_final"),
        ]
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
        )
        await consumer._send_or_edit("hello")
        # Force the preview to look stale (visible for > 60s).
        consumer._message_created_ts = 0.0  # zero = ~uptime seconds old
        await consumer._send_or_edit("hello world", finalize=True)
        # Fresh send happened; no edit of the old preview.
        assert adapter.send.call_count == 2
        adapter.edit_message.assert_not_called()
        # The old preview was deleted as cleanup.
        adapter.delete_message.assert_awaited_once_with("chat", "initial_preview")
        # State was updated to the new message id.
        assert consumer._message_id == "fresh_final"
        assert consumer._final_response_sent is True

    @pytest.mark.asyncio
    async def test_fresh_final_without_delete_support_is_best_effort(self):
        """Adapter lacking ``delete_message`` still gets the fresh send."""
        adapter = _make_adapter(supports_delete=False)
        adapter.send.side_effect = [
            SimpleNamespace(success=True, message_id="initial_preview"),
            SimpleNamespace(success=True, message_id="fresh_final"),
        ]
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
        )
        await consumer._send_or_edit("hello")
        consumer._message_created_ts = 0.0
        await consumer._send_or_edit("hello world", finalize=True)
        assert adapter.send.call_count == 2
        adapter.edit_message.assert_not_called()
        # No delete attempt — just the fresh send. The fixture removed
        # delete_message; confirm nothing re-attached it along the way.
        assert not hasattr(adapter, "delete_message")
        assert consumer._message_id == "fresh_final"

    @pytest.mark.asyncio
    async def test_fresh_final_fallback_to_edit_on_send_failure(self):
        """If the fresh send fails, fall back to the normal edit path."""
        adapter = _make_adapter()
        adapter.send.side_effect = [
            SimpleNamespace(success=True, message_id="initial_preview"),
            SimpleNamespace(success=False, error="network"),
        ]
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
        )
        await consumer._send_or_edit("hello")
        consumer._message_created_ts = 0.0
        ok = await consumer._send_or_edit("hello world", finalize=True)
        # Fresh send was attempted and failed → edit happened instead.
        assert adapter.send.call_count == 2
        adapter.edit_message.assert_called_once()
        assert ok is True

    @pytest.mark.asyncio
    async def test_only_finalize_triggers_fresh_final(self):
        """Intermediate edits (``finalize=False``) never switch to fresh send."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
        )
        await consumer._send_or_edit("hello")
        consumer._message_created_ts = 0.0  # stale
        await consumer._send_or_edit("hello partial")  # no finalize
        assert adapter.send.call_count == 1
        adapter.edit_message.assert_called_once()

    @pytest.mark.asyncio
    async def test_no_edit_sentinel_is_not_affected(self):
        """Platforms with the ``__no_edit__`` sentinel never go fresh-final."""
        adapter = _make_adapter()
        # A successful send with no message id is what yields the sentinel.
        adapter.send.return_value = SimpleNamespace(success=True, message_id=None)
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
        )
        await consumer._send_or_edit("hello")
        assert consumer._message_id == "__no_edit__"
        assert consumer._message_created_ts is None
        # Even with finalize=True, no fresh send — the sentinel gates it.
        assert consumer._should_send_fresh_final() is False
|
|
|
|
|
|
class TestSegmentBreakDoesNotMarkFinalSent:
    """Regression for #29346 — silent response loss after tool calls.

    When ``fresh_final_after_seconds > 0`` and a streamed *preamble* ("Let me
    search…") has aged past the threshold, finalizing it at a tool boundary
    used to route through ``_try_fresh_final``, which unconditionally set
    ``_final_response_sent = True`` even though this is a NON-final segment.
    The gateway (run.py:18128) then reads that flag as "final delivered" and
    suppresses the genuine final answer (which arrives on a later API call and
    does not re-stream), so the user gets nothing.

    The fix scopes the final-delivery flags to the turn-final segment and
    clears them at every tool/segment boundary, so a preamble can never mark
    the turn as delivered.

    These tests run the real ``consumer.run()`` loop as a task and use short
    ``asyncio.sleep`` pauses to let it process queued deltas; ``on_delta(None)``
    stands in for a tool boundary.
    """

    @staticmethod
    def _delivered_texts(adapter) -> list[str]:
        """Every text the adapter actually put on screen (sends + edits).

        Only the ``content`` keyword argument of each recorded call is read;
        calls that did not pass ``content`` by keyword contribute ``""``.
        """
        texts = [c.kwargs.get("content", "") for c in adapter.send.call_args_list]
        texts += [c.kwargs.get("content", "") for c in adapter.edit_message.call_args_list]
        return texts

    @pytest.mark.asyncio
    async def test_preamble_fresh_final_at_tool_boundary_does_not_mark_final(self):
        """Real-aging reproduction (exercises the actual _should_send_fresh_final
        age gate, not a monkeypatch): a preamble ages past the threshold, then a
        tool boundary finalizes it via fresh-final. The genuine final answer is
        produced on a later API call and is NOT streamed through this consumer
        (the #29346 repro), so the consumer must NOT believe the final was sent."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(
                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
                fresh_final_after_seconds=0.001,  # tiny → real aging fires
            ),
        )
        consumer.on_delta("Let me search the web for that.")
        task = asyncio.create_task(consumer.run())
        await asyncio.sleep(0.05)  # preamble sent + aged well past 0.001s
        consumer.on_delta(None)  # tool boundary → segment-break fresh-final
        await asyncio.sleep(0.05)
        consumer.finish()
        await task

        # Fresh-final actually engaged (preamble preview + a fresh resend), yet
        # the turn is NOT marked delivered — no genuine final ever streamed.
        assert adapter.send.call_count >= 2
        assert consumer.final_response_sent is False
        assert consumer.final_content_delivered is False

    @pytest.mark.asyncio
    async def test_final_answer_after_preamble_is_delivered_exactly_once(self):
        """P0 user-visible contract: when the real final answer DOES stream in
        after the preamble + tool boundary, the user gets it exactly once AND
        the consumer marks it delivered (so the gateway correctly suppresses a
        redundant send)."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(
                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
                fresh_final_after_seconds=0.001,
            ),
        )
        consumer.on_delta("Let me search the web for that.")
        task = asyncio.create_task(consumer.run())
        await asyncio.sleep(0.05)
        consumer.on_delta(None)  # tool boundary
        consumer.on_delta("The answer is 42.")  # genuine final answer streams
        await asyncio.sleep(0.05)
        consumer.finish()
        await task

        # The real final answer was delivered → suppression must engage.
        assert consumer.final_response_sent is True
        # And it reached the user exactly once (no duplicate fresh send).
        final_sends = [
            c for c in adapter.send.call_args_list
            if "answer is 42" in c.kwargs.get("content", "")
        ]
        assert len(final_sends) <= 1
        assert any("answer is 42" in t for t in self._delivered_texts(adapter))

    @pytest.mark.asyncio
    async def test_genuine_final_answer_without_tools_marks_delivered(self):
        """P1 happy path: a single answer streamed straight to completion (no
        tool boundary) still sets final_response_sent so the gateway suppresses
        the redundant final send."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(
                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
                fresh_final_after_seconds=60.0,
            ),
        )
        consumer.on_delta("Here is the full answer.")
        task = asyncio.create_task(consumer.run())
        await asyncio.sleep(0.05)
        consumer.finish()
        await task
        assert consumer.final_response_sent is True
        assert any("Here is the full answer." in t for t in self._delivered_texts(adapter))

    @pytest.mark.asyncio
    async def test_no_edit_adapter_delivers_final_after_preamble(self):
        """No-edit adapters (Signal/SMS/webhook → __no_edit__) accumulate and
        deliver rather than fresh-final. A preamble before a tool call must not
        swallow the genuine final answer — it must reach the user."""
        adapter = _make_adapter()
        # No message id on send puts the consumer on the no-edit path.
        adapter.send.return_value = SimpleNamespace(success=True, message_id=None)
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(
                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
                fresh_final_after_seconds=0.001,
            ),
        )
        consumer.on_delta("Let me search the web for that.")
        task = asyncio.create_task(consumer.run())
        await asyncio.sleep(0.05)
        consumer.on_delta(None)  # tool boundary
        consumer.on_delta("The answer is 42.")  # genuine final answer
        await asyncio.sleep(0.05)
        consumer.finish()
        await task
        # The final answer reached the user, not swallowed by the preamble.
        assert any(
            "answer is 42" in c.kwargs.get("content", "")
            for c in adapter.send.call_args_list
        )

    @pytest.mark.asyncio
    async def test_multi_tool_call_turn_delivers_final_once(self):
        """Two tool boundaries before the final answer: flags stay clear across
        both boundaries and the genuine final is delivered exactly once and
        marked sent."""
        adapter = _make_adapter()
        consumer = GatewayStreamConsumer(
            adapter=adapter,
            chat_id="chat",
            config=StreamConsumerConfig(
                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
                fresh_final_after_seconds=0.001,
            ),
        )
        consumer.on_delta("Let me check a couple of things.")
        task = asyncio.create_task(consumer.run())
        await asyncio.sleep(0.05)
        consumer.on_delta(None)  # tool boundary 1
        consumer.on_delta("Now cross-referencing.")
        await asyncio.sleep(0.05)
        consumer.on_delta(None)  # tool boundary 2
        consumer.on_delta("The answer is 42.")  # genuine final answer
        await asyncio.sleep(0.05)
        consumer.finish()
        await task

        assert consumer.final_response_sent is True
        final_sends = [
            c for c in adapter.send.call_args_list
            if "answer is 42" in c.kwargs.get("content", "")
        ]
        assert len(final_sends) <= 1
        assert any("answer is 42" in t for t in self._delivered_texts(adapter))
|
|
|
|
|
|
class TestStreamConsumerConfigFreshFinalField:
    """The dataclass field must exist and default to 0 (disabled)."""

    def test_default_is_disabled(self):
        """A default-constructed config has ``fresh_final_after_seconds == 0.0``."""
        cfg = StreamConsumerConfig()
        assert cfg.fresh_final_after_seconds == 0.0

    def test_field_is_configurable(self):
        """A value passed to the constructor is stored unchanged."""
        cfg = StreamConsumerConfig(fresh_final_after_seconds=120.0)
        assert cfg.fresh_final_after_seconds == 120.0
|
|
|
|
|
|
class TestStreamingConfigFreshFinalField:
    """The gateway-level StreamingConfig carries the setting."""

    def test_default_enables_with_60s(self):
        """A default-constructed ``StreamingConfig`` uses a 60 second threshold."""
        from gateway.config import StreamingConfig
        cfg = StreamingConfig()
        assert cfg.fresh_final_after_seconds == 60.0

    def test_from_dict_uses_default_when_missing(self):
        """``from_dict`` falls back to 60.0 when the key is absent."""
        from gateway.config import StreamingConfig
        cfg = StreamingConfig.from_dict({"enabled": True})
        assert cfg.fresh_final_after_seconds == 60.0

    def test_from_dict_respects_explicit_zero(self):
        """An explicit ``0`` must survive ``from_dict`` rather than being
        replaced by the 60.0 default."""
        from gateway.config import StreamingConfig
        cfg = StreamingConfig.from_dict({
            "enabled": True,
            "fresh_final_after_seconds": 0,
        })
        assert cfg.fresh_final_after_seconds == 0.0

    def test_to_dict_round_trip(self):
        """``to_dict`` followed by ``from_dict`` preserves the field value."""
        from gateway.config import StreamingConfig
        original = StreamingConfig(fresh_final_after_seconds=90.0)
        restored = StreamingConfig.from_dict(original.to_dict())
        assert restored.fresh_final_after_seconds == 90.0
|
|
|
|
|
|
class TestTelegramAdapterDeleteMessage:
    """Contract: Telegram adapter implements ``delete_message``."""

    def test_delete_message_method_exists(self):
        """``TelegramAdapter.delete_message`` exists and its first three
        parameters are ``self, chat_id, message_id``.

        Skipped when ``gateway.platforms.telegram`` cannot be imported.
        """
        telegram = pytest.importorskip("gateway.platforms.telegram")
        import inspect
        cls = telegram.TelegramAdapter
        assert hasattr(cls, "delete_message"), (
            "TelegramAdapter.delete_message is required for the fresh-final "
            "cleanup path (openclaw/openclaw#72038 port)."
        )
        sig = inspect.signature(cls.delete_message)
        params = list(sig.parameters)
        assert params[:3] == ["self", "chat_id", "message_id"]

    def test_base_adapter_default_returns_false(self):
        """``BasePlatformAdapter.delete_message`` exposes the expected signature.

        Only the leading parameter names (``self, chat_id, message_id``) are
        checked here; the method is never called, so its return value is not
        verified by this test despite the test's name.
        """
        from gateway.platforms.base import BasePlatformAdapter
        import inspect
        sig = inspect.signature(BasePlatformAdapter.delete_message)
        assert list(sig.parameters)[:3] == ["self", "chat_id", "message_id"]
|