Follow-up to the salvaged #34452 turn-completion explainer:
- Register display.turn_completion_explainer: True in DEFAULT_CONFIG so the setting is discoverable, matching the file_mutation_verifier precedent.
- Shorten the repeated footer prefix from 'Turn ended without a usable reply: ' to 'No reply: ' so the 10 reason variants don't all open with the same 8-word boilerplate.
- Update the 7 assertions that referenced the old prefix.
182 lines
7.1 KiB
Python
182 lines
7.1 KiB
Python
"""Tests for the end-of-turn completion explainer (#34452).
|
|
|
|
When a turn ends abnormally after tools (empty content after retries, a
|
|
partial/truncated stream, exhausted retries, or an iteration/budget limit)
|
|
the user should get a single user-visible explanation of why the reply
|
|
stopped instead of a blank or fragmentary response box. Normal short
|
|
replies (e.g. ``Done.``) must stay quiet.
|
|
|
|
These tests exercise:
|
|
1. ``_format_turn_completion_explanation`` — the pure reason→message map.
|
|
2. ``_turn_completion_explainer_enabled`` — the env/config seam.
|
|
3. An end-to-end ``run_conversation`` turn that exhausts empty-response
|
|
retries and verifies the explanation reaches ``final_response``.
|
|
|
|
All assertions work under the mocked OpenAI SDK used elsewhere in this
|
|
suite (we patch ``run_agent.OpenAI`` and drive ``agent.client``), so they
|
|
pass identically in CI and locally.
|
|
"""
|
|
|
|
import os
|
|
import uuid
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from run_agent import AIAgent
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# Fixtures (mirrors tests/run_agent/test_tool_call_guardrail_runtime.py)
|
|
# --------------------------------------------------------------------------
|
|
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None):
    """Build a minimal stand-in for a chat-completion response object.

    The returned namespace exposes ``choices[0].message.content``,
    ``choices[0].message.tool_calls``, ``choices[0].finish_reason``,
    ``model`` (always ``"test/model"``) and ``usage`` (always ``None``)
    as plain attributes.
    """
    return SimpleNamespace(
        choices=[
            SimpleNamespace(
                message=SimpleNamespace(content=content, tool_calls=tool_calls),
                finish_reason=finish_reason,
            )
        ],
        model="test/model",
        usage=None,
    )
|
|
|
|
|
|
def _make_agent(max_iterations: int = 10, config: dict | None = None) -> AIAgent:
    """Create an ``AIAgent`` wired up for deterministic, offline tests.

    While the agent is constructed, tool-definition lookup, toolset
    requirement checks, ``hermes_cli.config.load_config`` and the
    ``run_agent.OpenAI`` client class are all patched.

    Args:
        max_iterations: Forwarded to the ``AIAgent`` constructor.
        config: Value the patched ``load_config`` returns during
            construction; an empty dict is used when this is falsy.

    Returns:
        The agent, with its client replaced by a ``MagicMock`` and a set
        of runtime attributes overridden for the tests in this module.
    """
    loaded_config = config or {}
    ctor_kwargs = {
        "api_key": "test-key-1234567890",
        "base_url": "https://openrouter.ai/api/v1",
        "max_iterations": max_iterations,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }
    with (
        patch("run_agent.get_tool_definitions", return_value=[]),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("hermes_cli.config.load_config", return_value=loaded_config),
        patch("run_agent.OpenAI"),
    ):
        built = AIAgent(**ctor_kwargs)
        overrides = {
            "client": MagicMock(),
            "_cached_system_prompt": "You are helpful.",
            "_use_prompt_caching": False,
            "tool_delay": 0,
            "compression_enabled": False,
            "save_trajectories": False,
            # No fallback chain so empty responses exhaust deterministically.
            "_fallback_chain": [],
        }
        for attr_name, attr_value in overrides.items():
            setattr(built, attr_name, attr_value)
        return built
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# 1. Pure formatter
|
|
# --------------------------------------------------------------------------
|
|
def test_explanation_quiet_for_normal_text_response():
    """A healthy text_response exit must NOT produce any explanation."""
    healthy_reason = "text_response(finish_reason=stop)"
    assert AIAgent._format_turn_completion_explanation(healthy_reason) == ""
|
|
|
|
|
|
def test_explanation_quiet_for_empty_reason():
    """Empty, ``unknown`` and ``guardrail_halt`` reasons yield no explanation."""
    silent_reasons = (
        "",
        "unknown",
        # guardrail_halt surfaces its own message; explainer stays out of the way.
        "guardrail_halt",
    )
    for reason in silent_reasons:
        assert AIAgent._format_turn_completion_explanation(reason) == ""
|
|
|
|
|
|
def test_explanation_for_empty_response_exhausted():
    """``empty_response_exhausted`` yields a non-empty, actionable message."""
    message = AIAgent._format_turn_completion_explanation(
        "empty_response_exhausted"
    )
    assert message  # non-empty
    assert "empty content" in message
    lowered = message.lower()
    assert "continue" in lowered
|
|
|
|
|
|
def test_explanation_for_partial_stream_recovery():
    """``partial_stream_recovery`` mentions both "partial" and "continue"."""
    message = AIAgent._format_turn_completion_explanation(
        "partial_stream_recovery"
    )
    for keyword in ("partial", "continue"):
        assert keyword in message.lower()
|
|
|
|
|
|
def test_explanation_for_max_iterations_reached_prefix_match():
    """``max_iterations_reached(...)`` carries a parenthetical suffix."""
    suffixed_reason = "max_iterations_reached(10/10)"
    message = AIAgent._format_turn_completion_explanation(suffixed_reason)
    assert "iteration" in message.lower()
|
|
|
|
|
|
def test_explanation_for_all_retries_exhausted():
    """``all_retries_exhausted_no_response`` produces a message about retries."""
    reason = "all_retries_exhausted_no_response"
    message = AIAgent._format_turn_completion_explanation(reason)
    assert "retries" in message.lower()
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# 2. Enable/disable seam
|
|
# --------------------------------------------------------------------------
|
|
def test_explainer_enabled_by_default():
    """With the env var unset and an empty config, the explainer is on."""
    agent = _make_agent()
    with (
        # patch.dict restores os.environ on exit, undoing the pop below.
        patch.dict(os.environ, {}, clear=False),
        patch("hermes_cli.config.load_config", return_value={}),
    ):
        os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None)
        enabled = agent._turn_completion_explainer_enabled()
        assert enabled is True
|
|
|
|
|
|
def test_explainer_disabled_via_env():
    """``HERMES_TURN_COMPLETION_EXPLAINER=0`` switches the explainer off."""
    agent = _make_agent()
    env_override = {"HERMES_TURN_COMPLETION_EXPLAINER": "0"}
    with patch.dict(os.environ, env_override, clear=False):
        enabled = agent._turn_completion_explainer_enabled()
        assert enabled is False
|
|
|
|
|
|
def test_explainer_disabled_via_config():
    """``display.turn_completion_explainer: False`` switches the explainer off."""
    agent = _make_agent()
    disabling_config = {"display": {"turn_completion_explainer": False}}
    with (
        # patch.dict restores os.environ on exit, undoing the pop below.
        patch.dict(os.environ, {}, clear=False),
        patch("hermes_cli.config.load_config", return_value=disabling_config),
    ):
        os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None)
        enabled = agent._turn_completion_explainer_enabled()
        assert enabled is False
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# 3. End-to-end: empty-response exhaustion surfaces the explanation
|
|
# --------------------------------------------------------------------------
|
|
def test_run_conversation_empty_exhausted_surfaces_explanation():
    """Four empty responses in a row should exhaust retries and the final
    response should be the actionable explanation, not a bare '(empty)'."""
    agent = _make_agent(max_iterations=10)
    # Queue 8 empty responses. The docstring expects exhaustion after four
    # (retries 1..3 then the terminal 4th); the surplus is presumably
    # headroom so the mocked side_effect list is not drained early —
    # TODO confirm.
    empty_replies = [
        _mock_response(content="", finish_reason="stop") for _ in range(8)
    ]
    create_call = agent.client.chat.completions.create
    create_call.side_effect = empty_replies

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("do something")

    assert outcome["turn_exit_reason"] == "empty_response_exhausted"
    # The user must NOT be left with a bare sentinel; the explanation wins.
    final_text = outcome["final_response"]
    assert final_text != "(empty)"
    assert final_text.strip() != ""
    assert "No reply:" in final_text
|
|
|
|
|
|
def test_run_conversation_normal_reply_stays_quiet():
    """A normal short reply like 'Done.' must NOT get an explainer footer."""
    agent = _make_agent(max_iterations=10)
    create_call = agent.client.chat.completions.create
    create_call.side_effect = [
        _mock_response(content="Done.", finish_reason="stop"),
    ]

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("do something")

    assert outcome["turn_exit_reason"].startswith("text_response")
    reply = outcome["final_response"]
    assert reply == "Done."
    assert "No reply:" not in reply
|