fix(agent): explain abnormal turn endings instead of blank/partial reply
When a turn ends abnormally after substantive tool calls (empty content after retries, a partial/truncated stream, exhausted retries, or an iteration/budget limit), the CLI/TUI response area was left blank or showed only a fragment (e.g. "The") with no consolidated reason. The internal turn_exit_reason values (empty_response_exhausted, partial_stream_recovery, etc.) were never surfaced to the user. Add a turn-completion explainer that mirrors the existing file-mutation verifier footer: at turn end, map an abnormal turn_exit_reason to a short, actionable message and either replace the bare "(empty)" sentinel or append the reason after a partial fragment. Normal text_response exits (e.g. a terse "Done.") stay quiet. Gated by display.turn_completion_explainer (default on) with HERMES_TURN_COMPLETION_EXPLAINER env override, matching the file-mutation verifier seam. Closes #34452
This commit is contained in:
@ -4480,6 +4480,55 @@ def run_conversation(
|
||||
except Exception as _ver_err:
|
||||
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
|
||||
|
||||
# Turn-completion explainer.
|
||||
# When a turn ends abnormally after substantive work — empty content
|
||||
# after retries, a partial/truncated stream, a still-pending tool
|
||||
# result, or an iteration/budget limit — the user otherwise gets a
|
||||
# blank or fragmentary response box with no consolidated reason why
|
||||
# the agent stopped (#34452). Surface a single user-visible
|
||||
# explanation derived from ``_turn_exit_reason``, mirroring the
|
||||
# file-mutation verifier footer pattern above.
|
||||
#
|
||||
# Gate carefully so healthy turns stay quiet:
|
||||
# - ``text_response(...)`` exits never produce an explanation
|
||||
# (handled inside the formatter), so a terse ``Done.`` is silent.
|
||||
# - We only ACT when there is no genuinely usable reply this turn:
|
||||
# an empty response, the "(empty)" terminal sentinel, or a
|
||||
# suspiciously short partial fragment with no terminating
|
||||
# punctuation (e.g. "The"). A real short answer keeps its text.
|
||||
if not interrupted:
|
||||
try:
|
||||
if agent._turn_completion_explainer_enabled():
|
||||
_stripped = (final_response or "").strip()
|
||||
_is_empty_terminal = _stripped == "" or _stripped == "(empty)"
|
||||
# A short fragment that is not a normal text_response exit
|
||||
# and lacks sentence-ending punctuation is treated as a
|
||||
# truncated partial (the "The" case from #34452).
|
||||
_is_partial_fragment = (
|
||||
not _is_empty_terminal
|
||||
and not str(_turn_exit_reason).startswith("text_response")
|
||||
and len(_stripped) <= 24
|
||||
and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"}
|
||||
)
|
||||
if _is_empty_terminal or _is_partial_fragment:
|
||||
_explanation = agent._format_turn_completion_explanation(
|
||||
_turn_exit_reason
|
||||
)
|
||||
if _explanation:
|
||||
if _is_empty_terminal:
|
||||
# Replace the bare "(empty)"/blank sentinel with
|
||||
# the actionable explanation.
|
||||
final_response = _explanation
|
||||
else:
|
||||
# Keep the partial fragment, append the reason so
|
||||
# the user sees both what arrived and why it
|
||||
# stopped.
|
||||
final_response = (
|
||||
_stripped + "\n\n" + _explanation
|
||||
)
|
||||
except Exception as _exp_err:
|
||||
logger.debug("turn-completion explainer failed: %s", _exp_err)
|
||||
|
||||
_response_transformed = False
|
||||
|
||||
# Plugin hook: transform_llm_output
|
||||
|
||||
120
run_agent.py
120
run_agent.py
@ -2138,6 +2138,126 @@ class AIAgent:
|
||||
lines.append(f" • … and {remaining} more")
|
||||
return "\n".join(lines)
|
||||
|
||||
def _turn_completion_explainer_enabled(self) -> bool:
|
||||
"""Check whether the end-of-turn completion explainer footer is on.
|
||||
|
||||
Config path: ``display.turn_completion_explainer`` (bool, default
|
||||
True). ``HERMES_TURN_COMPLETION_EXPLAINER`` env var overrides
|
||||
config. Exposed as a method so tests can patch a single seam,
|
||||
mirroring ``_file_mutation_verifier_enabled``.
|
||||
"""
|
||||
try:
|
||||
import os as _os
|
||||
env = _os.environ.get("HERMES_TURN_COMPLETION_EXPLAINER")
|
||||
if env is not None:
|
||||
return env.strip().lower() not in {"0", "false", "no", "off"}
|
||||
# Read from the persisted config.yaml so gateway and CLI share
|
||||
# the same setting. Import lazily to avoid a startup-time cycle.
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_config
|
||||
_cfg = _load_config() or {}
|
||||
except Exception:
|
||||
_cfg = {}
|
||||
_display = _cfg.get("display") if isinstance(_cfg, dict) else None
|
||||
if isinstance(_display, dict) and "turn_completion_explainer" in _display:
|
||||
return bool(_display.get("turn_completion_explainer"))
|
||||
except Exception:
|
||||
pass
|
||||
return True # safe default: explainer on
|
||||
|
||||
@staticmethod
|
||||
def _format_turn_completion_explanation(turn_exit_reason: str) -> str:
|
||||
"""Render a user-facing explanation for an abnormal turn ending.
|
||||
|
||||
Maps the internal ``turn_exit_reason`` to a short, actionable
|
||||
message so a turn that produced no usable assistant reply (empty
|
||||
content after retries, a partial/truncated stream, a still-pending
|
||||
tool result, or an iteration/budget limit) is never silent from
|
||||
the UI's perspective — the symptom users report in #34452.
|
||||
|
||||
Returns an empty string for reasons that are NOT abnormal (e.g.
|
||||
a normal ``text_response(...)`` exit), so callers can concatenate
|
||||
or substitute unconditionally without warning on healthy turns
|
||||
like a terse ``Done.``.
|
||||
"""
|
||||
if not turn_exit_reason:
|
||||
return ""
|
||||
reason = str(turn_exit_reason)
|
||||
|
||||
# Normal completion — stay quiet. ``text_response(...)`` is the
|
||||
# healthy terminal; anything that produced a real reply is fine.
|
||||
if reason.startswith("text_response"):
|
||||
return ""
|
||||
|
||||
prefix = "⚠️ Turn ended without a usable reply: "
|
||||
if reason == "empty_response_exhausted":
|
||||
return (
|
||||
prefix
|
||||
+ "the model returned empty content after retries and any "
|
||||
"fallback providers. Try `continue`, switch model/provider, "
|
||||
"or inspect the tool output above."
|
||||
)
|
||||
if reason == "all_retries_exhausted_no_response":
|
||||
return (
|
||||
prefix
|
||||
+ "all API retries were exhausted before a response was "
|
||||
"produced (provider errors / rate limits). Try `continue` "
|
||||
"or switch provider."
|
||||
)
|
||||
if reason == "partial_stream_recovery":
|
||||
return (
|
||||
prefix
|
||||
+ "streaming stopped early and only a partial response was "
|
||||
"recovered. Send `continue` to resume from where it stopped."
|
||||
)
|
||||
if reason == "fallback_prior_turn_content":
|
||||
return (
|
||||
prefix
|
||||
+ "no new content was produced this turn; showing recovered "
|
||||
"prior context. Send `continue` to retry."
|
||||
)
|
||||
if reason == "interrupted_during_api_call":
|
||||
return (
|
||||
prefix
|
||||
+ "the request was interrupted mid-call before a reply was "
|
||||
"received. Send `continue` to retry."
|
||||
)
|
||||
if reason == "budget_exhausted":
|
||||
return (
|
||||
prefix
|
||||
+ "the per-turn iteration/cost budget was exhausted before a "
|
||||
"final answer. Send `continue` to keep going."
|
||||
)
|
||||
if reason == "ollama_runtime_context_too_small":
|
||||
return (
|
||||
prefix
|
||||
+ "the local model's context window was too small to finish. "
|
||||
"Increase the context size or use a larger model."
|
||||
)
|
||||
if reason.startswith("max_iterations_reached"):
|
||||
return (
|
||||
prefix
|
||||
+ "the maximum tool-iteration limit was reached before a "
|
||||
"final answer. Send `continue` to keep going, or raise "
|
||||
"`max_iterations`."
|
||||
)
|
||||
if reason.startswith("error_near_max_iterations"):
|
||||
return (
|
||||
prefix
|
||||
+ "an error occurred near the iteration limit before a final "
|
||||
"answer. Check the tool output above, then send `continue`."
|
||||
)
|
||||
if reason == "pending_tool_result":
|
||||
return (
|
||||
prefix
|
||||
+ "the turn stopped while a tool result was still pending and "
|
||||
"the model produced no follow-up text. Send `continue` to "
|
||||
"let it summarize."
|
||||
)
|
||||
# Unknown/diagnostic-only reasons (e.g. "unknown", guardrail_halt
|
||||
# which already surfaces its own message) — don't second-guess.
|
||||
return ""
|
||||
|
||||
def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
|
||||
"""Forwarder — see ``agent.agent_runtime_helpers.apply_pending_steer_to_tool_results``."""
|
||||
from agent.agent_runtime_helpers import apply_pending_steer_to_tool_results
|
||||
|
||||
@ -3046,7 +3046,11 @@ class TestRunConversation:
|
||||
|
||||
mock_compress.assert_not_called() # no compression triggered
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
# #34452: the bare "(empty)" sentinel is now replaced by a
|
||||
# user-visible end-of-turn explanation so the failure isn't silent.
|
||||
assert result["final_response"] != "(empty)"
|
||||
assert "without a usable reply" in result["final_response"]
|
||||
assert result["turn_exit_reason"] == "empty_response_exhausted"
|
||||
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
|
||||
|
||||
def test_reasoning_only_response_prefill_then_empty(self, agent):
|
||||
@ -3066,7 +3070,9 @@ class TestRunConversation:
|
||||
):
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
# #34452: explanation replaces the bare "(empty)" sentinel.
|
||||
assert result["final_response"] != "(empty)"
|
||||
assert "without a usable reply" in result["final_response"]
|
||||
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
|
||||
|
||||
def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
|
||||
@ -3113,7 +3119,9 @@ class TestRunConversation:
|
||||
):
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
# #34452: explanation replaces the bare "(empty)" sentinel.
|
||||
assert result["final_response"] != "(empty)"
|
||||
assert "without a usable reply" in result["final_response"]
|
||||
assert result["api_calls"] == 4 # 1 original + 3 retries
|
||||
|
||||
def test_truly_empty_response_succeeds_on_nudge(self, agent):
|
||||
@ -3209,7 +3217,9 @@ class TestRunConversation:
|
||||
):
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
# #34452: explanation replaces the bare "(empty)" sentinel.
|
||||
assert result["final_response"] != "(empty)"
|
||||
assert "without a usable reply" in result["final_response"]
|
||||
|
||||
def test_empty_response_emits_status_for_gateway(self, agent):
|
||||
"""_emit_status is called during empty retries so gateway users see feedback."""
|
||||
@ -3235,7 +3245,10 @@ class TestRunConversation:
|
||||
):
|
||||
result = agent.run_conversation("answer me")
|
||||
|
||||
assert result["final_response"] == "(empty)"
|
||||
# #34452: explanation replaces the bare "(empty)" sentinel, but the
|
||||
# status emissions during retries are unchanged.
|
||||
assert result["final_response"] != "(empty)"
|
||||
assert "without a usable reply" in result["final_response"]
|
||||
# Should have emitted retry statuses (3 retries) + final failure
|
||||
retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
|
||||
assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"
|
||||
|
||||
181
tests/run_agent/test_turn_completion_explainer.py
Normal file
181
tests/run_agent/test_turn_completion_explainer.py
Normal file
@ -0,0 +1,181 @@
|
||||
"""Tests for the end-of-turn completion explainer (#34452).
|
||||
|
||||
When a turn ends abnormally after tools (empty content after retries, a
|
||||
partial/truncated stream, exhausted retries, or an iteration/budget limit)
|
||||
the user should get a single user-visible explanation of why the reply
|
||||
stopped instead of a blank or fragmentary response box. Normal short
|
||||
replies (e.g. ``Done.``) must stay quiet.
|
||||
|
||||
These tests exercise:
|
||||
1. ``_format_turn_completion_explanation`` — the pure reason→message map.
|
||||
2. ``_turn_completion_explainer_enabled`` — the env/config seam.
|
||||
3. An end-to-end ``run_conversation`` turn that exhausts empty-response
|
||||
retries and verifies the explanation reaches ``final_response``.
|
||||
|
||||
All assertions work under the mocked OpenAI SDK used elsewhere in this
|
||||
suite (we patch ``run_agent.OpenAI`` and drive ``agent.client``), so they
|
||||
pass identically in CI and locally.
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Fixtures (mirrors tests/run_agent/test_tool_call_guardrail_runtime.py)
|
||||
# --------------------------------------------------------------------------
|
||||
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None):
|
||||
msg = SimpleNamespace(content=content, tool_calls=tool_calls)
|
||||
choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
|
||||
return SimpleNamespace(choices=[choice], model="test/model", usage=None)
|
||||
|
||||
|
||||
def _make_agent(max_iterations: int = 10, config: dict | None = None) -> AIAgent:
|
||||
with (
|
||||
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("hermes_cli.config.load_config", return_value=config or {}),
|
||||
patch("run_agent.OpenAI"),
|
||||
):
|
||||
agent = AIAgent(
|
||||
api_key="test-key-1234567890",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
max_iterations=max_iterations,
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
agent.client = MagicMock()
|
||||
agent._cached_system_prompt = "You are helpful."
|
||||
agent._use_prompt_caching = False
|
||||
agent.tool_delay = 0
|
||||
agent.compression_enabled = False
|
||||
agent.save_trajectories = False
|
||||
# No fallback chain so empty responses exhaust deterministically.
|
||||
agent._fallback_chain = []
|
||||
return agent
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# 1. Pure formatter
|
||||
# --------------------------------------------------------------------------
|
||||
def test_explanation_quiet_for_normal_text_response():
|
||||
"""A healthy text_response exit must NOT produce any explanation."""
|
||||
out = AIAgent._format_turn_completion_explanation(
|
||||
"text_response(finish_reason=stop)"
|
||||
)
|
||||
assert out == ""
|
||||
|
||||
|
||||
def test_explanation_quiet_for_empty_reason():
|
||||
assert AIAgent._format_turn_completion_explanation("") == ""
|
||||
assert AIAgent._format_turn_completion_explanation("unknown") == ""
|
||||
# guardrail_halt surfaces its own message; explainer stays out of the way.
|
||||
assert AIAgent._format_turn_completion_explanation("guardrail_halt") == ""
|
||||
|
||||
|
||||
def test_explanation_for_empty_response_exhausted():
|
||||
out = AIAgent._format_turn_completion_explanation("empty_response_exhausted")
|
||||
assert out # non-empty
|
||||
assert "empty content" in out
|
||||
assert "continue" in out.lower()
|
||||
|
||||
|
||||
def test_explanation_for_partial_stream_recovery():
|
||||
out = AIAgent._format_turn_completion_explanation("partial_stream_recovery")
|
||||
assert "partial" in out.lower()
|
||||
assert "continue" in out.lower()
|
||||
|
||||
|
||||
def test_explanation_for_max_iterations_reached_prefix_match():
|
||||
"""``max_iterations_reached(...)`` carries a parenthetical suffix."""
|
||||
out = AIAgent._format_turn_completion_explanation(
|
||||
"max_iterations_reached(10/10)"
|
||||
)
|
||||
assert "iteration" in out.lower()
|
||||
|
||||
|
||||
def test_explanation_for_all_retries_exhausted():
|
||||
out = AIAgent._format_turn_completion_explanation(
|
||||
"all_retries_exhausted_no_response"
|
||||
)
|
||||
assert "retries" in out.lower()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# 2. Enable/disable seam
|
||||
# --------------------------------------------------------------------------
|
||||
def test_explainer_enabled_by_default():
|
||||
agent = _make_agent()
|
||||
with patch.dict(os.environ, {}, clear=False):
|
||||
os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None)
|
||||
with patch("hermes_cli.config.load_config", return_value={}):
|
||||
assert agent._turn_completion_explainer_enabled() is True
|
||||
|
||||
|
||||
def test_explainer_disabled_via_env():
|
||||
agent = _make_agent()
|
||||
with patch.dict(
|
||||
os.environ, {"HERMES_TURN_COMPLETION_EXPLAINER": "0"}, clear=False
|
||||
):
|
||||
assert agent._turn_completion_explainer_enabled() is False
|
||||
|
||||
|
||||
def test_explainer_disabled_via_config():
|
||||
agent = _make_agent()
|
||||
with patch.dict(os.environ, {}, clear=False):
|
||||
os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None)
|
||||
with patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"display": {"turn_completion_explainer": False}},
|
||||
):
|
||||
assert agent._turn_completion_explainer_enabled() is False
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# 3. End-to-end: empty-response exhaustion surfaces the explanation
|
||||
# --------------------------------------------------------------------------
|
||||
def test_run_conversation_empty_exhausted_surfaces_explanation():
|
||||
"""Four empty responses in a row should exhaust retries and the final
|
||||
response should be the actionable explanation, not a bare '(empty)'."""
|
||||
agent = _make_agent(max_iterations=10)
|
||||
# 4 empty responses: retries 1..3 then the terminal on the 4th.
|
||||
agent.client.chat.completions.create.side_effect = [
|
||||
_mock_response(content="", finish_reason="stop") for _ in range(8)
|
||||
]
|
||||
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
result = agent.run_conversation("do something")
|
||||
|
||||
assert result["turn_exit_reason"] == "empty_response_exhausted"
|
||||
# The user must NOT be left with a bare sentinel; the explanation wins.
|
||||
assert result["final_response"] != "(empty)"
|
||||
assert result["final_response"].strip() != ""
|
||||
assert "without a usable reply" in result["final_response"]
|
||||
|
||||
|
||||
def test_run_conversation_normal_reply_stays_quiet():
|
||||
"""A normal short reply like 'Done.' must NOT get an explainer footer."""
|
||||
agent = _make_agent(max_iterations=10)
|
||||
agent.client.chat.completions.create.side_effect = [
|
||||
_mock_response(content="Done.", finish_reason="stop"),
|
||||
]
|
||||
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
result = agent.run_conversation("do something")
|
||||
|
||||
assert result["turn_exit_reason"].startswith("text_response")
|
||||
assert result["final_response"] == "Done."
|
||||
assert "without a usable reply" not in result["final_response"]
|
||||
Reference in New Issue
Block a user