From 59b0ea98c8956a2fd1e875a673423d30175b7f9b Mon Sep 17 00:00:00 2001 From: Bartok9 Date: Fri, 29 May 2026 03:40:51 -0400 Subject: [PATCH] fix(agent): explain abnormal turn endings instead of blank/partial reply When a turn ends abnormally after substantive tool calls (empty content after retries, a partial/truncated stream, exhausted retries, or an iteration/budget limit), the CLI/TUI response area was left blank or showed only a fragment (e.g. "The") with no consolidated reason. The internal turn_exit_reason values (empty_response_exhausted, partial_stream_recovery, etc.) were never surfaced to the user. Add a turn-completion explainer that mirrors the existing file-mutation verifier footer: at turn end, map an abnormal turn_exit_reason to a short, actionable message and either replace the bare "(empty)" sentinel or append the reason after a partial fragment. Normal text_response exits (e.g. a terse "Done.") stay quiet. Gated by display.turn_completion_explainer (default on) with HERMES_TURN_COMPLETION_EXPLAINER env override, matching the file-mutation verifier seam. Closes #34452 --- agent/conversation_loop.py | 49 +++++ run_agent.py | 120 ++++++++++++ tests/run_agent/test_run_agent.py | 23 ++- .../test_turn_completion_explainer.py | 181 ++++++++++++++++++ 4 files changed, 368 insertions(+), 5 deletions(-) create mode 100644 tests/run_agent/test_turn_completion_explainer.py diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index e23a513aa..cf77d9a1b 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -4480,6 +4480,55 @@ def run_conversation( except Exception as _ver_err: logger.debug("file-mutation verifier footer failed: %s", _ver_err) + # Turn-completion explainer. + # When a turn ends abnormally after substantive work — empty content + # after retries, a partial/truncated stream, a still-pending tool + # result, or an iteration/budget limit — the user otherwise gets a + # blank or fragmentary response box with no consolidated reason why + # the agent stopped (#34452). Surface a single user-visible + # explanation derived from ``_turn_exit_reason``, mirroring the + # file-mutation verifier footer pattern above. + # + # Gate carefully so healthy turns stay quiet: + # - ``text_response(...)`` exits never produce an explanation + # (handled inside the formatter), so a terse ``Done.`` is silent. + # - We only ACT when there is no genuinely usable reply this turn: + # an empty response, the "(empty)" terminal sentinel, or a + # suspiciously short partial fragment with no terminating + # punctuation (e.g. "The"). A real short answer keeps its text. + if not interrupted: + try: + if agent._turn_completion_explainer_enabled(): + _stripped = (final_response or "").strip() + _is_empty_terminal = _stripped == "" or _stripped == "(empty)" + # A short fragment that is not a normal text_response exit + # and lacks sentence-ending punctuation is treated as a + # truncated partial (the "The" case from #34452). + _is_partial_fragment = ( + not _is_empty_terminal + and not str(_turn_exit_reason).startswith("text_response") + and len(_stripped) <= 24 + and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"} + ) + if _is_empty_terminal or _is_partial_fragment: + _explanation = agent._format_turn_completion_explanation( + _turn_exit_reason + ) + if _explanation: + if _is_empty_terminal: + # Replace the bare "(empty)"/blank sentinel with + # the actionable explanation. + final_response = _explanation + else: + # Keep the partial fragment, append the reason so + # the user sees both what arrived and why it + # stopped. + final_response = ( + _stripped + "\n\n" + _explanation + ) + except Exception as _exp_err: + logger.debug("turn-completion explainer failed: %s", _exp_err) + _response_transformed = False # Plugin hook: transform_llm_output diff --git a/run_agent.py b/run_agent.py index 55df748a5..a737fbd78 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2138,6 +2138,126 @@ class AIAgent: lines.append(f" • … and {remaining} more") return "\n".join(lines) + def _turn_completion_explainer_enabled(self) -> bool: + """Check whether the end-of-turn completion explainer footer is on. + + Config path: ``display.turn_completion_explainer`` (bool, default + True). ``HERMES_TURN_COMPLETION_EXPLAINER`` env var overrides + config. Exposed as a method so tests can patch a single seam, + mirroring ``_file_mutation_verifier_enabled``. + """ + try: + import os as _os + env = _os.environ.get("HERMES_TURN_COMPLETION_EXPLAINER") + if env is not None: + return env.strip().lower() not in {"0", "false", "no", "off"} + # Read from the persisted config.yaml so gateway and CLI share + # the same setting. Import lazily to avoid a startup-time cycle. + try: + from hermes_cli.config import load_config as _load_config + _cfg = _load_config() or {} + except Exception: + _cfg = {} + _display = _cfg.get("display") if isinstance(_cfg, dict) else None + if isinstance(_display, dict) and "turn_completion_explainer" in _display: + return bool(_display.get("turn_completion_explainer")) + except Exception: + pass + return True # safe default: explainer on + + @staticmethod + def _format_turn_completion_explanation(turn_exit_reason: str) -> str: + """Render a user-facing explanation for an abnormal turn ending. + + Maps the internal ``turn_exit_reason`` to a short, actionable + message so a turn that produced no usable assistant reply (empty + content after retries, a partial/truncated stream, a still-pending + tool result, or an iteration/budget limit) is never silent from + the UI's perspective — the symptom users report in #34452. + + Returns an empty string for reasons that are NOT abnormal (e.g. + a normal ``text_response(...)`` exit), so callers can concatenate + or substitute unconditionally without warning on healthy turns + like a terse ``Done.``. + """ + if not turn_exit_reason: + return "" + reason = str(turn_exit_reason) + + # Normal completion — stay quiet. ``text_response(...)`` is the + # healthy terminal; anything that produced a real reply is fine. + if reason.startswith("text_response"): + return "" + + prefix = "⚠️ Turn ended without a usable reply: " + if reason == "empty_response_exhausted": + return ( + prefix + + "the model returned empty content after retries and any " + "fallback providers. Try `continue`, switch model/provider, " + "or inspect the tool output above." + ) + if reason == "all_retries_exhausted_no_response": + return ( + prefix + + "all API retries were exhausted before a response was " + "produced (provider errors / rate limits). Try `continue` " + "or switch provider." + ) + if reason == "partial_stream_recovery": + return ( + prefix + + "streaming stopped early and only a partial response was " + "recovered. Send `continue` to resume from where it stopped." + ) + if reason == "fallback_prior_turn_content": + return ( + prefix + + "no new content was produced this turn; showing recovered " + "prior context. Send `continue` to retry." + ) + if reason == "interrupted_during_api_call": + return ( + prefix + + "the request was interrupted mid-call before a reply was " + "received. Send `continue` to retry." + ) + if reason == "budget_exhausted": + return ( + prefix + + "the per-turn iteration/cost budget was exhausted before a " + "final answer. Send `continue` to keep going." + ) + if reason == "ollama_runtime_context_too_small": + return ( + prefix + + "the local model's context window was too small to finish. " + "Increase the context size or use a larger model." + ) + if reason.startswith("max_iterations_reached"): + return ( + prefix + + "the maximum tool-iteration limit was reached before a " + "final answer. Send `continue` to keep going, or raise " + "`max_iterations`." + ) + if reason.startswith("error_near_max_iterations"): + return ( + prefix + + "an error occurred near the iteration limit before a final " + "answer. Check the tool output above, then send `continue`." + ) + if reason == "pending_tool_result": + return ( + prefix + + "the turn stopped while a tool result was still pending and " + "the model produced no follow-up text. Send `continue` to " + "let it summarize." + ) + # Unknown/diagnostic-only reasons (e.g. "unknown", guardrail_halt + # which already surfaces its own message) — don't second-guess. + return "" + def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None: """Forwarder — see ``agent.agent_runtime_helpers.apply_pending_steer_to_tool_results``.""" from agent.agent_runtime_helpers import apply_pending_steer_to_tool_results diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 07ff74930..0da60572c 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -3046,7 +3046,11 @@ class TestRunConversation: mock_compress.assert_not_called() # no compression triggered assert result["completed"] is True - assert result["final_response"] == "(empty)" + # #34452: the bare "(empty)" sentinel is now replaced by a + # user-visible end-of-turn explanation so the failure isn't silent. + assert result["final_response"] != "(empty)" + assert "without a usable reply" in result["final_response"] + assert result["turn_exit_reason"] == "empty_response_exhausted" assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries def test_reasoning_only_response_prefill_then_empty(self, agent): @@ -3066,7 +3070,9 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "(empty)" + # #34452: explanation replaces the bare "(empty)" sentinel. + assert result["final_response"] != "(empty)" + assert "without a usable reply" in result["final_response"] assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries def test_reasoning_only_prefill_succeeds_on_continuation(self, agent): @@ -3113,7 +3119,9 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "(empty)" + # #34452: explanation replaces the bare "(empty)" sentinel. + assert result["final_response"] != "(empty)" + assert "without a usable reply" in result["final_response"] assert result["api_calls"] == 4 # 1 original + 3 retries def test_truly_empty_response_succeeds_on_nudge(self, agent): @@ -3209,7 +3217,9 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "(empty)" + # #34452: explanation replaces the bare "(empty)" sentinel. + assert result["final_response"] != "(empty)" + assert "without a usable reply" in result["final_response"] def test_empty_response_emits_status_for_gateway(self, agent): """_emit_status is called during empty retries so gateway users see feedback.""" @@ -3235,7 +3245,10 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") - assert result["final_response"] == "(empty)" + # #34452: explanation replaces the bare "(empty)" sentinel, but the + # status emissions during retries are unchanged. + assert result["final_response"] != "(empty)" + assert "without a usable reply" in result["final_response"] # Should have emitted retry statuses (3 retries) + final failure retry_msgs = [m for m in status_messages if "retrying" in m.lower()] assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}" diff --git a/tests/run_agent/test_turn_completion_explainer.py b/tests/run_agent/test_turn_completion_explainer.py new file mode 100644 index 000000000..b120272b0 --- /dev/null +++ b/tests/run_agent/test_turn_completion_explainer.py @@ -0,0 +1,181 @@ +"""Tests for the end-of-turn completion explainer (#34452). + +When a turn ends abnormally after tools (empty content after retries, a +partial/truncated stream, exhausted retries, or an iteration/budget limit) +the user should get a single user-visible explanation of why the reply +stopped instead of a blank or fragmentary response box. Normal short +replies (e.g. ``Done.``) must stay quiet. + +These tests exercise: + 1. ``_format_turn_completion_explanation`` — the pure reason→message map. + 2. ``_turn_completion_explainer_enabled`` — the env/config seam. + 3. An end-to-end ``run_conversation`` turn that exhausts empty-response + retries and verifies the explanation reaches ``final_response``. + +All assertions work under the mocked OpenAI SDK used elsewhere in this +suite (we patch ``run_agent.OpenAI`` and drive ``agent.client``), so they +pass identically in CI and locally. +""" + +import os +import uuid +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from run_agent import AIAgent + + +# -------------------------------------------------------------------------- +# Fixtures (mirrors tests/run_agent/test_tool_call_guardrail_runtime.py) +# -------------------------------------------------------------------------- +def _mock_response(content="Hello", finish_reason="stop", tool_calls=None): + msg = SimpleNamespace(content=content, tool_calls=tool_calls) + choice = SimpleNamespace(message=msg, finish_reason=finish_reason) + return SimpleNamespace(choices=[choice], model="test/model", usage=None) + + +def _make_agent(max_iterations: int = 10, config: dict | None = None) -> AIAgent: + with ( + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("hermes_cli.config.load_config", return_value=config or {}), + patch("run_agent.OpenAI"), + ): + agent = AIAgent( + api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", + max_iterations=max_iterations, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent.client = MagicMock() + agent._cached_system_prompt = "You are helpful." + agent._use_prompt_caching = False + agent.tool_delay = 0 + agent.compression_enabled = False + agent.save_trajectories = False + # No fallback chain so empty responses exhaust deterministically. + agent._fallback_chain = [] + return agent + + +# -------------------------------------------------------------------------- +# 1. Pure formatter +# -------------------------------------------------------------------------- +def test_explanation_quiet_for_normal_text_response(): + """A healthy text_response exit must NOT produce any explanation.""" + out = AIAgent._format_turn_completion_explanation( + "text_response(finish_reason=stop)" + ) + assert out == "" + + +def test_explanation_quiet_for_empty_reason(): + assert AIAgent._format_turn_completion_explanation("") == "" + assert AIAgent._format_turn_completion_explanation("unknown") == "" + # guardrail_halt surfaces its own message; explainer stays out of the way. + assert AIAgent._format_turn_completion_explanation("guardrail_halt") == "" + + +def test_explanation_for_empty_response_exhausted(): + out = AIAgent._format_turn_completion_explanation("empty_response_exhausted") + assert out # non-empty + assert "empty content" in out + assert "continue" in out.lower() + + +def test_explanation_for_partial_stream_recovery(): + out = AIAgent._format_turn_completion_explanation("partial_stream_recovery") + assert "partial" in out.lower() + assert "continue" in out.lower() + + +def test_explanation_for_max_iterations_reached_prefix_match(): + """``max_iterations_reached(...)`` carries a parenthetical suffix.""" + out = AIAgent._format_turn_completion_explanation( + "max_iterations_reached(10/10)" + ) + assert "iteration" in out.lower() + + +def test_explanation_for_all_retries_exhausted(): + out = AIAgent._format_turn_completion_explanation( + "all_retries_exhausted_no_response" + ) + assert "retries" in out.lower() + + +# -------------------------------------------------------------------------- +# 2. Enable/disable seam +# -------------------------------------------------------------------------- +def test_explainer_enabled_by_default(): + agent = _make_agent() + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None) + with patch("hermes_cli.config.load_config", return_value={}): + assert agent._turn_completion_explainer_enabled() is True + + +def test_explainer_disabled_via_env(): + agent = _make_agent() + with patch.dict( + os.environ, {"HERMES_TURN_COMPLETION_EXPLAINER": "0"}, clear=False + ): + assert agent._turn_completion_explainer_enabled() is False + + +def test_explainer_disabled_via_config(): + agent = _make_agent() + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("HERMES_TURN_COMPLETION_EXPLAINER", None) + with patch( + "hermes_cli.config.load_config", + return_value={"display": {"turn_completion_explainer": False}}, + ): + assert agent._turn_completion_explainer_enabled() is False + + +# -------------------------------------------------------------------------- +# 3. End-to-end: empty-response exhaustion surfaces the explanation +# -------------------------------------------------------------------------- +def test_run_conversation_empty_exhausted_surfaces_explanation(): + """Four empty responses in a row should exhaust retries and the final + response should be the actionable explanation, not a bare '(empty)'.""" + agent = _make_agent(max_iterations=10) + # 4 empty responses: retries 1..3 then the terminal on the 4th. + agent.client.chat.completions.create.side_effect = [ + _mock_response(content="", finish_reason="stop") for _ in range(8) + ] + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("do something") + + assert result["turn_exit_reason"] == "empty_response_exhausted" + # The user must NOT be left with a bare sentinel; the explanation wins. + assert result["final_response"] != "(empty)" + assert result["final_response"].strip() != "" + assert "without a usable reply" in result["final_response"] + + +def test_run_conversation_normal_reply_stays_quiet(): + """A normal short reply like 'Done.' must NOT get an explainer footer.""" + agent = _make_agent(max_iterations=10) + agent.client.chat.completions.create.side_effect = [ + _mock_response(content="Done.", finish_reason="stop"), + ] + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("do something") + + assert result["turn_exit_reason"].startswith("text_response") + assert result["final_response"] == "Done." + assert "without a usable reply" not in result["final_response"]