diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 21199b9a2..bb6c6229c 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -1492,7 +1492,8 @@ def run_conversation( if retry_count >= max_retries: # Try fallback before giving up - agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...") + if agent._has_pending_fallback(): + agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...") if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -3094,12 +3095,17 @@ def run_conversation( ) and not is_context_length_error if is_client_error: - # Try fallback before aborting — a different provider - # may not have the same issue (rate limit, auth, etc.) - if classified.reason == FailoverReason.content_policy_blocked: - agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...") - else: - agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...") + # Try fallback before aborting — a different provider may + # not have the same issue (rate limit, auth, etc.). Only + # announce the attempt when a fallback chain actually + # exists; otherwise "trying fallback..." is a lie and the + # session looks like it's recovering when it's about to + # abort silently (#35314, #17446). + if agent._has_pending_fallback(): + if classified.reason == FailoverReason.content_policy_blocked: + agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...") + else: + agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...") if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -3242,7 +3248,8 @@ def run_conversation( retry_count = 0 continue # Try fallback before giving up entirely - agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...") + if agent._has_pending_fallback(): + agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...") if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 diff --git a/gateway/run.py b/gateway/run.py index 09f6f990b..f86b5c98c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1730,6 +1730,14 @@ class GatewayRunner: self._running_agents: Dict[str, Any] = {} self._running_agents_ts: Dict[str, float] = {} # start timestamp per session self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt + # Last successfully-resolved (non-empty) model, keyed by session. Used + # as a fallback when a fresh config read transiently returns an empty + # model (e.g. an mtime-keyed config-cache miss during a post-interrupt + # recovery turn). Without this, the agent is built with model="" and + # every API call fails HTTP 400 "No models provided" — the session + # goes silent until the user manually re-sends. See #35314. The "*" + # key holds a process-wide last-known-good for first-seen sessions. + self._last_resolved_model: Dict[str, str] = {} # Overflow buffer for explicit /queue commands. The adapter-level # _pending_messages dict is a single slot per session (designed for # "next-turn" follow-ups where repeated sends collapse into one @@ -2488,6 +2496,32 @@ class GatewayRunner: except Exception: pass + # Final safety net (#35314): if resolution still produced an empty + # model — e.g. a transient config-cache miss during a post-interrupt + # recovery turn returned an empty user_config — reuse the last model we + # successfully resolved for this session (or, failing that, the most + # recent one resolved process-wide). Building an agent with model="" + # makes every API call fail HTTP 400 "No models provided" and the + # session goes silent until the user manually re-sends. getattr guards + # against bare test runners built via object.__new__. + _last_good = getattr(self, "_last_resolved_model", None) + if _last_good is not None: + if not model: + _recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*") + if _recovered: + logger.warning( + "Empty model resolved for session=%s — recovering " + "last-known-good model %s (config read likely returned " + "empty; see #35314)", + resolved_session_key or "", _recovered, + ) + model = _recovered + elif model: + # Cache the good resolution for future recovery turns. + if resolved_session_key: + _last_good[resolved_session_key] = model + _last_good["*"] = model + return model, runtime_kwargs def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: diff --git a/run_agent.py b/run_agent.py index 88b93a0b2..be6f466c9 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3550,6 +3550,18 @@ class AIAgent: from agent.chat_completion_helpers import try_activate_fallback return try_activate_fallback(self, reason) + def _has_pending_fallback(self) -> bool: + """Whether a fallback provider is actually available to switch to. + + Used to gate user-facing "trying fallback..." status so we don't + announce a fallback that will never be attempted (the user has no + fallback chain configured). Mirrors the early-return guard in + ``try_activate_fallback`` (#35314, #17446). + """ + chain = getattr(self, "_fallback_chain", None) or [] + index = getattr(self, "_fallback_index", 0) + return index < len(chain) + # ── Per-turn primary restoration ───────────────────────────────────── def _restore_primary_runtime(self) -> bool: diff --git a/tests/gateway/test_empty_model_recovery.py b/tests/gateway/test_empty_model_recovery.py new file mode 100644 index 000000000..2c4be4479 --- /dev/null +++ b/tests/gateway/test_empty_model_recovery.py @@ -0,0 +1,147 @@ +"""Regression tests for #35314 — empty model on the post-interrupt recovery turn. + +After a ``stream_interrupt_abort`` during an active gateway session, the recovery +turn was sometimes built with ``model=""`` (a transient config-cache miss returned +an empty ``user_config``). Every API call then failed HTTP 400 "No models +provided", "trying fallback..." was logged but never executed (the user had no +fallback configured), and the session went silent until the user re-sent. + +These tests pin two fixes: + 1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved + model per session and recovers it when a fresh resolution comes back empty. + 2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only + announced when a fallback chain actually exists. +""" + +import threading + +import gateway.run as gateway_run + + +def _make_runner(): + runner = object.__new__(gateway_run.GatewayRunner) + runner._session_model_overrides = {} + runner._last_resolved_model = {} + runner._service_tier = None + runner._agent_cache = {} + runner._agent_cache_lock = threading.Lock() + return runner + + +def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"): + """Stub gateway model + runtime resolution to a known state.""" + monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda cfg=None: model_from_config) + monkeypatch.setattr( + gateway_run, + "_resolve_runtime_agent_kwargs", + lambda: { + "provider": provider, + "api_key": "x", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + }, + ) + + +def test_normal_turn_caches_last_resolved_model(monkeypatch): + _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash") + runner = _make_runner() + sk = "agent:main:discord:dm:123" + + model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}}) + + assert model == "deepseek/deepseek-v4-flash" + # Cached per-session AND process-wide for first-seen-session recovery. + assert runner._last_resolved_model[sk] == "deepseek/deepseek-v4-flash" + assert runner._last_resolved_model["*"] == "deepseek/deepseek-v4-flash" + + +def test_empty_model_recovers_session_last_good(monkeypatch): + runner = _make_runner() + sk = "agent:main:discord:dm:123" + + # Turn 1: config has the model — cache it. + _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash") + runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}}) + + # Turn 2: simulate the transient empty config read (the #35314 race). + _patch_resolution(monkeypatch, model_from_config="", provider="") + model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={}) + + assert model == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''" + + +def test_empty_model_new_session_recovers_global_last_good(monkeypatch): + runner = _make_runner() + + # Prime a different session so the process-wide "*" slot is populated. + _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash") + runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:111", user_config={"model": {}}) + + # A brand-new session that hits an empty config read still recovers via "*". + _patch_resolution(monkeypatch, model_from_config="", provider="") + model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:999", user_config={}) + + assert model == "deepseek/deepseek-v4-flash" + + +def test_cold_start_empty_model_does_not_crash(monkeypatch): + """No last-good anywhere + empty config → returns '' gracefully (no exception).""" + _patch_resolution(monkeypatch, model_from_config="", provider="") + runner = _make_runner() + + model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:1", user_config={}) + + assert model == "" + + +def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch): + """object.__new__ runners (test helpers / pitfall #17) lack _last_resolved_model. + + The getattr guard must tolerate the missing attribute. + """ + _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash") + runner = object.__new__(gateway_run.GatewayRunner) + runner._session_model_overrides = {} + runner._service_tier = None + # Deliberately omit _last_resolved_model. + + model, _ = runner._resolve_session_agent_runtime(session_key="x", user_config={"model": {}}) + + assert model == "deepseek/deepseek-v4-flash" + + +# ── _has_pending_fallback gate ────────────────────────────────────────────── + + +def _bare_agent(): + import run_agent + + return object.__new__(run_agent.AIAgent) + + +def test_has_pending_fallback_empty_chain(): + agent = _bare_agent() + agent._fallback_chain = [] + agent._fallback_index = 0 + assert agent._has_pending_fallback() is False + + +def test_has_pending_fallback_with_chain(): + agent = _bare_agent() + agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}] + agent._fallback_index = 0 + assert agent._has_pending_fallback() is True + + +def test_has_pending_fallback_exhausted_chain(): + agent = _bare_agent() + agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}] + agent._fallback_index = 1 + assert agent._has_pending_fallback() is False + + +def test_has_pending_fallback_missing_attrs(): + """Bare agent with no fallback attributes set must default to False, not crash.""" + agent = _bare_agent() + assert agent._has_pending_fallback() is False