fix(gateway): recover model on post-interrupt turn; gate fallback status (#35381)

Empty model could reach the API on a recovery turn after stream_interrupt_abort, failing HTTP 400 "No models provided" with no recovery — the session went silent until the user manually re-sent (#35314). - gateway/run.py: cache last-successfully-resolved model per session (+ a process-wide slot); when a fresh config read returns an empty model on a recovery turn, reuse the last-known-good instead of building model="". - run_agent.py + agent/conversation_loop.py: only emit "trying fallback..." status when a fallback chain actually exists, so the UI stops announcing a fallback that will never run (also #17446). - tests: empty-model recovery + _has_pending_fallback gate.
2026-05-30 07:28:06 -07:00
parent 10dec7c6dc
commit 2b16b756a7
4 changed files with 208 additions and 8 deletions
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -1492,7 +1492,8 @@ def run_conversation(
                    
                    if retry_count >= max_retries:
                        # Try fallback before giving up
-                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
+                        if agent._has_pending_fallback():
+                            agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
                        if agent._try_activate_fallback():
                            retry_count = 0
                            compression_attempts = 0
@ -3094,12 +3095,17 @@ def run_conversation(
                ) and not is_context_length_error

                if is_client_error:
-                    # Try fallback before aborting — a different provider
-                    # may not have the same issue (rate limit, auth, etc.)
-                    if classified.reason == FailoverReason.content_policy_blocked:
-                        agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
-                    else:
-                        agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
+                    # Try fallback before aborting — a different provider may
+                    # not have the same issue (rate limit, auth, etc.). Only
+                    # announce the attempt when a fallback chain actually
+                    # exists; otherwise "trying fallback..." is a lie and the
+                    # session looks like it's recovering when it's about to
+                    # abort silently (#35314, #17446).
+                    if agent._has_pending_fallback():
+                        if classified.reason == FailoverReason.content_policy_blocked:
+                            agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
+                        else:
+                            agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
@ -3242,7 +3248,8 @@ def run_conversation(
                        retry_count = 0
                        continue
                    # Try fallback before giving up entirely
-                    agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
+                    if agent._has_pending_fallback():
+                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
--- a/gateway/run.py
+++ b/gateway/run.py
@ -1730,6 +1730,14 @@ class GatewayRunner:
        self._running_agents: Dict[str, Any] = {}
        self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
+        # Last successfully-resolved (non-empty) model, keyed by session. Used
+        # as a fallback when a fresh config read transiently returns an empty
+        # model (e.g. an mtime-keyed config-cache miss during a post-interrupt
+        # recovery turn). Without this, the agent is built with model="" and
+        # every API call fails HTTP 400 "No models provided" — the session
+        # goes silent until the user manually re-sends. See #35314. The "*"
+        # key holds a process-wide last-known-good for first-seen sessions.
+        self._last_resolved_model: Dict[str, str] = {}
        # Overflow buffer for explicit /queue commands.  The adapter-level
        # _pending_messages dict is a single slot per session (designed for
        # "next-turn" follow-ups where repeated sends collapse into one
@ -2488,6 +2496,32 @@ class GatewayRunner:
            except Exception:
                pass

+        # Final safety net (#35314): if resolution still produced an empty
+        # model — e.g. a transient config-cache miss during a post-interrupt
+        # recovery turn returned an empty user_config — reuse the last model we
+        # successfully resolved for this session (or, failing that, the most
+        # recent one resolved process-wide). Building an agent with model=""
+        # makes every API call fail HTTP 400 "No models provided" and the
+        # session goes silent until the user manually re-sends. getattr guards
+        # against bare test runners built via object.__new__.
+        _last_good = getattr(self, "_last_resolved_model", None)
+        if _last_good is not None:
+            if not model:
+                _recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*")
+                if _recovered:
+                    logger.warning(
+                        "Empty model resolved for session=%s — recovering "
+                        "last-known-good model %s (config read likely returned "
+                        "empty; see #35314)",
+                        resolved_session_key or "", _recovered,
+                    )
+                    model = _recovered
+            elif model:
+                # Cache the good resolution for future recovery turns.
+                if resolved_session_key:
+                    _last_good[resolved_session_key] = model
+                _last_good["*"] = model
+
        return model, runtime_kwargs

    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
--- a/run_agent.py
+++ b/run_agent.py
@ -3550,6 +3550,18 @@ class AIAgent:
        from agent.chat_completion_helpers import try_activate_fallback
        return try_activate_fallback(self, reason)

+    def _has_pending_fallback(self) -> bool:
+        """Whether a fallback provider is actually available to switch to.
+
+        Used to gate user-facing "trying fallback..." status so we don't
+        announce a fallback that will never be attempted (the user has no
+        fallback chain configured).  Mirrors the early-return guard in
+        ``try_activate_fallback`` (#35314, #17446).
+        """
+        chain = getattr(self, "_fallback_chain", None) or []
+        index = getattr(self, "_fallback_index", 0)
+        return index < len(chain)
+
    # ── Per-turn primary restoration ─────────────────────────────────────

    def _restore_primary_runtime(self) -> bool:
--- a/tests/gateway/test_empty_model_recovery.py
+++ b/tests/gateway/test_empty_model_recovery.py
@ -0,0 +1,147 @@
+"""Regression tests for #35314 — empty model on the post-interrupt recovery turn.
+
+After a ``stream_interrupt_abort`` during an active gateway session, the recovery
+turn was sometimes built with ``model=""`` (a transient config-cache miss returned
+an empty ``user_config``). Every API call then failed HTTP 400 "No models
+provided", "trying fallback..." was logged but never executed (the user had no
+fallback configured), and the session went silent until the user re-sent.
+
+These tests pin two fixes:
+  1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved
+     model per session and recovers it when a fresh resolution comes back empty.
+  2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only
+     announced when a fallback chain actually exists.
+"""
+
+import threading
+
+import gateway.run as gateway_run
+
+
+def _make_runner():
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner._session_model_overrides = {}
+    runner._last_resolved_model = {}
+    runner._service_tier = None
+    runner._agent_cache = {}
+    runner._agent_cache_lock = threading.Lock()
+    return runner
+
+
+def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"):
+    """Stub gateway model + runtime resolution to a known state."""
+    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda cfg=None: model_from_config)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": provider,
+            "api_key": "x",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_mode": "chat_completions",
+        },
+    )
+
+
+def test_normal_turn_caches_last_resolved_model(monkeypatch):
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner = _make_runner()
+    sk = "agent:main:discord:dm:123"
+
+    model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
+
+    assert model == "deepseek/deepseek-v4-flash"
+    # Cached per-session AND process-wide for first-seen-session recovery.
+    assert runner._last_resolved_model[sk] == "deepseek/deepseek-v4-flash"
+    assert runner._last_resolved_model["*"] == "deepseek/deepseek-v4-flash"
+
+
+def test_empty_model_recovers_session_last_good(monkeypatch):
+    runner = _make_runner()
+    sk = "agent:main:discord:dm:123"
+
+    # Turn 1: config has the model — cache it.
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
+
+    # Turn 2: simulate the transient empty config read (the #35314 race).
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={})
+
+    assert model == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''"
+
+
+def test_empty_model_new_session_recovers_global_last_good(monkeypatch):
+    runner = _make_runner()
+
+    # Prime a different session so the process-wide "*" slot is populated.
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:111", user_config={"model": {}})
+
+    # A brand-new session that hits an empty config read still recovers via "*".
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:999", user_config={})
+
+    assert model == "deepseek/deepseek-v4-flash"
+
+
+def test_cold_start_empty_model_does_not_crash(monkeypatch):
+    """No last-good anywhere + empty config → returns '' gracefully (no exception)."""
+    _patch_resolution(monkeypatch, model_from_config="", provider="")
+    runner = _make_runner()
+
+    model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:1", user_config={})
+
+    assert model == ""
+
+
+def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch):
+    """object.__new__ runners (test helpers / pitfall #17) lack _last_resolved_model.
+
+    The getattr guard must tolerate the missing attribute.
+    """
+    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner._session_model_overrides = {}
+    runner._service_tier = None
+    # Deliberately omit _last_resolved_model.
+
+    model, _ = runner._resolve_session_agent_runtime(session_key="x", user_config={"model": {}})
+
+    assert model == "deepseek/deepseek-v4-flash"
+
+
+# ── _has_pending_fallback gate ──────────────────────────────────────────────
+
+
+def _bare_agent():
+    import run_agent
+
+    return object.__new__(run_agent.AIAgent)
+
+
+def test_has_pending_fallback_empty_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = []
+    agent._fallback_index = 0
+    assert agent._has_pending_fallback() is False
+
+
+def test_has_pending_fallback_with_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
+    agent._fallback_index = 0
+    assert agent._has_pending_fallback() is True
+
+
+def test_has_pending_fallback_exhausted_chain():
+    agent = _bare_agent()
+    agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
+    agent._fallback_index = 1
+    assert agent._has_pending_fallback() is False
+
+
+def test_has_pending_fallback_missing_attrs():
+    """Bare agent with no fallback attributes set must default to False, not crash."""
+    agent = _bare_agent()
+    assert agent._has_pending_fallback() is False