fix(gateway): recover model on post-interrupt turn; gate fallback status (#35381)

Empty model could reach the API on a recovery turn after stream_interrupt_abort,
failing HTTP 400 "No models provided" with no recovery — the session went
silent until the user manually re-sent (#35314).

- gateway/run.py: cache last-successfully-resolved model per session (+ a
  process-wide slot); when a fresh config read returns an empty model on a
  recovery turn, reuse the last-known-good instead of building model="".
- run_agent.py + agent/conversation_loop.py: only emit "trying fallback..."
  status when a fallback chain actually exists, so the UI stops announcing a
  fallback that will never run (also #17446).
- tests: empty-model recovery + _has_pending_fallback gate.
This commit is contained in:
Teknium
2026-05-30 07:28:06 -07:00
committed by GitHub
parent 10dec7c6dc
commit 2b16b756a7
4 changed files with 208 additions and 8 deletions

View File

@ -1492,7 +1492,8 @@ def run_conversation(
if retry_count >= max_retries:
# Try fallback before giving up
agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
if agent._has_pending_fallback():
agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
if agent._try_activate_fallback():
retry_count = 0
compression_attempts = 0
@ -3094,12 +3095,17 @@ def run_conversation(
) and not is_context_length_error
if is_client_error:
# Try fallback before aborting — a different provider
# may not have the same issue (rate limit, auth, etc.)
if classified.reason == FailoverReason.content_policy_blocked:
agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
else:
agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
# Try fallback before aborting — a different provider may
# not have the same issue (rate limit, auth, etc.). Only
# announce the attempt when a fallback chain actually
# exists; otherwise "trying fallback..." is a lie and the
# session looks like it's recovering when it's about to
# abort silently (#35314, #17446).
if agent._has_pending_fallback():
if classified.reason == FailoverReason.content_policy_blocked:
agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
else:
agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
if agent._try_activate_fallback():
retry_count = 0
compression_attempts = 0
@ -3242,7 +3248,8 @@ def run_conversation(
retry_count = 0
continue
# Try fallback before giving up entirely
agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
if agent._has_pending_fallback():
agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
if agent._try_activate_fallback():
retry_count = 0
compression_attempts = 0

View File

@ -1730,6 +1730,14 @@ class GatewayRunner:
self._running_agents: Dict[str, Any] = {}
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
# Last successfully-resolved (non-empty) model, keyed by session. Used
# as a fallback when a fresh config read transiently returns an empty
# model (e.g. an mtime-keyed config-cache miss during a post-interrupt
# recovery turn). Without this, the agent is built with model="" and
# every API call fails HTTP 400 "No models provided" — the session
# goes silent until the user manually re-sends. See #35314. The "*"
# key holds a process-wide last-known-good for first-seen sessions.
self._last_resolved_model: Dict[str, str] = {}
# Overflow buffer for explicit /queue commands. The adapter-level
# _pending_messages dict is a single slot per session (designed for
# "next-turn" follow-ups where repeated sends collapse into one
@ -2488,6 +2496,32 @@ class GatewayRunner:
except Exception:
pass
# Final safety net (#35314): if resolution still produced an empty
# model — e.g. a transient config-cache miss during a post-interrupt
# recovery turn returned an empty user_config — reuse the last model we
# successfully resolved for this session (or, failing that, the most
# recent one resolved process-wide). Building an agent with model=""
# makes every API call fail HTTP 400 "No models provided" and the
# session goes silent until the user manually re-sends. getattr guards
# against bare test runners built via object.__new__.
_last_good = getattr(self, "_last_resolved_model", None)
if _last_good is not None:
if not model:
_recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*")
if _recovered:
logger.warning(
"Empty model resolved for session=%s — recovering "
"last-known-good model %s (config read likely returned "
"empty; see #35314)",
resolved_session_key or "", _recovered,
)
model = _recovered
elif model:
# Cache the good resolution for future recovery turns.
if resolved_session_key:
_last_good[resolved_session_key] = model
_last_good["*"] = model
return model, runtime_kwargs
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:

View File

@ -3550,6 +3550,18 @@ class AIAgent:
from agent.chat_completion_helpers import try_activate_fallback
return try_activate_fallback(self, reason)
def _has_pending_fallback(self) -> bool:
"""Whether a fallback provider is actually available to switch to.
Used to gate user-facing "trying fallback..." status so we don't
announce a fallback that will never be attempted (the user has no
fallback chain configured). Mirrors the early-return guard in
``try_activate_fallback`` (#35314, #17446).
"""
chain = getattr(self, "_fallback_chain", None) or []
index = getattr(self, "_fallback_index", 0)
return index < len(chain)
# ── Per-turn primary restoration ─────────────────────────────────────
def _restore_primary_runtime(self) -> bool:

View File

@ -0,0 +1,147 @@
"""Regression tests for #35314 — empty model on the post-interrupt recovery turn.
After a ``stream_interrupt_abort`` during an active gateway session, the recovery
turn was sometimes built with ``model=""`` (a transient config-cache miss returned
an empty ``user_config``). Every API call then failed HTTP 400 "No models
provided", "trying fallback..." was logged but never executed (the user had no
fallback configured), and the session went silent until the user re-sent.
These tests pin two fixes:
1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved
model per session and recovers it when a fresh resolution comes back empty.
2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only
announced when a fallback chain actually exists.
"""
import threading
import gateway.run as gateway_run
def _make_runner():
runner = object.__new__(gateway_run.GatewayRunner)
runner._session_model_overrides = {}
runner._last_resolved_model = {}
runner._service_tier = None
runner._agent_cache = {}
runner._agent_cache_lock = threading.Lock()
return runner
def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"):
"""Stub gateway model + runtime resolution to a known state."""
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda cfg=None: model_from_config)
monkeypatch.setattr(
gateway_run,
"_resolve_runtime_agent_kwargs",
lambda: {
"provider": provider,
"api_key": "x",
"base_url": "https://openrouter.ai/api/v1",
"api_mode": "chat_completions",
},
)
def test_normal_turn_caches_last_resolved_model(monkeypatch):
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
runner = _make_runner()
sk = "agent:main:discord:dm:123"
model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
assert model == "deepseek/deepseek-v4-flash"
# Cached per-session AND process-wide for first-seen-session recovery.
assert runner._last_resolved_model[sk] == "deepseek/deepseek-v4-flash"
assert runner._last_resolved_model["*"] == "deepseek/deepseek-v4-flash"
def test_empty_model_recovers_session_last_good(monkeypatch):
runner = _make_runner()
sk = "agent:main:discord:dm:123"
# Turn 1: config has the model — cache it.
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
# Turn 2: simulate the transient empty config read (the #35314 race).
_patch_resolution(monkeypatch, model_from_config="", provider="")
model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={})
assert model == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''"
def test_empty_model_new_session_recovers_global_last_good(monkeypatch):
runner = _make_runner()
# Prime a different session so the process-wide "*" slot is populated.
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:111", user_config={"model": {}})
# A brand-new session that hits an empty config read still recovers via "*".
_patch_resolution(monkeypatch, model_from_config="", provider="")
model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:999", user_config={})
assert model == "deepseek/deepseek-v4-flash"
def test_cold_start_empty_model_does_not_crash(monkeypatch):
"""No last-good anywhere + empty config → returns '' gracefully (no exception)."""
_patch_resolution(monkeypatch, model_from_config="", provider="")
runner = _make_runner()
model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:1", user_config={})
assert model == ""
def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch):
"""object.__new__ runners (test helpers / pitfall #17) lack _last_resolved_model.
The getattr guard must tolerate the missing attribute.
"""
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
runner = object.__new__(gateway_run.GatewayRunner)
runner._session_model_overrides = {}
runner._service_tier = None
# Deliberately omit _last_resolved_model.
model, _ = runner._resolve_session_agent_runtime(session_key="x", user_config={"model": {}})
assert model == "deepseek/deepseek-v4-flash"
# ── _has_pending_fallback gate ──────────────────────────────────────────────
def _bare_agent():
import run_agent
return object.__new__(run_agent.AIAgent)
def test_has_pending_fallback_empty_chain():
agent = _bare_agent()
agent._fallback_chain = []
agent._fallback_index = 0
assert agent._has_pending_fallback() is False
def test_has_pending_fallback_with_chain():
agent = _bare_agent()
agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
agent._fallback_index = 0
assert agent._has_pending_fallback() is True
def test_has_pending_fallback_exhausted_chain():
agent = _bare_agent()
agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
agent._fallback_index = 1
assert agent._has_pending_fallback() is False
def test_has_pending_fallback_missing_attrs():
"""Bare agent with no fallback attributes set must default to False, not crash."""
agent = _bare_agent()
assert agent._has_pending_fallback() is False