fix(gateway): recover model on post-interrupt turn; gate fallback status (#35381)
Empty model could reach the API on a recovery turn after stream_interrupt_abort, failing HTTP 400 "No models provided" with no recovery — the session went silent until the user manually re-sent (#35314). - gateway/run.py: cache last-successfully-resolved model per session (+ a process-wide slot); when a fresh config read returns an empty model on a recovery turn, reuse the last-known-good instead of building model="". - run_agent.py + agent/conversation_loop.py: only emit "trying fallback..." status when a fallback chain actually exists, so the UI stops announcing a fallback that will never run (also #17446). - tests: empty-model recovery + _has_pending_fallback gate.
This commit is contained in:
@ -1492,7 +1492,8 @@ def run_conversation(
|
||||
|
||||
if retry_count >= max_retries:
|
||||
# Try fallback before giving up
|
||||
agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
|
||||
if agent._has_pending_fallback():
|
||||
agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
|
||||
if agent._try_activate_fallback():
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
@ -3094,12 +3095,17 @@ def run_conversation(
|
||||
) and not is_context_length_error
|
||||
|
||||
if is_client_error:
|
||||
# Try fallback before aborting — a different provider
|
||||
# may not have the same issue (rate limit, auth, etc.)
|
||||
if classified.reason == FailoverReason.content_policy_blocked:
|
||||
agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
|
||||
else:
|
||||
agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
|
||||
# Try fallback before aborting — a different provider may
|
||||
# not have the same issue (rate limit, auth, etc.). Only
|
||||
# announce the attempt when a fallback chain actually
|
||||
# exists; otherwise "trying fallback..." is a lie and the
|
||||
# session looks like it's recovering when it's about to
|
||||
# abort silently (#35314, #17446).
|
||||
if agent._has_pending_fallback():
|
||||
if classified.reason == FailoverReason.content_policy_blocked:
|
||||
agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
|
||||
else:
|
||||
agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
|
||||
if agent._try_activate_fallback():
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
@ -3242,7 +3248,8 @@ def run_conversation(
|
||||
retry_count = 0
|
||||
continue
|
||||
# Try fallback before giving up entirely
|
||||
agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
|
||||
if agent._has_pending_fallback():
|
||||
agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
|
||||
if agent._try_activate_fallback():
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
|
||||
@ -1730,6 +1730,14 @@ class GatewayRunner:
|
||||
self._running_agents: Dict[str, Any] = {}
|
||||
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
|
||||
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
|
||||
# Last successfully-resolved (non-empty) model, keyed by session. Used
|
||||
# as a fallback when a fresh config read transiently returns an empty
|
||||
# model (e.g. an mtime-keyed config-cache miss during a post-interrupt
|
||||
# recovery turn). Without this, the agent is built with model="" and
|
||||
# every API call fails HTTP 400 "No models provided" — the session
|
||||
# goes silent until the user manually re-sends. See #35314. The "*"
|
||||
# key holds a process-wide last-known-good for first-seen sessions.
|
||||
self._last_resolved_model: Dict[str, str] = {}
|
||||
# Overflow buffer for explicit /queue commands. The adapter-level
|
||||
# _pending_messages dict is a single slot per session (designed for
|
||||
# "next-turn" follow-ups where repeated sends collapse into one
|
||||
@ -2488,6 +2496,32 @@ class GatewayRunner:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Final safety net (#35314): if resolution still produced an empty
|
||||
# model — e.g. a transient config-cache miss during a post-interrupt
|
||||
# recovery turn returned an empty user_config — reuse the last model we
|
||||
# successfully resolved for this session (or, failing that, the most
|
||||
# recent one resolved process-wide). Building an agent with model=""
|
||||
# makes every API call fail HTTP 400 "No models provided" and the
|
||||
# session goes silent until the user manually re-sends. getattr guards
|
||||
# against bare test runners built via object.__new__.
|
||||
_last_good = getattr(self, "_last_resolved_model", None)
|
||||
if _last_good is not None:
|
||||
if not model:
|
||||
_recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*")
|
||||
if _recovered:
|
||||
logger.warning(
|
||||
"Empty model resolved for session=%s — recovering "
|
||||
"last-known-good model %s (config read likely returned "
|
||||
"empty; see #35314)",
|
||||
resolved_session_key or "", _recovered,
|
||||
)
|
||||
model = _recovered
|
||||
elif model:
|
||||
# Cache the good resolution for future recovery turns.
|
||||
if resolved_session_key:
|
||||
_last_good[resolved_session_key] = model
|
||||
_last_good["*"] = model
|
||||
|
||||
return model, runtime_kwargs
|
||||
|
||||
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
|
||||
|
||||
12
run_agent.py
12
run_agent.py
@ -3550,6 +3550,18 @@ class AIAgent:
|
||||
from agent.chat_completion_helpers import try_activate_fallback
|
||||
return try_activate_fallback(self, reason)
|
||||
|
||||
def _has_pending_fallback(self) -> bool:
|
||||
"""Whether a fallback provider is actually available to switch to.
|
||||
|
||||
Used to gate user-facing "trying fallback..." status so we don't
|
||||
announce a fallback that will never be attempted (the user has no
|
||||
fallback chain configured). Mirrors the early-return guard in
|
||||
``try_activate_fallback`` (#35314, #17446).
|
||||
"""
|
||||
chain = getattr(self, "_fallback_chain", None) or []
|
||||
index = getattr(self, "_fallback_index", 0)
|
||||
return index < len(chain)
|
||||
|
||||
# ── Per-turn primary restoration ─────────────────────────────────────
|
||||
|
||||
def _restore_primary_runtime(self) -> bool:
|
||||
|
||||
147
tests/gateway/test_empty_model_recovery.py
Normal file
147
tests/gateway/test_empty_model_recovery.py
Normal file
@ -0,0 +1,147 @@
|
||||
"""Regression tests for #35314 — empty model on the post-interrupt recovery turn.
|
||||
|
||||
After a ``stream_interrupt_abort`` during an active gateway session, the recovery
|
||||
turn was sometimes built with ``model=""`` (a transient config-cache miss returned
|
||||
an empty ``user_config``). Every API call then failed HTTP 400 "No models
|
||||
provided", "trying fallback..." was logged but never executed (the user had no
|
||||
fallback configured), and the session went silent until the user re-sent.
|
||||
|
||||
These tests pin two fixes:
|
||||
1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved
|
||||
model per session and recovers it when a fresh resolution comes back empty.
|
||||
2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only
|
||||
announced when a fallback chain actually exists.
|
||||
"""
|
||||
|
||||
import threading
|
||||
|
||||
import gateway.run as gateway_run
|
||||
|
||||
|
||||
def _make_runner():
|
||||
runner = object.__new__(gateway_run.GatewayRunner)
|
||||
runner._session_model_overrides = {}
|
||||
runner._last_resolved_model = {}
|
||||
runner._service_tier = None
|
||||
runner._agent_cache = {}
|
||||
runner._agent_cache_lock = threading.Lock()
|
||||
return runner
|
||||
|
||||
|
||||
def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"):
|
||||
"""Stub gateway model + runtime resolution to a known state."""
|
||||
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda cfg=None: model_from_config)
|
||||
monkeypatch.setattr(
|
||||
gateway_run,
|
||||
"_resolve_runtime_agent_kwargs",
|
||||
lambda: {
|
||||
"provider": provider,
|
||||
"api_key": "x",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_mode": "chat_completions",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def test_normal_turn_caches_last_resolved_model(monkeypatch):
|
||||
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
|
||||
runner = _make_runner()
|
||||
sk = "agent:main:discord:dm:123"
|
||||
|
||||
model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
|
||||
|
||||
assert model == "deepseek/deepseek-v4-flash"
|
||||
# Cached per-session AND process-wide for first-seen-session recovery.
|
||||
assert runner._last_resolved_model[sk] == "deepseek/deepseek-v4-flash"
|
||||
assert runner._last_resolved_model["*"] == "deepseek/deepseek-v4-flash"
|
||||
|
||||
|
||||
def test_empty_model_recovers_session_last_good(monkeypatch):
|
||||
runner = _make_runner()
|
||||
sk = "agent:main:discord:dm:123"
|
||||
|
||||
# Turn 1: config has the model — cache it.
|
||||
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
|
||||
runner._resolve_session_agent_runtime(session_key=sk, user_config={"model": {"default": "x"}})
|
||||
|
||||
# Turn 2: simulate the transient empty config read (the #35314 race).
|
||||
_patch_resolution(monkeypatch, model_from_config="", provider="")
|
||||
model, _ = runner._resolve_session_agent_runtime(session_key=sk, user_config={})
|
||||
|
||||
assert model == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''"
|
||||
|
||||
|
||||
def test_empty_model_new_session_recovers_global_last_good(monkeypatch):
|
||||
runner = _make_runner()
|
||||
|
||||
# Prime a different session so the process-wide "*" slot is populated.
|
||||
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
|
||||
runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:111", user_config={"model": {}})
|
||||
|
||||
# A brand-new session that hits an empty config read still recovers via "*".
|
||||
_patch_resolution(monkeypatch, model_from_config="", provider="")
|
||||
model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:999", user_config={})
|
||||
|
||||
assert model == "deepseek/deepseek-v4-flash"
|
||||
|
||||
|
||||
def test_cold_start_empty_model_does_not_crash(monkeypatch):
|
||||
"""No last-good anywhere + empty config → returns '' gracefully (no exception)."""
|
||||
_patch_resolution(monkeypatch, model_from_config="", provider="")
|
||||
runner = _make_runner()
|
||||
|
||||
model, _ = runner._resolve_session_agent_runtime(session_key="agent:main:discord:dm:1", user_config={})
|
||||
|
||||
assert model == ""
|
||||
|
||||
|
||||
def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch):
|
||||
"""object.__new__ runners (test helpers / pitfall #17) lack _last_resolved_model.
|
||||
|
||||
The getattr guard must tolerate the missing attribute.
|
||||
"""
|
||||
_patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
|
||||
runner = object.__new__(gateway_run.GatewayRunner)
|
||||
runner._session_model_overrides = {}
|
||||
runner._service_tier = None
|
||||
# Deliberately omit _last_resolved_model.
|
||||
|
||||
model, _ = runner._resolve_session_agent_runtime(session_key="x", user_config={"model": {}})
|
||||
|
||||
assert model == "deepseek/deepseek-v4-flash"
|
||||
|
||||
|
||||
# ── _has_pending_fallback gate ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def _bare_agent():
|
||||
import run_agent
|
||||
|
||||
return object.__new__(run_agent.AIAgent)
|
||||
|
||||
|
||||
def test_has_pending_fallback_empty_chain():
|
||||
agent = _bare_agent()
|
||||
agent._fallback_chain = []
|
||||
agent._fallback_index = 0
|
||||
assert agent._has_pending_fallback() is False
|
||||
|
||||
|
||||
def test_has_pending_fallback_with_chain():
|
||||
agent = _bare_agent()
|
||||
agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
|
||||
agent._fallback_index = 0
|
||||
assert agent._has_pending_fallback() is True
|
||||
|
||||
|
||||
def test_has_pending_fallback_exhausted_chain():
|
||||
agent = _bare_agent()
|
||||
agent._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
|
||||
agent._fallback_index = 1
|
||||
assert agent._has_pending_fallback() is False
|
||||
|
||||
|
||||
def test_has_pending_fallback_missing_attrs():
|
||||
"""Bare agent with no fallback attributes set must default to False, not crash."""
|
||||
agent = _bare_agent()
|
||||
assert agent._has_pending_fallback() is False
|
||||
Reference in New Issue
Block a user