From ab2472e6924269840a9918ecb0887d42d3c1a1f6 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 2 Jun 2026 17:14:36 -0700
Subject: [PATCH] fix(aux): self-heal Nous-routed calls when a pinned model leaves the catalog (#37732)

A long-lived process (gateway, watcher) caches the Nous Portal's
recommended-models payload and can pin a model for its whole lifetime.
When that model is later dropped from the Nous -> OpenRouter catalog,
every auxiliary call 404s with 'model does not exist in our
configuration or OpenRouter catalog' until the process restarts.

Now such a 404 force-refreshes the Portal recommendation and retries
once with the current pick (or the gemini-3-flash-preview default).
Scoped to Nous-routed calls only.

- _is_model_not_found_error(): 404/400 'not found / does not exist /
  not a valid model' predicate, excludes billing keywords so it never
  overlaps _is_payment_error.
- _refresh_nous_recommended_model(): force-refresh fetch, returns a
  model distinct from the one that failed, else the known-good default.
- Wired into both call_llm and async_call_llm error chains.
---
 agent/auxiliary_client.py            | 132 +++++++++++++++++++++++++++
 tests/agent/test_auxiliary_client.py | 104 +++++++++++++++++++++
 2 files changed, 236 insertions(+)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 4c8877232..961e30313 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1621,6 +1621,47 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
     )
 
 
+def _refresh_nous_recommended_model(
+    *, vision: bool, stale_model: Optional[str]
+) -> Optional[str]:
+    """Re-fetch the Nous Portal's recommended model after a stale-model 404.
+
+    Long-lived processes (gateway, watchers) cache the Portal's
+    ``recommended-models`` payload for 10 minutes and, in practice, can pin a
+    model for the whole process lifetime. When that model is later dropped from
+    the Nous → OpenRouter catalog, every auxiliary call 404s with
+    "model does not exist". This forces a fresh Portal fetch and returns a
+    model name to retry with:
+
+    * the Portal's current recommendation for the task, if it differs from
+      the model that just failed; otherwise
+    * ``_NOUS_MODEL`` (google/gemini-3-flash-preview), the known-good default,
+      if it too differs from the failed model.
+
+    Returns ``None`` when no usable alternative is available (e.g. the Portal
+    still recommends the exact model that just 404'd and the default also
+    matches it) — callers should then let the original error propagate.
+    """
+    stale = (stale_model or "").strip().lower()
+    fresh: Optional[str] = None
+    try:
+        from hermes_cli.models import get_nous_recommended_aux_model
+
+        fresh = get_nous_recommended_aux_model(vision=vision, force_refresh=True)
+    except Exception as exc:
+        logger.debug(
+            "Nous recommended-model refresh failed (%s); using default %s",
+            exc, _NOUS_MODEL,
+        )
+    if fresh and fresh.strip().lower() != stale:
+        return fresh
+    # Portal recommendation unchanged or unavailable — fall back to the
+    # hardcoded known-good default, but only if it's actually different.
+    if _NOUS_MODEL.strip().lower() != stale:
+        return _NOUS_MODEL
+    return None
+
+
 def _read_main_model() -> str:
     """Read the user's configured main model from config.yaml.
 
@@ -2451,6 +2492,46 @@ def _is_unsupported_temperature_error(exc: Exception) -> bool:
     return _is_unsupported_parameter_error(exc, "temperature")
 
 
+def _is_model_not_found_error(exc: Exception) -> bool:
+    """Detect "the requested model doesn't exist" errors (404 / invalid model).
+
+    This fires when a resolved model name is no longer served by the endpoint
+    — most commonly when a long-lived process pinned a Portal-recommended model
+    that has since been dropped from the Nous → OpenRouter catalog. The Nous
+    proxy returns 404 with a body like::
+
+        Model 'gpt-5.4-mini' not found. The requested model does not exist
+        in our configuration or OpenRouter catalog.
+
+    Distinct from :func:`_is_payment_error` (which also matches some 404s for
+    free-tier/credit language) — this one keys on "does not exist / not found /
+    not a valid model" phrasing, and explicitly excludes the billing keywords
+    that the payment path already owns so the two predicates don't overlap.
+    """
+    status = getattr(exc, "status_code", None)
+    err_lower = str(exc).lower()
+    # Billing/quota 404s belong to _is_payment_error — don't claim them here.
+    if any(kw in err_lower for kw in (
+        "credits", "insufficient funds", "billing", "out of funds",
+        "balance_depleted", "no usable credits", "free tier", "free-tier",
+        "not available on the free tier",
+    )):
+        return False
+    if status not in {404, 400, None}:
+        return False
+    return any(kw in err_lower for kw in (
+        "model does not exist",
+        "does not exist in our configuration",
+        "openrouter catalog",
+        "is not a valid model",
+        "no such model",
+        "model not found",
+        "the model `",  # OpenAI-style: "The model `X` does not exist"
+        "model_not_found",
+        "unknown model",
+    ))
+
+
 def _evict_cached_clients(provider: str) -> None:
     """Drop cached auxiliary clients for a provider so fresh creds are used."""
     normalized = _normalize_aux_provider(provider)
@@ -5027,6 +5108,32 @@ def call_llm(
                     raise
                 first_err = retry_err
 
+        # ── Stale-model self-heal (Nous Portal recommendation drift) ───
+        # A long-lived process can pin a Portal-recommended model that has
+        # since been dropped from the Nous → OpenRouter catalog, so every
+        # auxiliary call 404s with "model does not exist". Force a fresh
+        # Portal fetch and retry once with the current recommendation (or the
+        # known-good default). Only applies to Nous-routed calls.
+        _heal_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
+        )
+        if _is_model_not_found_error(first_err) and _heal_is_nous:
+            healed_model = _refresh_nous_recommended_model(
+                vision=(task == "vision"), stale_model=kwargs.get("model"))
+            if healed_model and healed_model != kwargs.get("model"):
+                logger.warning(
+                    "Auxiliary %s: model %r no longer in Nous catalog; "
+                    "retrying with refreshed recommendation %r",
+                    task or "call", kwargs.get("model"), healed_model,
+                )
+                kwargs["model"] = healed_model
+                try:
+                    return _validate_llm_response(
+                        client.chat.completions.create(**kwargs), task)
+                except Exception as retry_err:
+                    first_err = retry_err
+
         # ── Nous auth refresh parity with main agent ──────────────────
         client_is_nous = (
             resolved_provider == "nous"
@@ -5464,6 +5571,31 @@ async def async_call_llm(
                     raise
                 first_err = retry_err
 
+        # ── Stale-model self-heal (Nous Portal recommendation drift) ───
+        # See the sync call_llm() path for the rationale: a long-lived process
+        # can pin a Portal-recommended model that has since been dropped from
+        # the Nous → OpenRouter catalog, 404'ing every auxiliary call. Force a
+        # fresh Portal fetch and retry once with the current recommendation.
+        _heal_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
+        )
+        if _is_model_not_found_error(first_err) and _heal_is_nous:
+            healed_model = _refresh_nous_recommended_model(
+                vision=(task == "vision"), stale_model=kwargs.get("model"))
+            if healed_model and healed_model != kwargs.get("model"):
+                logger.warning(
+                    "Auxiliary %s (async): model %r no longer in Nous catalog; "
+                    "retrying with refreshed recommendation %r",
+                    task or "call", kwargs.get("model"), healed_model,
+                )
+                kwargs["model"] = healed_model
+                try:
+                    return _validate_llm_response(
+                        await client.chat.completions.create(**kwargs), task)
+                except Exception as retry_err:
+                    first_err = retry_err
+
         # ── Nous auth refresh parity with main agent ──────────────────
         client_is_nous = (
             resolved_provider == "nous"
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 97c3a7f6b..e0c348378 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -22,6 +22,8 @@ from agent.auxiliary_client import (
     _get_provider_chain,
     _is_payment_error,
     _is_rate_limit_error,
+    _is_model_not_found_error,
+    _refresh_nous_recommended_model,
     _normalize_aux_provider,
     _try_payment_fallback,
     _resolve_auto,
@@ -1298,6 +1300,108 @@ class TestIsPaymentError:
         assert _is_payment_error(exc) is False
 
 
+class TestIsModelNotFoundError:
+    """_is_model_not_found_error detects stale/invalid model 404s, distinct
+    from payment errors."""
+
+    def test_nous_openrouter_catalog_404(self):
+        """The exact incident error: a Portal-recommended model dropped from
+        the Nous → OpenRouter catalog."""
+        exc = Exception(
+            "Model 'gpt-5.4-mini' not found. The requested model does not "
+            "exist in our configuration or OpenRouter catalog."
+        )
+        exc.status_code = 404
+        assert _is_model_not_found_error(exc) is True
+
+    def test_openai_style_model_does_not_exist(self):
+        exc = Exception("The model `gpt-9-turbo` does not exist")
+        exc.status_code = 404
+        assert _is_model_not_found_error(exc) is True
+
+    def test_invalid_model_id_400(self):
+        exc = Exception("openrouter/foo/bar is not a valid model ID")
+        exc.status_code = 400
+        assert _is_model_not_found_error(exc) is True
+
+    def test_no_such_model(self):
+        exc = Exception("no such model: phantom-v1")
+        exc.status_code = 400
+        assert _is_model_not_found_error(exc) is True
+
+    def test_billing_404_is_not_model_not_found(self):
+        """Free-tier / credit 404s belong to _is_payment_error, not here —
+        the two predicates must not overlap."""
+        exc = Exception(
+            "Model 'gpt-5' is not available on the free tier. Upgrade."
+        )
+        exc.status_code = 404
+        assert _is_model_not_found_error(exc) is False
+        assert _is_payment_error(exc) is True
+
+    def test_out_of_funds_404_is_not_model_not_found(self):
+        exc = Exception(
+            "Your API key is blocked or out of funds. model_not_found"
+        )
+        exc.status_code = 404
+        # billing keyword wins — payment owns it
+        assert _is_model_not_found_error(exc) is False
+
+    def test_rate_limit_is_not_model_not_found(self):
+        exc = Exception("rate limit exceeded, retry after 5s")
+        exc.status_code = 429
+        assert _is_model_not_found_error(exc) is False
+
+    def test_500_is_not_model_not_found(self):
+        exc = Exception("model does not exist")  # right phrase, wrong status
+        exc.status_code = 500
+        assert _is_model_not_found_error(exc) is False
+
+
+class TestRefreshNousRecommendedModel:
+    """_refresh_nous_recommended_model picks a fresh model after a stale 404."""
+
+    def test_returns_fresh_portal_recommendation(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.models.get_nous_recommended_aux_model",
+            lambda **kw: "stepfun/step-3.7-flash:free",
+        )
+        out = _refresh_nous_recommended_model(
+            vision=True, stale_model="openai/gpt-5.4-mini")
+        assert out == "stepfun/step-3.7-flash:free"
+
+    def test_falls_back_to_default_when_portal_matches_stale(self, monkeypatch):
+        """If the Portal still recommends the model that just 404'd, fall back
+        to the known-good default."""
+        monkeypatch.setattr(
+            "hermes_cli.models.get_nous_recommended_aux_model",
+            lambda **kw: "openai/gpt-5.4-mini",
+        )
+        out = _refresh_nous_recommended_model(
+            vision=True, stale_model="openai/gpt-5.4-mini")
+        assert out == "google/gemini-3-flash-preview"
+
+    def test_falls_back_to_default_when_portal_unavailable(self, monkeypatch):
+        def _boom(**kw):
+            raise RuntimeError("portal down")
+        monkeypatch.setattr(
+            "hermes_cli.models.get_nous_recommended_aux_model", _boom)
+        out = _refresh_nous_recommended_model(
+            vision=False, stale_model="some/dead-model")
+        assert out == "google/gemini-3-flash-preview"
+
+    def test_returns_none_when_no_distinct_alternative(self, monkeypatch):
+        """When the failed model IS the default and the Portal has nothing
+        else, there's no usable alternative."""
+        monkeypatch.setattr(
+            "hermes_cli.models.get_nous_recommended_aux_model",
+            lambda **kw: "google/gemini-3-flash-preview",
+        )
+        out = _refresh_nous_recommended_model(
+            vision=False, stale_model="google/gemini-3-flash-preview")
+        assert out is None
+
+
 class TestIsRateLimitError:
     """_is_rate_limit_error detects 429 rate-limit errors warranting fallback."""
 