fix(aux): self-heal Nous-routed calls when a pinned model leaves the catalog (#37732)

A long-lived process (gateway, watcher) caches the Nous Portal's
recommended-models payload and can pin a model for its whole lifetime.
When that model is later dropped from the Nous -> OpenRouter catalog,
every auxiliary call 404s with 'model does not exist in our
configuration or OpenRouter catalog' until the process restarts.

Now such a 404 force-refreshes the Portal recommendation and retries
once with the current pick (or the gemini-3-flash-preview default).
Scoped to Nous-routed calls only.

- _is_model_not_found_error(): 404/400 'not found / does not exist /
  not a valid model' predicate, excludes billing keywords so it never
  overlaps _is_payment_error.
- _refresh_nous_recommended_model(): force-refresh fetch, returns a
  model distinct from the one that failed, else the known-good default.
- Wired into both call_llm and async_call_llm error chains.
This commit is contained in:
Teknium
2026-06-02 17:14:36 -07:00
committed by GitHub
parent bb1c8b6f1a
commit ab2472e692
2 changed files with 236 additions and 0 deletions

View File

@ -1621,6 +1621,47 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
)
def _refresh_nous_recommended_model(
*, vision: bool, stale_model: Optional[str]
) -> Optional[str]:
"""Re-fetch the Nous Portal's recommended model after a stale-model 404.
Long-lived processes (gateway, watchers) cache the Portal's
``recommended-models`` payload for 10 minutes and, in practice, can pin a
model for the whole process lifetime. When that model is later dropped from
the Nous → OpenRouter catalog, every auxiliary call 404s with
"model does not exist". This forces a fresh Portal fetch and returns a
model name to retry with:
* the Portal's current recommendation for the task, if it differs from
the model that just failed; otherwise
* ``_NOUS_MODEL`` (google/gemini-3-flash-preview), the known-good default,
if it too differs from the failed model.
Returns ``None`` when no usable alternative is available (e.g. the Portal
still recommends the exact model that just 404'd and the default also
matches it) — callers should then let the original error propagate.
"""
stale = (stale_model or "").strip().lower()
fresh: Optional[str] = None
try:
from hermes_cli.models import get_nous_recommended_aux_model
fresh = get_nous_recommended_aux_model(vision=vision, force_refresh=True)
except Exception as exc:
logger.debug(
"Nous recommended-model refresh failed (%s); using default %s",
exc, _NOUS_MODEL,
)
if fresh and fresh.strip().lower() != stale:
return fresh
# Portal recommendation unchanged or unavailable — fall back to the
# hardcoded known-good default, but only if it's actually different.
if _NOUS_MODEL.strip().lower() != stale:
return _NOUS_MODEL
return None
def _read_main_model() -> str:
"""Read the user's configured main model from config.yaml.
@ -2451,6 +2492,46 @@ def _is_unsupported_temperature_error(exc: Exception) -> bool:
return _is_unsupported_parameter_error(exc, "temperature")
def _is_model_not_found_error(exc: Exception) -> bool:
"""Detect "the requested model doesn't exist" errors (404 / invalid model).
This fires when a resolved model name is no longer served by the endpoint
— most commonly when a long-lived process pinned a Portal-recommended model
that has since been dropped from the Nous → OpenRouter catalog. The Nous
proxy returns 404 with a body like::
Model 'gpt-5.4-mini' not found. The requested model does not exist
in our configuration or OpenRouter catalog.
Distinct from :func:`_is_payment_error` (which also matches some 404s for
free-tier/credit language) — this one keys on "does not exist / not found /
not a valid model" phrasing, and explicitly excludes the billing keywords
that the payment path already owns so the two predicates don't overlap.
"""
status = getattr(exc, "status_code", None)
err_lower = str(exc).lower()
# Billing/quota 404s belong to _is_payment_error — don't claim them here.
if any(kw in err_lower for kw in (
"credits", "insufficient funds", "billing", "out of funds",
"balance_depleted", "no usable credits", "free tier", "free-tier",
"not available on the free tier",
)):
return False
if status not in {404, 400, None}:
return False
return any(kw in err_lower for kw in (
"model does not exist",
"does not exist in our configuration",
"openrouter catalog",
"is not a valid model",
"no such model",
"model not found",
"the model `", # OpenAI-style: "The model `X` does not exist"
"model_not_found",
"unknown model",
))
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@ -5027,6 +5108,32 @@ def call_llm(
raise
first_err = retry_err
# ── Stale-model self-heal (Nous Portal recommendation drift) ───
# A long-lived process can pin a Portal-recommended model that has
# since been dropped from the Nous → OpenRouter catalog, so every
# auxiliary call 404s with "model does not exist". Force a fresh
# Portal fetch and retry once with the current recommendation (or the
# known-good default). Only applies to Nous-routed calls.
_heal_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
)
if _is_model_not_found_error(first_err) and _heal_is_nous:
healed_model = _refresh_nous_recommended_model(
vision=(task == "vision"), stale_model=kwargs.get("model"))
if healed_model and healed_model != kwargs.get("model"):
logger.warning(
"Auxiliary %s: model %r no longer in Nous catalog; "
"retrying with refreshed recommendation %r",
task or "call", kwargs.get("model"), healed_model,
)
kwargs["model"] = healed_model
try:
return _validate_llm_response(
client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
@ -5464,6 +5571,31 @@ async def async_call_llm(
raise
first_err = retry_err
# ── Stale-model self-heal (Nous Portal recommendation drift) ───
# See the sync call_llm() path for the rationale: a long-lived process
# can pin a Portal-recommended model that has since been dropped from
# the Nous → OpenRouter catalog, 404'ing every auxiliary call. Force a
# fresh Portal fetch and retry once with the current recommendation.
_heal_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
)
if _is_model_not_found_error(first_err) and _heal_is_nous:
healed_model = _refresh_nous_recommended_model(
vision=(task == "vision"), stale_model=kwargs.get("model"))
if healed_model and healed_model != kwargs.get("model"):
logger.warning(
"Auxiliary %s (async): model %r no longer in Nous catalog; "
"retrying with refreshed recommendation %r",
task or "call", kwargs.get("model"), healed_model,
)
kwargs["model"] = healed_model
try:
return _validate_llm_response(
await client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"