fix(model-catalog): fall through to raw.github when Vercel 403s; swap step-3.5-flash for step-3.7-flash on OpenRouter+Nous

The docs site (Vercel) serves /docs/api/model-catalog.json behind a bot
mitigation rule that returns HTTP 403 + x-vercel-mitigated: challenge for
non-browser User-Agents — including urllib (what the CLI uses) and curl.
When that happens, get_catalog() falls back to the stale disk cache and
new model releases (Opus 4.8, etc.) never reach the /model picker even
though they're already in OPENROUTER_MODELS and the live OpenRouter API.

Adds a fallback URL chain: when the primary catalog URL fails, walk
DEFAULT_CATALOG_FALLBACK_URLS — currently the raw.githubusercontent.com
copy of the same file. GitHub raw doesn't bot-gate, so the manifest stays
reachable through Vercel firewall hiccups. Per-provider override URLs
keep their direct-fetch semantics (operators configure those specifically,
no implicit fallback).

Also swaps stepfun/step-3.5-flash for stepfun/step-3.7-flash in the
OpenRouter + Nous Portal curated picker lists. Native stepfun provider
configuration (api.stepfun.ai) is left alone — that depends on what
stepfun.ai itself serves, not what OpenRouter routes.

Test plan: 5 new TestFallbackChain tests cover primary-success,
primary-failure-fallback-success, all-fail, primary==fallback-dedup, and
end-to-end get_catalog routing through the new helper. Existing 23 tests
in test_model_catalog.py still pass (28 total). Wider tests/hermes_cli/
sweep: 5701/5701 pass.
This commit is contained in:
teknium1
2026-05-28 23:58:52 -07:00
committed by Teknium
parent 8d57281650
commit f2d88c820c
4 changed files with 124 additions and 6 deletions

View File

@ -64,6 +64,15 @@ logger = logging.getLogger(__name__)
# Primary location of the model catalog manifest on the docs site.
DEFAULT_CATALOG_URL = "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"

# Ordered backup locations, tried only after the primary URL fails.
#
# The docs site sits behind Vercel, which sometimes answers non-browser
# clients (urllib, curl) with HTTP 403 + x-vercel-mitigated: challenge.
# Without a backup the disk cache would go stale and newly released models
# would never show up in the picker. The raw GitHub URL serves the same
# manifest from the same repo and is not bot-gated.
DEFAULT_CATALOG_FALLBACK_URLS: tuple[str, ...] = (
    "https://raw.githubusercontent.com/NousResearch/hermes-agent"
    "/main/website/static/api/model-catalog.json",
)

DEFAULT_TTL_HOURS = 24
DEFAULT_FETCH_TIMEOUT = 8.0
SUPPORTED_SCHEMA_VERSION = 1
@ -139,6 +148,31 @@ def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
return data
def _fetch_manifest_with_fallback(
    primary_url: str,
    timeout: float,
    fallback_urls: tuple[str, ...] = DEFAULT_CATALOG_FALLBACK_URLS,
) -> dict[str, Any] | None:
    """Fetch the manifest from ``primary_url``, then from each fallback in order.

    Args:
        primary_url: URL attempted first.
        timeout: Timeout handed unchanged to every ``_fetch_manifest`` call.
        fallback_urls: Backup URLs walked in order once the primary fails.

    Returns:
        The first non-``None`` result produced by ``_fetch_manifest``, or
        ``None`` when the primary and every eligible fallback fail.
    """
    manifest = _fetch_manifest(primary_url, timeout)
    if manifest is not None:
        return manifest

    # Empty entries and entries equal to the primary are dropped, so an
    # operator who pointed the catalog URL at the raw GitHub copy does not
    # trigger a second fetch of the URL that just failed.
    candidates = (u for u in fallback_urls if u and u != primary_url)
    for candidate in candidates:
        manifest = _fetch_manifest(candidate, timeout)
        if manifest is None:
            continue
        logger.info("model catalog primary URL failed; using fallback %s", candidate)
        return manifest
    return None
def _validate_manifest(data: Any) -> bool:
"""Return True when ``data`` matches the minimum manifest shape."""
if not isinstance(data, dict):
@ -235,7 +269,7 @@ def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
return disk_data
# Need to (re)fetch. If it fails, fall back to any stale disk copy.
fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
fetched = _fetch_manifest_with_fallback(cfg["url"], DEFAULT_FETCH_TIMEOUT)
if fetched is not None:
_write_disk_cache(fetched)
new_disk_data, new_mtime = _read_disk_cache()

View File

@ -53,7 +53,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("google/gemini-3.1-pro-preview", ""),
("google/gemini-3.1-flash-lite-preview", ""),
("qwen/qwen3.6-35b-a3b", ""),
("stepfun/step-3.5-flash", ""),
("stepfun/step-3.7-flash", ""),
("minimax/minimax-m2.7", ""),
("z-ai/glm-5.1", ""),
("x-ai/grok-4.20", ""),
@ -160,7 +160,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"google/gemini-3.1-pro-preview",
"google/gemini-3.1-flash-lite-preview",
"qwen/qwen3.6-35b-a3b",
"stepfun/step-3.5-flash",
"stepfun/step-3.7-flash",
"minimax/minimax-m2.7",
"z-ai/glm-5.1",
"x-ai/grok-4.3",

View File

@ -172,6 +172,90 @@ class TestFetchFailure:
assert result == manifest
class TestFallbackChain:
    """Cover the primary-then-fallback walk in ``_fetch_manifest_with_fallback``.

    Regression: the Docusaurus site behind Vercel occasionally returns
    HTTP 403 + x-vercel-mitigated: challenge for urllib; without a fallback
    URL the user's disk cache freezes and new model releases (opus 4.8,
    etc.) never reach the picker.
    """

    PRIMARY = "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
    FALLBACK = (
        "https://raw.githubusercontent.com/NousResearch/hermes-agent"
        "/main/website/static/api/model-catalog.json"
    )

    def test_uses_primary_when_it_succeeds(self, isolated_home):
        """A successful primary fetch must short-circuit the chain."""
        from hermes_cli import model_catalog

        requested: list[str] = []

        def fake_fetch(url, timeout):
            requested.append(url)
            return _valid_manifest()

        with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
            result = model_catalog._fetch_manifest_with_fallback(self.PRIMARY, 5.0)

        assert result is not None
        assert requested == [self.PRIMARY], "fallback URLs must not be touched on primary success"

    def test_falls_through_to_raw_github_on_primary_failure(self, isolated_home):
        """When the primary host fails, the raw GitHub copy is fetched next."""
        from hermes_cli import model_catalog

        requested: list[str] = []

        def fake_fetch(url, timeout):
            requested.append(url)
            # ``None`` for the docs host simulates the Vercel 403.
            return None if "hermes-agent.nousresearch.com" in url else _valid_manifest()

        with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
            result = model_catalog._fetch_manifest_with_fallback(self.PRIMARY, 5.0)

        assert result is not None
        assert requested == [self.PRIMARY, self.FALLBACK]

    def test_returns_none_when_all_urls_fail(self, isolated_home):
        """Every URL failing yields ``None`` after one attempt per URL."""
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None) as fetch:
            result = model_catalog._fetch_manifest_with_fallback(self.PRIMARY, 5.0)

        assert result is None
        # Primary + every fallback URL was attempted exactly once.
        expected_attempts = 1 + len(model_catalog.DEFAULT_CATALOG_FALLBACK_URLS)
        assert fetch.call_count == expected_attempts

    def test_dedupes_when_primary_equals_fallback(self, isolated_home):
        """Operator who configured ``model_catalog.url`` to the raw GitHub URL
        should not get a duplicate fetch from the fallback list."""
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None) as fetch:
            model_catalog._fetch_manifest_with_fallback(self.FALLBACK, 5.0)

        assert fetch.call_count == 1, f"expected 1 call, got {fetch.call_count}"

    def test_get_catalog_uses_fallback_chain(self, isolated_home):
        """End-to-end: ``get_catalog`` routes through the fallback helper so
        a primary URL failure transparently produces a working catalog."""
        from hermes_cli import model_catalog

        manifest = _valid_manifest()
        requested: list[str] = []

        def fake_fetch(url, timeout):
            requested.append(url)
            return None if "hermes-agent.nousresearch.com" in url else manifest

        with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
            result = model_catalog.get_catalog(force_refresh=True)

        assert result == manifest
        assert any("raw.githubusercontent.com" in url for url in requested)
class TestCuratedAccessors:
def test_openrouter_returns_tuples(self, isolated_home):
from hermes_cli import model_catalog

View File

@ -1,6 +1,6 @@
{
"version": 1,
"updated_at": "2026-05-28T17:19:08Z",
"updated_at": "2026-05-29T06:55:44Z",
"metadata": {
"source": "hermes-agent repo",
"docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
@ -97,7 +97,7 @@
"description": ""
},
{
"id": "stepfun/step-3.5-flash",
"id": "stepfun/step-3.7-flash",
"description": ""
},
{
@ -210,7 +210,7 @@
"id": "qwen/qwen3.6-35b-a3b"
},
{
"id": "stepfun/step-3.5-flash"
"id": "stepfun/step-3.7-flash"
},
{
"id": "minimax/minimax-m2.7"