fix(minimax): drop stale ≤204,800 cache entries for MiniMax-M3 (#36726)

M3 is 1M context, but pre-catalog builds resolved it via the generic 'minimax' catch-all (204,800) and persisted that to the context-length cache. Step 1 of get_model_context_length returned the cached value directly before reaching the 'minimax-m3' (1M) catalog entry, so users who first probed M3 on an older build were stuck at 204K forever (e.g. /new in the Telegram gateway showing 'Context: 204K tokens (detected)'). Mirror the existing Kimi/Codex stale-cache guards: when a cached entry for a minimax-m3 slug is <= 204,800, drop it and re-resolve. M2.x slugs (correctly 204,800) are untouched since they don't match the M3 name.
2026-06-01 14:59:07 -07:00
parent b9646276fd
commit 1ffa22ee6b
2 changed files with 91 additions and 2 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -1128,6 +1128,18 @@ def _model_name_suggests_kimi(model: str) -> bool:
    return lower.startswith("kimi") or "moonshot" in lower


+def _model_name_suggests_minimax_m3(model: str) -> bool:
+    """Return True if the model name looks like MiniMax M3.
+
+    Case-insensitive substring match on ``minimax-m3``: catches the native
+    ``MiniMax-M3``, OpenRouter/Nous ``minimax/minimax-m3``, and similar slugs.
+    Used as a guard against stale cache entries seeded by pre-catalog builds
+    that resolved M3 via the generic ``minimax`` catch-all (204,800) before
+    the ``minimax-m3`` (1M) entry existed in DEFAULT_CONTEXT_LENGTHS.
+    """
+    return "minimax-m3" in model.lower()
+
+
 def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx
@ -1539,6 +1551,19 @@ def get_model_context_length(
                    model, base_url, f"{cached:,}",
                )
                _invalidate_cached_context_length(model, base_url)
+            # Invalidate stale ≤204,800 cache entries for MiniMax-M3.  Pre-catalog
+            # builds resolved M3 via the generic ``minimax`` catch-all (204,800)
+            # and persisted it before the ``minimax-m3`` (1M) entry existed; that
+            # stale value would otherwise stick forever here at step 1.  M3 is 1M,
+            # so any cached value ≤204,800 for an M3 slug is a leftover — drop it
+            # and fall through to the hardcoded default.
+            elif cached <= 204_800 and _model_name_suggests_minimax_m3(model):
+                logger.info(
+                    "Dropping stale MiniMax-M3 cache entry %s@%s -> %s (pre-catalog value); "
+                    "re-resolving via hardcoded defaults",
+                    model, base_url, f"{cached:,}",
+                )
+                _invalidate_cached_context_length(model, base_url)
            # Nous Portal: the portal /v1/models endpoint is authoritative.
            # Bypass the persistent cache so step 5b can always reconcile
            # against it — this corrects pre-fix entries seeded from the
--- a/tests/agent/test_minimax_provider.py
+++ b/tests/agent/test_minimax_provider.py
@ -4,8 +4,9 @@ from unittest.mock import patch


 class TestMinimaxContextLengths:
-    """Verify context length entries match official docs (204,800 for all models).
+    """Verify context length entries match official docs.

+    M2.x series is 204,800; M3 is 1M (max output 512K).
    Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

@ -15,11 +16,74 @@ class TestMinimaxContextLengths:

    def test_minimax_models_resolve_via_prefix(self):
        from agent.model_metadata import get_model_context_length
-        # All MiniMax models should resolve to 204,800 via the "minimax" prefix
+        # M2.x models resolve to 204,800 via the "minimax" catch-all
        for model in ("MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"):
            ctx = get_model_context_length(model, "")
            assert ctx == 204_800, f"{model} expected 204800, got {ctx}"

+    def test_minimax_m3_resolves_to_1m(self):
+        from agent.model_metadata import get_model_context_length
+        # M3 must beat the generic "minimax" catch-all (204,800) and resolve to
+        # a 1M-class context. The exact value depends on the source: our
+        # hardcoded catalog says 1,000,000; the OpenRouter catalog reports
+        # 1,048,576 (1024²). Either is correct — assert "≥ 1M, not 204,800".
+        for model in ("MiniMax-M3", "minimax/minimax-m3", "minimax-m3"):
+            ctx = get_model_context_length(model, "")
+            assert ctx >= 1_000_000, f"{model} expected 1M-class, got {ctx}"
+
+
+class TestMinimaxM3StaleCacheGuard:
+    """Pre-catalog builds resolved M3 via the generic 'minimax' catch-all
+    (204,800) and persisted it before the 'minimax-m3' (1M) catalog entry
+    existed.  The step-1 cache guard must drop that stale value and re-resolve
+    to 1M, while leaving correct M2.x entries (204,800) untouched.
+    """
+
+    def test_suggests_minimax_m3(self):
+        from agent.model_metadata import _model_name_suggests_minimax_m3
+        assert _model_name_suggests_minimax_m3("MiniMax-M3")
+        assert _model_name_suggests_minimax_m3("minimax/minimax-m3")
+        assert not _model_name_suggests_minimax_m3("MiniMax-M2.7")
+        assert not _model_name_suggests_minimax_m3("MiniMax-M2.5")
+
+    def test_stale_m3_cache_dropped_and_reresolves_to_1m(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import agent.model_metadata as mm
+        importlib.reload(mm)
+        base = "https://api.minimaxi.com/anthropic"
+        mm.save_context_length("MiniMax-M3", base, 204_800)
+        ctx = mm.get_model_context_length(
+            "MiniMax-M3", base_url=base, api_key="", provider="minimax-cn"
+        )
+        assert ctx == 1_000_000
+
+    def test_correct_m3_cache_preserved(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import agent.model_metadata as mm
+        importlib.reload(mm)
+        base = "https://api.minimaxi.com/anthropic"
+        mm.save_context_length("MiniMax-M3", base, 1_000_000)
+        ctx = mm.get_model_context_length(
+            "MiniMax-M3", base_url=base, api_key="", provider="minimax-cn"
+        )
+        assert ctx == 1_000_000
+
+    def test_m2_cache_not_clobbered(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import agent.model_metadata as mm
+        importlib.reload(mm)
+        base = "https://api.minimaxi.com/anthropic"
+        # 204,800 is the CORRECT value for M2.x — guard must not touch it.
+        for slug in ("MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1"):
+            mm.save_context_length(slug, base, 204_800)
+            ctx = mm.get_model_context_length(
+                slug, base_url=base, api_key="", provider="minimax-cn"
+            )
+            assert ctx == 204_800, f"{slug} should stay 204800, got {ctx}"
+


 class TestMinimaxThinkingSupport: