From 1ffa22ee6b6aee20c82cd44c2593c8cfceca260f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:59:07 -0700 Subject: [PATCH] =?UTF-8?q?fix(minimax):=20drop=20stale=20=E2=89=A4204,800?= =?UTF-8?q?=20cache=20entries=20for=20MiniMax-M3=20(#36726)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M3 is 1M context, but pre-catalog builds resolved it via the generic 'minimax' catch-all (204,800) and persisted that to the context-length cache. Step 1 of get_model_context_length returned the cached value directly before reaching the 'minimax-m3' (1M) catalog entry, so users who first probed M3 on an older build were stuck at 204K forever (e.g. /new in the Telegram gateway showing 'Context: 204K tokens (detected)'). Mirror the existing Kimi/Codex stale-cache guards: when a cached entry for a minimax-m3 slug is <= 204,800, drop it and re-resolve. M2.x slugs (correctly 204,800) are untouched since they don't match the M3 name. --- agent/model_metadata.py | 25 ++++++++++ tests/agent/test_minimax_provider.py | 68 +++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 38183aab6..831f26937 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1128,6 +1128,18 @@ def _model_name_suggests_kimi(model: str) -> bool: return lower.startswith("kimi") or "moonshot" in lower +def _model_name_suggests_minimax_m3(model: str) -> bool: + """Return True if the model name looks like MiniMax M3. + + Catches ``MiniMax-M3``, ``minimax/minimax-m3``, and similar variants + across surfaces (native MiniMax-M3, OpenRouter/Nous minimax/minimax-m3). + Used as a guard against stale cache entries seeded by pre-catalog builds + that resolved M3 via the generic ``minimax`` catch-all (204,800) before + the ``minimax-m3`` (1M) entry existed in DEFAULT_CONTEXT_LENGTHS. 
+ """ + return "minimax-m3" in model.lower() + + def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query a local server for the model's context length.""" import httpx @@ -1539,6 +1551,19 @@ def get_model_context_length( model, base_url, f"{cached:,}", ) _invalidate_cached_context_length(model, base_url) + # Invalidate stale ≤204,800 cache entries for MiniMax-M3. Pre-catalog + # builds resolved M3 via the generic ``minimax`` catch-all (204,800) + # and persisted it before the ``minimax-m3`` (1M) entry existed; that + # stale value would otherwise stick forever here at step 1. M3 is 1M, + # so any cached value ≤204,800 for an M3 slug is a leftover — drop it + # and fall through to the hardcoded default. + elif cached <= 204_800 and _model_name_suggests_minimax_m3(model): + logger.info( + "Dropping stale MiniMax-M3 cache entry %s@%s -> %s (pre-catalog value); " + "re-resolving via hardcoded defaults", + model, base_url, f"{cached:,}", + ) + _invalidate_cached_context_length(model, base_url) # Nous Portal: the portal /v1/models endpoint is authoritative. # Bypass the persistent cache so step 5b can always reconcile # against it — this corrects pre-fix entries seeded from the diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py index 2e7f134e4..f9444c471 100644 --- a/tests/agent/test_minimax_provider.py +++ b/tests/agent/test_minimax_provider.py @@ -4,8 +4,9 @@ from unittest.mock import patch class TestMinimaxContextLengths: - """Verify context length entries match official docs (204,800 for all models). + """Verify context length entries match official docs. + M2.x series is 204,800; M3 is 1M (max output 512K). 
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api """ @@ -15,11 +16,74 @@ class TestMinimaxContextLengths: def test_minimax_models_resolve_via_prefix(self): from agent.model_metadata import get_model_context_length - # All MiniMax models should resolve to 204,800 via the "minimax" prefix + # M2.x models resolve to 204,800 via the "minimax" catch-all for model in ("MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"): ctx = get_model_context_length(model, "") assert ctx == 204_800, f"{model} expected 204800, got {ctx}" + def test_minimax_m3_resolves_to_1m(self): + from agent.model_metadata import get_model_context_length + # M3 must beat the generic "minimax" catch-all (204,800) and resolve to + # a 1M-class context. The exact value depends on the source: our + # hardcoded catalog says 1,000,000; the OpenRouter catalog reports + # 1,048,576 (1024²). Either is correct — assert "≥ 1M, not 204,800". + for model in ("MiniMax-M3", "minimax/minimax-m3", "minimax-m3"): + ctx = get_model_context_length(model, "") + assert ctx >= 1_000_000, f"{model} expected 1M-class, got {ctx}" + + +class TestMinimaxM3StaleCacheGuard: + """Pre-catalog builds resolved M3 via the generic 'minimax' catch-all + (204,800) and persisted it before the 'minimax-m3' (1M) catalog entry + existed. The step-1 cache guard must drop that stale value and re-resolve + to 1M, while leaving correct M2.x entries (204,800) untouched. 
+ """ + + def test_suggests_minimax_m3(self): + from agent.model_metadata import _model_name_suggests_minimax_m3 + assert _model_name_suggests_minimax_m3("MiniMax-M3") + assert _model_name_suggests_minimax_m3("minimax/minimax-m3") + assert not _model_name_suggests_minimax_m3("MiniMax-M2.7") + assert not _model_name_suggests_minimax_m3("MiniMax-M2.5") + + def test_stale_m3_cache_dropped_and_reresolves_to_1m(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.minimaxi.com/anthropic" + mm.save_context_length("MiniMax-M3", base, 204_800) + ctx = mm.get_model_context_length( + "MiniMax-M3", base_url=base, api_key="", provider="minimax-cn" + ) + assert ctx == 1_000_000 + + def test_correct_m3_cache_preserved(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.minimaxi.com/anthropic" + mm.save_context_length("MiniMax-M3", base, 1_000_000) + ctx = mm.get_model_context_length( + "MiniMax-M3", base_url=base, api_key="", provider="minimax-cn" + ) + assert ctx == 1_000_000 + + def test_m2_cache_not_clobbered(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.minimaxi.com/anthropic" + # 204,800 is the CORRECT value for M2.x — guard must not touch it. + for slug in ("MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1"): + mm.save_context_length(slug, base, 204_800) + ctx = mm.get_model_context_length( + slug, base_url=base, api_key="", provider="minimax-cn" + ) + assert ctx == 204_800, f"{slug} should stay 204800, got {ctx}" + class TestMinimaxThinkingSupport: