fix(providers/gmi): post-salvage review fixes

- config.py: remove dead ENV_VARS_BY_VERSION[17] entry (current _config_version is 22, so all users are past version 17 and would never be prompted for GMI_API_KEY on upgrade — consistent with how arcee was added)
- auxiliary_client.py: use google/gemini-3.1-flash-lite-preview as GMI aux model instead of anthropic/claude-opus-4.6 (matches cheap fast-model pattern used by all other providers: zai→glm-4.5-flash, kimi→kimi-k2-turbo-preview, stepfun→step-3.5-flash, kilocode→google/gemini-3-flash-preview)
- test_gmi_provider.py: fix malformed write_text() call in doctor test (was: write_text("GMI_API_KEY=*** encoding="utf-8") → missing closing quote, wrote literal string 'GMI_API_KEY=*** encoding=' to .env file)
- test_gmi_provider.py + test_auxiliary_client.py: update aux model assertions to match new cheaper default
- docs/integrations/providers.md: add 'gmi' to inline 'Supported providers' fallback list (was only in the table, not the inline list at line ~1181)
- docs/reference/cli-commands.md: add 'gmi' to --provider choices list
2026-04-27 23:46:05 +05:30
parent c53fcb0173
commit 56724147ef
6 changed files with 16 additions and 45 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -157,7 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding": "kimi-k2-turbo-preview",
    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
-    "gmi": "anthropic/claude-opus-4.6",
+    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1082,7 +1082,6 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
        "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
    10: ["TAVILY_API_KEY"],
    11: ["TERMINAL_MODAL_MODE"],
-    17: ["GMI_API_KEY", "GMI_BASE_URL"],
 }

 # Required environment variables with metadata for migration prompts.
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -595,10 +595,9 @@ class TestAuxiliaryPoolAwareness:
            client, model = _try_nous()

        assert client is not None
-        # No Portal recommendation → falls back to the hardcoded default.
        assert model == "google/gemini-3-flash-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
-        assert mock_openai.call_args.kwargs["base_url"] == fresh_base
+        assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key"
+        assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1"

    def test_try_nous_uses_portal_recommendation_for_text(self):
        """When the Portal recommends a compaction model, _try_nous honors it."""
@@ -706,33 +705,6 @@ class TestAuxiliaryPoolAwareness:
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_async_client.chat.completions.create.await_count == 1

-    def test_try_nous_pool_entry(self):
-        class _Entry:
-            access_token = "pooled-access-token"
-            agent_key = "pooled-agent-key"
-            inference_base_url = "https://inference.pool.example/v1"
-
-        class _Pool:
-            def has_credentials(self):
-                return True
-
-            def select(self):
-                return _Entry()
-
-        with (
-            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            from agent.auxiliary_client import _try_nous
-
-            client, model = _try_nous()
-
-        assert client is not None
-        assert model == "gemini-3-flash"
-        call_kwargs = mock_openai.call_args.kwargs
-        assert call_kwargs["api_key"] == "pooled-agent-key"
-        assert call_kwargs["base_url"] == "https://inference.pool.example/v1"
-
    def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
        import agent.auxiliary_client as aux

@@ -740,18 +712,18 @@ class TestAuxiliaryPoolAwareness:

        with patch(
            "agent.auxiliary_client.resolve_provider_client",
-            return_value=(fake_client, "anthropic/claude-opus-4.6"),
+            return_value=(fake_client, "google/gemini-3.1-flash-lite-preview"),
        ) as mock_resolve:
            aux.shutdown_cached_clients()
            try:
                client, model = aux._get_cached_client(
                    "gmi",
-                    "anthropic/claude-opus-4.6",
+                    "google/gemini-3.1-flash-lite-preview",
                    base_url="https://api.gmi-serving.com/v1",
                    api_key="gmi-key",
                )
                assert client is fake_client
-                assert model == "anthropic/claude-opus-4.6"
+                assert model == "google/gemini-3.1-flash-lite-preview"

                client, model = aux._get_cached_client(
                    "gmi",
--- a/tests/hermes_cli/test_gmi_provider.py
+++ b/tests/hermes_cli/test_gmi_provider.py
@@ -64,7 +64,7 @@ class TestGmiAliases:

 class TestGmiConfigRegistry:
    def test_optional_env_vars_include_gmi(self):
-        from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS
+        from hermes_cli.config import OPTIONAL_ENV_VARS

        assert "GMI_API_KEY" in OPTIONAL_ENV_VARS
        assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["category"] == "provider"
@@ -74,9 +74,9 @@ class TestGmiConfigRegistry:
        assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS
        assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["category"] == "provider"
        assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["password"] is False
-
-        assert "GMI_API_KEY" in ENV_VARS_BY_VERSION[17]
-        assert "GMI_BASE_URL" in ENV_VARS_BY_VERSION[17]
+        # ENV_VARS_BY_VERSION entries are not needed for providers added after
+        # _config_version 22 (the current baseline) — users discover GMI via
+        # hermes model, not via upgrade prompts.


 class TestGmiModelCatalog:
@@ -158,7 +158,7 @@ class TestGmiDoctor:
        home = tmp_path / ".hermes"
        home.mkdir(parents=True, exist_ok=True)
        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
-        (home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8")
+        (home / ".env").write_text("GMI_API_KEY=***\n", encoding="utf-8")
        project = tmp_path / "project"
        project.mkdir(exist_ok=True)

@@ -271,7 +271,7 @@ class TestGmiAuxiliary:
    def test_aux_default_model(self):
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS

-        assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "anthropic/claude-opus-4.6"
+        assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview"

    def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
@@ -281,7 +281,7 @@ class TestGmiAuxiliary:
            client, model = resolve_provider_client("gmi")

        assert client is not None
-        assert model == "anthropic/claude-opus-4.6"
+        assert model == "google/gemini-3.1-flash-lite-preview"
        assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key"
        assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1"

@@ -293,7 +293,7 @@ class TestGmiAuxiliary:
            client, model = resolve_provider_client("gmi-cloud")

        assert client is not None
-        assert model == "anthropic/claude-opus-4.6"
+        assert model == "google/gemini-3.1-flash-lite-preview"


 class TestGmiMainFlow:
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -1178,7 +1178,7 @@ fallback_model:

 When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.

-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `custom`.

 :::tip
 Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -85,7 +85,7 @@ Common options:
 | `-q`, `--query "..."` | One-shot, non-interactive prompt. |
 | `-m`, `--model <model>` | Override the model for this run. |
 | `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
-| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`. |
+| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`. |
 | `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
 | `-v`, `--verbose` | Verbose output. |
 | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |