fix(auth): mirror Nous OAuth credentials to providers.nous on CLI login

`hermes auth add nous --type oauth` only wrote credential_pool.nous, leaving providers.nous empty. When the Nous agent_key's 24h TTL expired, run_agent.py's 401-recovery path called resolve_nous_runtime_credentials (which reads providers.nous), got AuthError "Hermes is not logged into Nous Portal", caught it and logged it only via logger.debug (suppressed at INFO level), and the agent died with "Non-retryable client error" — no signal to the user that recovery had even been attempted. Introduce persist_nous_credentials() as the single source of truth for Nous device-code login persistence. Both auth_commands (CLI) and web_server (dashboard) now route through it, so pool and providers stay in sync at write time. Why: CLI-provisioned profiles couldn't recover from agent_key expiry, producing silent daily outages 24h after first login. PR #6856/#6869 addressed adjacent issues but assumed providers.nous was populated; the CLI login path was never writing it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 23:44:10 +00:00
parent a155b4a159
commit c096a6935f
5 changed files with 221 additions and 40 deletions
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -2159,6 +2159,54 @@ def refresh_nous_oauth_from_state(
    )


+def persist_nous_credentials(
+    creds: Dict[str, Any],
+    *,
+    label: str,
+    source: str,
+):
+    """Persist minted Nous OAuth credentials to both auth-store sections.
+
+    Nous credentials are read at runtime from two independent locations:
+
+    - ``credential_pool.nous``: used by the runtime ``pool.select()`` path that
+      services outbound inference requests.
+    - ``providers.nous``: used by ``resolve_nous_runtime_credentials()`` — the
+      singleton-state reader invoked during 401 recovery and dashboard status
+      checks.
+
+    Historically ``hermes auth add nous`` wrote only to the pool while the web
+    dashboard device-code flow wrote to both, so CLI-provisioned profiles
+    failed silently when the recovery path was later consulted.  This helper
+    is the single source of truth for CLI/web device-code persistence: both
+    stores are always written together.
+
+    Returns the added :class:`PooledCredential` entry.
+    """
+    from agent.credential_pool import (
+        PooledCredential,
+        load_pool,
+        AUTH_TYPE_OAUTH,
+    )
+
+    pool = load_pool("nous")
+    entry = PooledCredential.from_dict("nous", {
+        **creds,
+        "label": label,
+        "auth_type": AUTH_TYPE_OAUTH,
+        "source": source,
+        "base_url": creds.get("inference_base_url"),
+    })
+    pool.add_entry(entry)
+
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        _save_provider_state(auth_store, "nous", creds)
+        _save_auth_store(auth_store)
+
+    return entry
+
+
 def resolve_nous_runtime_credentials(
    *,
    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@ -221,15 +221,13 @@ def auth_add_command(args) -> None:
            creds.get("access_token", ""),
            _oauth_default_label(provider, len(pool.entries()) + 1),
        )
-        entry = PooledCredential.from_dict(provider, {
-            **creds,
-            "label": label,
-            "auth_type": AUTH_TYPE_OAUTH,
-            "source": f"{SOURCE_MANUAL}:device_code",
-            "base_url": creds.get("inference_base_url"),
-        })
-        pool.add_entry(entry)
-        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        auth_mod.persist_nous_credentials(
+            creds,
+            label=label,
+            source=f"{SOURCE_MANUAL}:device_code",
+        )
+        count = len(load_pool(provider).entries())
+        print(f'Added {provider} OAuth credential #{count}: "{label}"')
        return

    if provider == "openai-codex":
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -1444,38 +1444,13 @@ def _nous_poller(session_id: str) -> None:
            auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
            force_refresh=False, force_mint=True,
        )
-        # Save into credential pool same as auth_commands.py does
-        from agent.credential_pool import (
-            PooledCredential,
-            load_pool,
-            AUTH_TYPE_OAUTH,
-            SOURCE_MANUAL,
+        from agent.credential_pool import SOURCE_MANUAL
+        from hermes_cli.auth import persist_nous_credentials
+        persist_nous_credentials(
+            full_state,
+            label="dashboard device_code",
+            source=f"{SOURCE_MANUAL}:dashboard_device_code",
        )
-        pool = load_pool("nous")
-        entry = PooledCredential.from_dict("nous", {
-            **full_state,
-            "label": "dashboard device_code",
-            "auth_type": AUTH_TYPE_OAUTH,
-            "source": f"{SOURCE_MANUAL}:dashboard_device_code",
-            "base_url": full_state.get("inference_base_url"),
-        })
-        pool.add_entry(entry)
-        # Also persist to auth store so get_nous_auth_status() sees it
-        # (matches what _login_nous in auth.py does for the CLI flow).
-        try:
-            from hermes_cli.auth import (
-                _load_auth_store, _save_provider_state, _save_auth_store,
-                _auth_store_lock,
-            )
-            with _auth_store_lock():
-                auth_store = _load_auth_store()
-                _save_provider_state(auth_store, "nous", full_state)
-                _save_auth_store(auth_store)
-        except Exception as store_exc:
-            _log.warning(
-                "oauth/device: credential pool saved but auth store write failed "
-                "(session=%s): %s", session_id, store_exc,
-            )
        with _oauth_sessions_lock:
            sess["status"] = "approved"
        _log.info("oauth/device: nous login completed (session=%s)", session_id)
--- a/tests/hermes_cli/test_auth_commands.py
+++ b/tests/hermes_cli/test_auth_commands.py
@ -148,6 +148,17 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
    assert entry["agent_key"] == "ak-test"
    assert entry["portal_base_url"] == "https://portal.example.com"

+    # `hermes auth add nous` must also populate providers.nous so the
+    # 401-recovery path (resolve_nous_runtime_credentials) can mint a fresh
+    # agent_key when the 24h TTL expires. If this mirror is missing, recovery
+    # raises "Hermes is not logged into Nous Portal" and the agent dies.
+    singleton = payload["providers"]["nous"]
+    assert singleton["access_token"] == token
+    assert singleton["refresh_token"] == "refresh-token"
+    assert singleton["agent_key"] == "ak-test"
+    assert singleton["portal_base_url"] == "https://portal.example.com"
+    assert singleton["inference_base_url"] == "https://inference.example.com/v1"
+

 def test_auth_add_codex_oauth_persists_pool_entry(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@ -456,3 +456,152 @@ class TestLoginNousSkipKeepsCurrent:
        assert "nous" in auth_after.get("providers", {})


+# =============================================================================
+# persist_nous_credentials: shared helper for CLI + web dashboard login paths
+# =============================================================================
+
+
+def _full_state_fixture() -> dict:
+    """Shape of the dict returned by _nous_device_code_login /
+    refresh_nous_oauth_from_state. Used as helper input."""
+    return {
+        "portal_base_url": "https://portal.example.com",
+        "inference_base_url": "https://inference.example.com/v1",
+        "client_id": "hermes-cli",
+        "scope": "inference:mint_agent_key",
+        "token_type": "Bearer",
+        "access_token": "access-tok",
+        "refresh_token": "refresh-tok",
+        "obtained_at": "2026-04-17T22:00:00+00:00",
+        "expires_at": "2026-04-17T22:15:00+00:00",
+        "expires_in": 900,
+        "agent_key": "agent-key-value",
+        "agent_key_id": "ak-id",
+        "agent_key_expires_at": "2026-04-18T22:00:00+00:00",
+        "agent_key_expires_in": 86400,
+        "agent_key_reused": False,
+        "agent_key_obtained_at": "2026-04-17T22:00:10+00:00",
+        "tls": {"insecure": False, "ca_bundle": None},
+    }
+
+
+def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monkeypatch):
+    """Helper must populate BOTH credential_pool.nous AND providers.nous.
+
+    Regression guard: before this helper existed, `hermes auth add nous`
+    wrote only the pool. After the Nous agent_key's 24h TTL expired, the
+    401-recovery path in run_agent.py called resolve_nous_runtime_credentials
+    which reads providers.nous, found it empty, raised AuthError, and the
+    agent failed with "Non-retryable client error". Both stores must stay
+    in sync at write time.
+    """
+    from hermes_cli.auth import persist_nous_credentials
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    creds = _full_state_fixture()
+    entry = persist_nous_credentials(
+        creds, label="test-label", source="manual:device_code",
+    )
+
+    assert entry is not None
+    assert entry.provider == "nous"
+
+    payload = json.loads((hermes_home / "auth.json").read_text())
+
+    # providers.nous populated with the full state (new behavior)
+    singleton = payload["providers"]["nous"]
+    assert singleton["access_token"] == "access-tok"
+    assert singleton["refresh_token"] == "refresh-tok"
+    assert singleton["agent_key"] == "agent-key-value"
+    assert singleton["agent_key_expires_at"] == "2026-04-18T22:00:00+00:00"
+
+    # credential_pool.nous populated with the pool entry
+    pool_entries = payload["credential_pool"]["nous"]
+    pool_entry = next(
+        item for item in pool_entries if item["source"] == "manual:device_code"
+    )
+    assert pool_entry["label"] == "test-label"
+    assert pool_entry["agent_key"] == "agent-key-value"
+    assert pool_entry["base_url"] == "https://inference.example.com/v1"
+
+
+def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch):
+    """End-to-end: after persisting via the helper, resolve_nous_runtime_credentials
+    must succeed (not raise "Hermes is not logged into Nous Portal").
+
+    This is the exact path that run_agent.py's `_try_refresh_nous_client_credentials`
+    calls after a Nous 401 — before the fix it would raise AuthError because
+    providers.nous was empty.
+    """
+    from hermes_cli.auth import persist_nous_credentials, resolve_nous_runtime_credentials
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    persist_nous_credentials(
+        _full_state_fixture(),
+        label="recovery-test",
+        source="manual:device_code",
+    )
+
+    # Stub the network-touching steps so we don't actually contact the
+    # portal — the point of this test is that state lookup succeeds and
+    # doesn't raise "Hermes is not logged into Nous Portal".
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        return {
+            "access_token": "access-new",
+            "refresh_token": "refresh-new",
+            "expires_in": 900,
+            "token_type": "Bearer",
+        }
+
+    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
+        return _mint_payload(api_key="new-agent-key")
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
+    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
+
+    creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300, force_mint=True)
+    assert creds["api_key"] == "new-agent-key"
+
+
+def test_persist_nous_credentials_preserves_existing_providers_entry(tmp_path, monkeypatch):
+    """Calling persist twice must upsert providers.nous (not duplicate or crash)."""
+    from hermes_cli.auth import persist_nous_credentials
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    first = _full_state_fixture()
+    persist_nous_credentials(first, label="first", source="manual:device_code")
+
+    second = _full_state_fixture()
+    second["access_token"] = "access-second"
+    second["agent_key"] = "agent-key-second"
+    persist_nous_credentials(second, label="second", source="manual:device_code")
+
+    payload = json.loads((hermes_home / "auth.json").read_text())
+
+    # providers.nous reflects the latest write (singleton semantics)
+    assert payload["providers"]["nous"]["access_token"] == "access-second"
+    assert payload["providers"]["nous"]["agent_key"] == "agent-key-second"
+
+    # credential_pool.nous has both entries (pool = multi-credential)
+    pool_entries = payload["credential_pool"]["nous"]
+    labels = [e.get("label") for e in pool_entries]
+    assert "first" in labels
+    assert "second" in labels