fix(auth): mirror Nous OAuth credentials to providers.nous on CLI login

`hermes auth add nous --type oauth` only wrote credential_pool.nous,
leaving providers.nous empty. When the Nous agent_key's 24h TTL expired,
run_agent.py's 401-recovery path called resolve_nous_runtime_credentials
(which reads providers.nous), got AuthError "Hermes is not logged into
Nous Portal", logged it only via logger.debug (suppressed at INFO level),
and the agent died with "Non-retryable client error" — giving the user
no signal that recovery had even been attempted.

Introduce persist_nous_credentials() as the single source of truth for
Nous device-code login persistence. Both auth_commands (CLI) and
web_server (dashboard) now route through it, so pool and providers
stay in sync at write time.

Why: CLI-provisioned profiles couldn't recover from agent_key expiry,
producing silent daily outages 24h after first login. PR #6856/#6869
addressed adjacent issues but assumed providers.nous was populated;
on the CLI login path it was never being written.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Antoine Khater
2026-04-17 23:44:10 +00:00
committed by Teknium
parent a155b4a159
commit c096a6935f
5 changed files with 221 additions and 40 deletions

View File

@ -2159,6 +2159,54 @@ def refresh_nous_oauth_from_state(
)
def persist_nous_credentials(
    creds: Dict[str, Any],
    *,
    label: str,
    source: str,
):
    """Store minted Nous OAuth credentials in both auth-store sections.

    Nous credentials are consulted at runtime from two independent places:

    - ``credential_pool.nous``: read by the runtime ``pool.select()`` path
      that services outbound inference requests.
    - ``providers.nous``: read by ``resolve_nous_runtime_credentials()``, the
      singleton-state reader used during 401 recovery and dashboard status
      checks.

    ``hermes auth add nous`` used to write only the pool, while the web
    dashboard device-code flow wrote both, so CLI-provisioned profiles failed
    silently once the recovery path was consulted. Routing every device-code
    login through this helper keeps the two sections written together.

    Args:
        creds: Credential state produced by the device-code login. It is
            copied into the pool entry and saved unchanged as the ``nous``
            provider state.
        label: Label recorded on the new pool entry.
        source: Source tag recorded on the new pool entry.

    Returns:
        The :class:`PooledCredential` entry that was added to the pool.
    """
    from agent.credential_pool import (
        AUTH_TYPE_OAUTH,
        PooledCredential,
        load_pool,
    )

    nous_pool = load_pool("nous")

    # Start from the raw credential state, then overlay the pool-specific
    # fields so they win over any same-named keys already present in ``creds``.
    entry_fields = dict(creds)
    entry_fields.update({
        "label": label,
        "auth_type": AUTH_TYPE_OAUTH,
        "source": source,
        "base_url": creds.get("inference_base_url"),
    })
    pooled = PooledCredential.from_dict("nous", entry_fields)
    nous_pool.add_entry(pooled)

    # Mirror the same state into providers.nous while holding the store lock.
    with _auth_store_lock():
        store = _load_auth_store()
        _save_provider_state(store, "nous", creds)
        _save_auth_store(store)

    return pooled
def resolve_nous_runtime_credentials(
*,
min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,

View File

@ -221,15 +221,13 @@ def auth_add_command(args) -> None:
creds.get("access_token", ""),
_oauth_default_label(provider, len(pool.entries()) + 1),
)
entry = PooledCredential.from_dict(provider, {
**creds,
"label": label,
"auth_type": AUTH_TYPE_OAUTH,
"source": f"{SOURCE_MANUAL}:device_code",
"base_url": creds.get("inference_base_url"),
})
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
auth_mod.persist_nous_credentials(
creds,
label=label,
source=f"{SOURCE_MANUAL}:device_code",
)
count = len(load_pool(provider).entries())
print(f'Added {provider} OAuth credential #{count}: "{label}"')
return
if provider == "openai-codex":

View File

@ -1444,38 +1444,13 @@ def _nous_poller(session_id: str) -> None:
auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
force_refresh=False, force_mint=True,
)
# Save into credential pool same as auth_commands.py does
from agent.credential_pool import (
PooledCredential,
load_pool,
AUTH_TYPE_OAUTH,
SOURCE_MANUAL,
from agent.credential_pool import SOURCE_MANUAL
from hermes_cli.auth import persist_nous_credentials
persist_nous_credentials(
full_state,
label="dashboard device_code",
source=f"{SOURCE_MANUAL}:dashboard_device_code",
)
pool = load_pool("nous")
entry = PooledCredential.from_dict("nous", {
**full_state,
"label": "dashboard device_code",
"auth_type": AUTH_TYPE_OAUTH,
"source": f"{SOURCE_MANUAL}:dashboard_device_code",
"base_url": full_state.get("inference_base_url"),
})
pool.add_entry(entry)
# Also persist to auth store so get_nous_auth_status() sees it
# (matches what _login_nous in auth.py does for the CLI flow).
try:
from hermes_cli.auth import (
_load_auth_store, _save_provider_state, _save_auth_store,
_auth_store_lock,
)
with _auth_store_lock():
auth_store = _load_auth_store()
_save_provider_state(auth_store, "nous", full_state)
_save_auth_store(auth_store)
except Exception as store_exc:
_log.warning(
"oauth/device: credential pool saved but auth store write failed "
"(session=%s): %s", session_id, store_exc,
)
with _oauth_sessions_lock:
sess["status"] = "approved"
_log.info("oauth/device: nous login completed (session=%s)", session_id)

View File

@ -148,6 +148,17 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
assert entry["agent_key"] == "ak-test"
assert entry["portal_base_url"] == "https://portal.example.com"
# `hermes auth add nous` must also populate providers.nous so the
# 401-recovery path (resolve_nous_runtime_credentials) can mint a fresh
# agent_key when the 24h TTL expires. If this mirror is missing, recovery
# raises "Hermes is not logged into Nous Portal" and the agent dies.
singleton = payload["providers"]["nous"]
assert singleton["access_token"] == token
assert singleton["refresh_token"] == "refresh-token"
assert singleton["agent_key"] == "ak-test"
assert singleton["portal_base_url"] == "https://portal.example.com"
assert singleton["inference_base_url"] == "https://inference.example.com/v1"
def test_auth_add_codex_oauth_persists_pool_entry(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))

View File

@ -456,3 +456,152 @@ class TestLoginNousSkipKeepsCurrent:
assert "nous" in auth_after.get("providers", {})
# =============================================================================
# persist_nous_credentials: shared helper for CLI + web dashboard login paths
# =============================================================================
def _full_state_fixture() -> dict:
"""Shape of the dict returned by _nous_device_code_login /
refresh_nous_oauth_from_state. Used as helper input."""
return {
"portal_base_url": "https://portal.example.com",
"inference_base_url": "https://inference.example.com/v1",
"client_id": "hermes-cli",
"scope": "inference:mint_agent_key",
"token_type": "Bearer",
"access_token": "access-tok",
"refresh_token": "refresh-tok",
"obtained_at": "2026-04-17T22:00:00+00:00",
"expires_at": "2026-04-17T22:15:00+00:00",
"expires_in": 900,
"agent_key": "agent-key-value",
"agent_key_id": "ak-id",
"agent_key_expires_at": "2026-04-18T22:00:00+00:00",
"agent_key_expires_in": 86400,
"agent_key_reused": False,
"agent_key_obtained_at": "2026-04-17T22:00:10+00:00",
"tls": {"insecure": False, "ca_bundle": None},
}
def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monkeypatch):
    """The helper must fill BOTH credential_pool.nous AND providers.nous.

    Regression guard: before this helper existed, `hermes auth add nous`
    wrote only the pool. Once the Nous agent_key's 24h TTL expired, the
    401-recovery path in run_agent.py called resolve_nous_runtime_credentials,
    which reads providers.nous, found it empty, raised AuthError, and the
    agent failed with "Non-retryable client error". Both stores must stay
    in sync at write time.
    """
    from hermes_cli.auth import persist_nous_credentials

    # Start from an auth store that has no provider state at all.
    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    auth_file = home_dir / "auth.json"
    auth_file.write_text(json.dumps({
        "version": 1, "providers": {},
    }))
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    added = persist_nous_credentials(
        _full_state_fixture(),
        label="test-label",
        source="manual:device_code",
    )
    assert added is not None
    assert added.provider == "nous"

    stored = json.loads(auth_file.read_text())

    # providers.nous carries the full login state (new behavior).
    provider_state = stored["providers"]["nous"]
    assert provider_state["access_token"] == "access-tok"
    assert provider_state["refresh_token"] == "refresh-tok"
    assert provider_state["agent_key"] == "agent-key-value"
    assert provider_state["agent_key_expires_at"] == "2026-04-18T22:00:00+00:00"

    # credential_pool.nous carries the matching pool entry.
    nous_pool = stored["credential_pool"]["nous"]
    device_code_entry = next(
        candidate for candidate in nous_pool
        if candidate["source"] == "manual:device_code"
    )
    assert device_code_entry["label"] == "test-label"
    assert device_code_entry["agent_key"] == "agent-key-value"
    assert device_code_entry["base_url"] == "https://inference.example.com/v1"
def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch):
    """End-to-end: state persisted by the helper is usable for 401 recovery.

    After persisting via the helper, resolve_nous_runtime_credentials must
    succeed rather than raise "Hermes is not logged into Nous Portal". This
    is the exact path that run_agent.py's `_try_refresh_nous_client_credentials`
    takes after a Nous 401 — before the fix it raised AuthError because
    providers.nous was empty.
    """
    from hermes_cli.auth import persist_nous_credentials, resolve_nous_runtime_credentials

    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    empty_store = json.dumps({
        "version": 1, "providers": {},
    })
    (home_dir / "auth.json").write_text(empty_store)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    persist_nous_credentials(
        _full_state_fixture(),
        label="recovery-test",
        source="manual:device_code",
    )

    # Replace the network-touching steps with stubs: the portal is never
    # contacted, because the point here is only that the state lookup
    # succeeds instead of raising "Hermes is not logged into Nous Portal".
    def _stub_refresh(*, client, portal_base_url, client_id, refresh_token):
        refreshed = {
            "access_token": "access-new",
            "refresh_token": "refresh-new",
            "expires_in": 900,
            "token_type": "Bearer",
        }
        return refreshed

    def _stub_mint(*, client, portal_base_url, access_token, min_ttl_seconds):
        return _mint_payload(api_key="new-agent-key")

    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _stub_refresh)
    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _stub_mint)

    resolved = resolve_nous_runtime_credentials(min_key_ttl_seconds=300, force_mint=True)
    assert resolved["api_key"] == "new-agent-key"
def test_persist_nous_credentials_preserves_existing_providers_entry(tmp_path, monkeypatch):
    """Persisting twice must upsert providers.nous without duplicating or crashing."""
    from hermes_cli.auth import persist_nous_credentials

    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    auth_file = home_dir / "auth.json"
    auth_file.write_text(json.dumps({
        "version": 1, "providers": {},
    }))
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    # First login: the unmodified fixture state.
    persist_nous_credentials(
        _full_state_fixture(), label="first", source="manual:device_code",
    )

    # Second login: same shape, but with a different token and agent key.
    newer_state = _full_state_fixture()
    newer_state["access_token"] = "access-second"
    newer_state["agent_key"] = "agent-key-second"
    persist_nous_credentials(
        newer_state, label="second", source="manual:device_code",
    )

    stored = json.loads(auth_file.read_text())

    # providers.nous is a singleton, so it reflects the latest write.
    provider_state = stored["providers"]["nous"]
    assert provider_state["access_token"] == "access-second"
    assert provider_state["agent_key"] == "agent-key-second"

    # credential_pool.nous is multi-credential, so both entries remain.
    seen_labels = [item.get("label") for item in stored["credential_pool"]["nous"]]
    assert "first" in seen_labels
    assert "second" in seen_labels