fix(auth): mirror Nous OAuth credentials to providers.nous on CLI login
`hermes auth add nous --type oauth` only wrote credential_pool.nous, leaving providers.nous empty. When the Nous agent_key's 24h TTL expired, run_agent.py's 401-recovery path called resolve_nous_runtime_credentials (which reads providers.nous), got AuthError "Hermes is not logged into Nous Portal", caught it and logged it only via logger.debug (suppressed at INFO level), and the agent died with "Non-retryable client error" — no signal to the user that recovery was even attempted. Introduce persist_nous_credentials() as the single source of truth for Nous device-code login persistence. Both auth_commands (CLI) and web_server (dashboard) now route through it, so pool and providers stay in sync at write time. Why: CLI-provisioned profiles couldn't recover from agent_key expiry, producing silent daily outages 24h after first login. PR #6856/#6869 addressed adjacent issues but assumed providers.nous was populated; on the CLI login path it was never being written. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -2159,6 +2159,54 @@ def refresh_nous_oauth_from_state(
|
||||
)
|
||||
|
||||
|
||||
def persist_nous_credentials(
    creds: Dict[str, Any],
    *,
    label: str,
    source: str,
):
    """Persist minted Nous OAuth credentials to both auth-store sections.

    Nous credentials are read at runtime from two independent locations:

    - ``credential_pool.nous``: used by the runtime ``pool.select()`` path that
      services outbound inference requests.
    - ``providers.nous``: used by ``resolve_nous_runtime_credentials()`` — the
      singleton-state reader invoked during 401 recovery and dashboard status
      checks.

    Historically ``hermes auth add nous`` wrote only to the pool while the web
    dashboard device-code flow wrote to both, so CLI-provisioned profiles
    failed silently when the recovery path was later consulted. This helper
    is the single source of truth for CLI/web device-code persistence: both
    stores are always written together.

    Args:
        creds: Full Nous auth state produced by the device-code login. It is
            stored as-is under ``providers.nous`` and also used as the base
            of the pool entry.
        label: Human-readable label recorded on the pool entry.
        source: Provenance tag recorded on the pool entry (callers pass
            values such as ``"manual:device_code"``).

    Returns:
        The added :class:`PooledCredential` entry.
    """
    # Imported lazily, as in the original block.
    from agent.credential_pool import (
        PooledCredential,
        load_pool,
        AUTH_TYPE_OAUTH,
    )

    pool = load_pool("nous")
    # The explicit keys come after ``**creds`` so they override any
    # same-named keys already present in the login state.
    entry = PooledCredential.from_dict("nous", {
        **creds,
        "label": label,
        "auth_type": AUTH_TYPE_OAUTH,
        "source": source,
        # The pool entry exposes the inference endpoint under ``base_url``.
        "base_url": creds.get("inference_base_url"),
    })
    pool.add_entry(entry)

    # NOTE: the two writes are not atomic. If the auth-store write below
    # raises, the pool entry above has already been added and the exception
    # propagates to the caller.
    with _auth_store_lock():
        auth_store = _load_auth_store()
        _save_provider_state(auth_store, "nous", creds)
        _save_auth_store(auth_store)

    return entry
|
||||
|
||||
|
||||
def resolve_nous_runtime_credentials(
|
||||
*,
|
||||
min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
|
||||
@ -221,15 +221,13 @@ def auth_add_command(args) -> None:
|
||||
creds.get("access_token", ""),
|
||||
_oauth_default_label(provider, len(pool.entries()) + 1),
|
||||
)
|
||||
entry = PooledCredential.from_dict(provider, {
|
||||
**creds,
|
||||
"label": label,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"source": f"{SOURCE_MANUAL}:device_code",
|
||||
"base_url": creds.get("inference_base_url"),
|
||||
})
|
||||
pool.add_entry(entry)
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
auth_mod.persist_nous_credentials(
|
||||
creds,
|
||||
label=label,
|
||||
source=f"{SOURCE_MANUAL}:device_code",
|
||||
)
|
||||
count = len(load_pool(provider).entries())
|
||||
print(f'Added {provider} OAuth credential #{count}: "{label}"')
|
||||
return
|
||||
|
||||
if provider == "openai-codex":
|
||||
|
||||
@ -1444,38 +1444,13 @@ def _nous_poller(session_id: str) -> None:
|
||||
auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
|
||||
force_refresh=False, force_mint=True,
|
||||
)
|
||||
# Save into credential pool same as auth_commands.py does
|
||||
from agent.credential_pool import (
|
||||
PooledCredential,
|
||||
load_pool,
|
||||
AUTH_TYPE_OAUTH,
|
||||
SOURCE_MANUAL,
|
||||
from agent.credential_pool import SOURCE_MANUAL
|
||||
from hermes_cli.auth import persist_nous_credentials
|
||||
persist_nous_credentials(
|
||||
full_state,
|
||||
label="dashboard device_code",
|
||||
source=f"{SOURCE_MANUAL}:dashboard_device_code",
|
||||
)
|
||||
pool = load_pool("nous")
|
||||
entry = PooledCredential.from_dict("nous", {
|
||||
**full_state,
|
||||
"label": "dashboard device_code",
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"source": f"{SOURCE_MANUAL}:dashboard_device_code",
|
||||
"base_url": full_state.get("inference_base_url"),
|
||||
})
|
||||
pool.add_entry(entry)
|
||||
# Also persist to auth store so get_nous_auth_status() sees it
|
||||
# (matches what _login_nous in auth.py does for the CLI flow).
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store, _save_provider_state, _save_auth_store,
|
||||
_auth_store_lock,
|
||||
)
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
_save_provider_state(auth_store, "nous", full_state)
|
||||
_save_auth_store(auth_store)
|
||||
except Exception as store_exc:
|
||||
_log.warning(
|
||||
"oauth/device: credential pool saved but auth store write failed "
|
||||
"(session=%s): %s", session_id, store_exc,
|
||||
)
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "approved"
|
||||
_log.info("oauth/device: nous login completed (session=%s)", session_id)
|
||||
|
||||
@ -148,6 +148,17 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
|
||||
assert entry["agent_key"] == "ak-test"
|
||||
assert entry["portal_base_url"] == "https://portal.example.com"
|
||||
|
||||
# `hermes auth add nous` must also populate providers.nous so the
|
||||
# 401-recovery path (resolve_nous_runtime_credentials) can mint a fresh
|
||||
# agent_key when the 24h TTL expires. If this mirror is missing, recovery
|
||||
# raises "Hermes is not logged into Nous Portal" and the agent dies.
|
||||
singleton = payload["providers"]["nous"]
|
||||
assert singleton["access_token"] == token
|
||||
assert singleton["refresh_token"] == "refresh-token"
|
||||
assert singleton["agent_key"] == "ak-test"
|
||||
assert singleton["portal_base_url"] == "https://portal.example.com"
|
||||
assert singleton["inference_base_url"] == "https://inference.example.com/v1"
|
||||
|
||||
|
||||
def test_auth_add_codex_oauth_persists_pool_entry(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
|
||||
@ -456,3 +456,152 @@ class TestLoginNousSkipKeepsCurrent:
|
||||
assert "nous" in auth_after.get("providers", {})
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# persist_nous_credentials: shared helper for CLI + web dashboard login paths
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _full_state_fixture() -> dict:
|
||||
"""Shape of the dict returned by _nous_device_code_login /
|
||||
refresh_nous_oauth_from_state. Used as helper input."""
|
||||
return {
|
||||
"portal_base_url": "https://portal.example.com",
|
||||
"inference_base_url": "https://inference.example.com/v1",
|
||||
"client_id": "hermes-cli",
|
||||
"scope": "inference:mint_agent_key",
|
||||
"token_type": "Bearer",
|
||||
"access_token": "access-tok",
|
||||
"refresh_token": "refresh-tok",
|
||||
"obtained_at": "2026-04-17T22:00:00+00:00",
|
||||
"expires_at": "2026-04-17T22:15:00+00:00",
|
||||
"expires_in": 900,
|
||||
"agent_key": "agent-key-value",
|
||||
"agent_key_id": "ak-id",
|
||||
"agent_key_expires_at": "2026-04-18T22:00:00+00:00",
|
||||
"agent_key_expires_in": 86400,
|
||||
"agent_key_reused": False,
|
||||
"agent_key_obtained_at": "2026-04-17T22:00:10+00:00",
|
||||
"tls": {"insecure": False, "ca_bundle": None},
|
||||
}
|
||||
|
||||
|
||||
def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monkeypatch):
    """Helper must populate BOTH credential_pool.nous AND providers.nous.

    Regression guard: before this helper existed, `hermes auth add nous`
    wrote only the pool. After the Nous agent_key's 24h TTL expired, the
    401-recovery path in run_agent.py called resolve_nous_runtime_credentials
    which reads providers.nous, found it empty, raised AuthError, and the
    agent failed with "Non-retryable client error". Both stores must stay
    in sync at write time.
    """
    from hermes_cli.auth import persist_nous_credentials

    # Start from an auth store with no providers and no credential pool.
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1, "providers": {},
    }))
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    creds = _full_state_fixture()
    entry = persist_nous_credentials(
        creds, label="test-label", source="manual:device_code",
    )

    assert entry is not None
    assert entry.provider == "nous"

    # Re-read the file from disk so the assertions cover what was persisted.
    payload = json.loads((hermes_home / "auth.json").read_text())

    # providers.nous populated with the full state (new behavior)
    singleton = payload["providers"]["nous"]
    assert singleton["access_token"] == "access-tok"
    assert singleton["refresh_token"] == "refresh-tok"
    assert singleton["agent_key"] == "agent-key-value"
    assert singleton["agent_key_expires_at"] == "2026-04-18T22:00:00+00:00"

    # credential_pool.nous populated with the pool entry; select it by the
    # source tag passed to the helper.
    pool_entries = payload["credential_pool"]["nous"]
    pool_entry = next(
        item for item in pool_entries if item["source"] == "manual:device_code"
    )
    assert pool_entry["label"] == "test-label"
    assert pool_entry["agent_key"] == "agent-key-value"
    assert pool_entry["base_url"] == "https://inference.example.com/v1"
|
||||
|
||||
|
||||
def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch):
    """End-to-end: after persisting via the helper, resolve_nous_runtime_credentials
    must succeed (not raise "Hermes is not logged into Nous Portal").

    This is the exact path that run_agent.py's `_try_refresh_nous_client_credentials`
    calls after a Nous 401 — before the fix it would raise AuthError because
    providers.nous was empty.
    """
    from hermes_cli.auth import persist_nous_credentials, resolve_nous_runtime_credentials

    # Start from an auth store with no providers and no credential pool.
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1, "providers": {},
    }))
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    persist_nous_credentials(
        _full_state_fixture(),
        label="recovery-test",
        source="manual:device_code",
    )

    # Stub the network-touching steps so we don't actually contact the
    # portal — the point of this test is that state lookup succeeds and
    # doesn't raise "Hermes is not logged into Nous Portal".
    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
        return {
            "access_token": "access-new",
            "refresh_token": "refresh-new",
            "expires_in": 900,
            "token_type": "Bearer",
        }

    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
        return _mint_payload(api_key="new-agent-key")

    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)

    # force_mint=True: the returned api_key must be the one from the stubbed
    # mint call above.
    creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300, force_mint=True)
    assert creds["api_key"] == "new-agent-key"
|
||||
|
||||
|
||||
def test_persist_nous_credentials_preserves_existing_providers_entry(tmp_path, monkeypatch):
    """Calling persist twice must upsert providers.nous (not duplicate or crash).

    The second call overwrites the providers.nous singleton with the latest
    state, while credential_pool.nous keeps one entry per call.
    """
    from hermes_cli.auth import persist_nous_credentials

    # Start from an auth store with no providers and no credential pool.
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1, "providers": {},
    }))
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    first = _full_state_fixture()
    persist_nous_credentials(first, label="first", source="manual:device_code")

    # Second login differs in both the access token and the agent key so the
    # overwrite of providers.nous is observable.
    second = _full_state_fixture()
    second["access_token"] = "access-second"
    second["agent_key"] = "agent-key-second"
    persist_nous_credentials(second, label="second", source="manual:device_code")

    payload = json.loads((hermes_home / "auth.json").read_text())

    # providers.nous reflects the latest write (singleton semantics)
    assert payload["providers"]["nous"]["access_token"] == "access-second"
    assert payload["providers"]["nous"]["agent_key"] == "agent-key-second"

    # credential_pool.nous has both entries (pool = multi-credential)
    pool_entries = payload["credential_pool"]["nous"]
    labels = [e.get("label") for e in pool_entries]
    assert "first" in labels
    assert "second" in labels
|
||||
|
||||
Reference in New Issue
Block a user