When OpenAI Codex returns 401 token_invalidated or token_revoked, the credential is broken upstream — retrying after a TTL cooldown cannot fix it. The existing code treated every 401/429 the same way: STATUS_EXHAUSTED with a TTL cooldown (5 min for 401, 1 hour for 429). After the TTL elapsed, the broken credential re-entered rotation and immediately failed again with the same 401, surfacing as 'Failed to generate context summary' on every context-compression cycle. Reporter observed 7 separate 401 token_invalidated failures from the same revoked credential in a single day; the only workaround was removing it manually via 'hermes auth'. Add a STATUS_DEAD terminal state. Only 401 responses whose error.code/reason matches a known terminal OAuth state (token_invalidated, token_revoked, invalid_token, invalid_grant, unauthorized_client, refresh_token_reused) transition to DEAD. Everything else keeps the existing TTL semantics — 429 rate limits are transient and should recover. DEAD entries are excluded from rotation unconditionally. They only clear when an explicit write-side re-auth sync rewrites the tokens (the existing _sync_codex_pool_entries / _sync_*_entry_from_auth_store paths already clear last_status to None). The read-side auth.json-sync paths also now fire on DEAD so an in-flight pool entry can adopt fresh tokens written by another process without needing explicit re-auth. After 24 hours, DEAD manual entries (source='manual:*') are pruned from the pool automatically so dead state doesn't accumulate forever. Singleton-seeded DEAD entries (source='device_code' etc.) are kept because _seed_from_singletons would recreate them on the next load with the same stale tokens — pruning would be pointless. The audit trail stays visible (label, last_error_reason, timestamps). Closes #32849.
This commit is contained in:
@ -55,6 +55,38 @@ def _load_config_safe() -> Optional[dict]:
|
||||
|
||||
STATUS_OK = "ok"
|
||||
STATUS_EXHAUSTED = "exhausted"
|
||||
# Terminal failure — the credential will never recover on its own. Used for
|
||||
# upstream-permanent OAuth states like ``token_invalidated`` / ``token_revoked``
|
||||
# where retrying after a TTL cooldown is guaranteed to fail. ``DEAD`` entries
|
||||
# are excluded from rotation unconditionally and only clear when an explicit
|
||||
# write-side sync (e.g. ``_save_codex_tokens`` after a fresh device-code
|
||||
# login) rewrites the tokens.
|
||||
STATUS_DEAD = "dead"
|
||||
|
||||
# OAuth error reasons that indicate the credential is permanently invalid
|
||||
# server-side and cannot be recovered by retry/refresh. Sourced from
|
||||
# OpenAI Codex Responses API, Anthropic, xAI, and Google OAuth spec.
|
||||
_TERMINAL_AUTH_REASONS = frozenset({
|
||||
"token_invalidated", # OpenAI Codex: "Your authentication token has been invalidated."
|
||||
"token_revoked", # OAuth 2.0 RFC 7009: token explicitly revoked
|
||||
"invalid_token", # RFC 6750: bearer token is malformed/expired/revoked
|
||||
"invalid_grant", # RFC 6749: refresh_token rejected during refresh
|
||||
"unauthorized_client", # RFC 6749: client no longer authorized
|
||||
"refresh_token_reused", # Single-use refresh token consumed by another process
|
||||
})
|
||||
|
||||
# How long a DEAD manual credential is preserved before being pruned.
|
||||
# Manual entries (``manual:*``) are independent credentials with no singleton
|
||||
# to re-seed from, so pruning them after a quiet window cleans up dead state
|
||||
# without losing recoverability — the user always has the option to re-add
|
||||
# via ``hermes auth add``.
|
||||
#
|
||||
# Singleton-seeded entries (``device_code``, ``loopback_pkce``, ``claude_code``)
|
||||
# are NOT pruned because ``_seed_from_singletons`` would just re-create them
|
||||
# on the next ``load_pool()`` with the same stale singleton tokens, defeating
|
||||
# the cleanup. They remain in the pool marked DEAD until an explicit re-auth
|
||||
# write-side sync (``_save_codex_tokens`` etc.) clears the status.
|
||||
DEAD_MANUAL_PRUNE_TTL_SECONDS = 24 * 60 * 60 # 24 hours
|
||||
|
||||
AUTH_TYPE_OAUTH = "oauth"
|
||||
AUTH_TYPE_API_KEY = "api_key"
|
||||
@ -438,6 +470,29 @@ class CredentialPool:
|
||||
[entry.to_dict() for entry in self._entries],
|
||||
)
|
||||
|
||||
def _is_terminal_auth_failure(
|
||||
self,
|
||||
status_code: Optional[int],
|
||||
normalized_error: Dict[str, Any],
|
||||
) -> bool:
|
||||
"""Detect upstream-permanent OAuth failures that won't recover on TTL.
|
||||
|
||||
Only fires for 401 responses whose error code/reason matches a known
|
||||
terminal OAuth state (token_invalidated, token_revoked, invalid_grant,
|
||||
etc.). Distinguishes permanent failures from transient ones like
|
||||
token_expired (refreshable) or generic 401 without a specific reason
|
||||
(could be a server-side glitch worth retrying).
|
||||
|
||||
Returns False for non-401 status codes — 429 rate limits and 402
|
||||
billing failures are transient by nature and should keep TTL semantics.
|
||||
"""
|
||||
if status_code != 401:
|
||||
return False
|
||||
reason = normalized_error.get("reason")
|
||||
if not isinstance(reason, str):
|
||||
return False
|
||||
return reason.strip().lower() in _TERMINAL_AUTH_REASONS
|
||||
|
||||
def _mark_exhausted(
|
||||
self,
|
||||
entry: PooledCredential,
|
||||
@ -445,9 +500,20 @@ class CredentialPool:
|
||||
error_context: Optional[Dict[str, Any]] = None,
|
||||
) -> PooledCredential:
|
||||
normalized_error = _normalize_error_context(error_context)
|
||||
# Permanent OAuth failures (token_invalidated, token_revoked, etc.)
|
||||
# transition to STATUS_DEAD instead of STATUS_EXHAUSTED. Without this,
|
||||
# a revoked credential gets a 1-hour TTL cooldown and then re-enters
|
||||
# rotation, failing immediately every hour until the user manually
|
||||
# removes it (issue #32849). DEAD entries are excluded from rotation
|
||||
# unconditionally and only clear via an explicit re-auth write-side
|
||||
# sync (``_save_codex_tokens`` after a fresh device-code login).
|
||||
if self._is_terminal_auth_failure(status_code, normalized_error):
|
||||
terminal_status = STATUS_DEAD
|
||||
else:
|
||||
terminal_status = STATUS_EXHAUSTED
|
||||
updated = replace(
|
||||
entry,
|
||||
last_status=STATUS_EXHAUSTED,
|
||||
last_status=terminal_status,
|
||||
last_status_at=time.time(),
|
||||
last_error_code=status_code,
|
||||
last_error_reason=normalized_error.get("reason"),
|
||||
@ -1158,13 +1224,14 @@ class CredentialPool:
|
||||
"""
|
||||
now = time.time()
|
||||
cleared_any = False
|
||||
entries_to_prune: List[str] = []
|
||||
available: List[PooledCredential] = []
|
||||
for entry in self._entries:
|
||||
# For anthropic claude_code entries, sync from the credentials file
|
||||
# before any status/refresh checks. This picks up tokens refreshed
|
||||
# by other processes (Claude Code CLI, other Hermes profiles).
|
||||
if (self.provider == "anthropic" and entry.source == "claude_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
|
||||
synced = self._sync_anthropic_entry_from_credentials_file(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
@ -1175,7 +1242,7 @@ class CredentialPool:
|
||||
# exhausted status stale.
|
||||
if (self.provider == "nous"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
@ -1187,7 +1254,7 @@ class CredentialPool:
|
||||
# future for ChatGPT weekly windows).
|
||||
if (self.provider == "openai-codex"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
|
||||
synced = self._sync_codex_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
@ -1198,11 +1265,41 @@ class CredentialPool:
|
||||
# xAI Grok OAuth login) has since rotated in auth.json.
|
||||
if (self.provider == "xai-oauth"
|
||||
and entry.source == "loopback_pkce"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
|
||||
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_DEAD:
|
||||
# Manual DEAD credentials get pruned after a 24h quiet window
|
||||
# so the pool doesn't accumulate dead entries forever. The
|
||||
# user can always re-add via ``hermes auth add``. Singleton-
|
||||
# seeded DEAD entries are kept so the audit trail (label,
|
||||
# last_error_reason, timestamps) stays visible — pruning them
|
||||
# would just be undone by ``_seed_from_singletons`` on the
|
||||
# next load anyway.
|
||||
if _is_manual_source(entry.source):
|
||||
dead_at = entry.last_status_at or 0
|
||||
if dead_at and now - dead_at > DEAD_MANUAL_PRUNE_TTL_SECONDS:
|
||||
_label = entry.label or entry.id[:8]
|
||||
logger.warning(
|
||||
"credential pool: pruning DEAD manual entry %s "
|
||||
"(reason=%s, age=%.1fh) — re-add via `hermes auth add %s`",
|
||||
_label,
|
||||
entry.last_error_reason or "unknown",
|
||||
(now - dead_at) / 3600.0,
|
||||
self.provider,
|
||||
)
|
||||
# Mark for removal after the loop completes; we can't
|
||||
# mutate self._entries while iterating.
|
||||
entries_to_prune.append(entry.id)
|
||||
cleared_any = True
|
||||
# Permanently failed credentials never re-enter rotation via
|
||||
# TTL. They only clear when a write-side re-auth sync rewrites
|
||||
# the tokens (e.g. ``_save_codex_tokens`` after a fresh
|
||||
# device-code login). The auth.json-sync paths below handle
|
||||
# the re-auth case for OAuth singletons.
|
||||
continue
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@ -1226,6 +1323,9 @@ class CredentialPool:
|
||||
continue
|
||||
entry = refreshed
|
||||
available.append(entry)
|
||||
if entries_to_prune:
|
||||
pruned_ids = set(entries_to_prune)
|
||||
self._entries = [e for e in self._entries if e.id not in pruned_ids]
|
||||
if cleared_any:
|
||||
self._persist()
|
||||
return available
|
||||
@ -1293,11 +1393,22 @@ class CredentialPool:
|
||||
if entry is None:
|
||||
return None
|
||||
_label = entry.label or entry.id[:8]
|
||||
logger.info(
|
||||
"credential pool: marking %s exhausted (status=%s), rotating",
|
||||
_label, status_code,
|
||||
)
|
||||
self._mark_exhausted(entry, status_code, error_context)
|
||||
# Re-read the updated entry to log the correct terminal state.
|
||||
updated_entry = next(
|
||||
(e for e in self._entries if e.id == entry.id), entry,
|
||||
)
|
||||
if updated_entry.last_status == STATUS_DEAD:
|
||||
logger.warning(
|
||||
"credential pool: marking %s DEAD (status=%s, reason=%s) — "
|
||||
"permanently failed, will NOT re-enter rotation until re-auth",
|
||||
_label, status_code, updated_entry.last_error_reason or "unknown",
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"credential pool: marking %s exhausted (status=%s), rotating",
|
||||
_label, status_code,
|
||||
)
|
||||
self._current_id = None
|
||||
next_entry = self._select_unlocked()
|
||||
if next_entry:
|
||||
|
||||
@ -379,6 +379,415 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
|
||||
assert persisted["last_error_code"] == 402
|
||||
|
||||
|
||||
def test_token_invalidated_marks_credential_dead(tmp_path, monkeypatch):
|
||||
"""OpenAI Codex token_invalidated must mark the credential DEAD, not exhausted.
|
||||
|
||||
Regression for #32849: when an OAuth credential is revoked upstream, the
|
||||
1-hour exhausted TTL means it re-enters rotation every hour and fails
|
||||
again with the same 401 — surfacing as "Failed to generate context
|
||||
summary" on context compression. Terminal OAuth failures should never
|
||||
auto-recover.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-dead",
|
||||
"label": "revoked",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "revoked-at",
|
||||
"refresh_token": "revoked-rt",
|
||||
},
|
||||
{
|
||||
"id": "cred-ok",
|
||||
"label": "healthy",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "healthy-at",
|
||||
"refresh_token": "healthy-rt",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool, STATUS_DEAD
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
assert pool.select().id == "cred-dead"
|
||||
|
||||
# Simulate the exact OpenAI Codex 401 token_invalidated response shape.
|
||||
next_entry = pool.mark_exhausted_and_rotate(
|
||||
status_code=401,
|
||||
error_context={
|
||||
"reason": "token_invalidated",
|
||||
"message": "Your authentication token has been invalidated. Please try signing in again.",
|
||||
},
|
||||
)
|
||||
|
||||
# Rotation still works — we hand off to the healthy credential.
|
||||
assert next_entry is not None
|
||||
assert next_entry.id == "cred-ok"
|
||||
|
||||
# The revoked credential is now permanently marked DEAD.
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
persisted = auth_payload["credential_pool"]["openai-codex"][0]
|
||||
assert persisted["last_status"] == STATUS_DEAD
|
||||
assert persisted["last_error_code"] == 401
|
||||
assert persisted["last_error_reason"] == "token_invalidated"
|
||||
|
||||
|
||||
def test_dead_credential_never_re_enters_rotation_after_ttl(tmp_path, monkeypatch):
|
||||
"""A DEAD credential must stay excluded regardless of how much time passes.
|
||||
|
||||
The exhausted TTL clears entries after 5 min (401) / 1 hour (429).
|
||||
A DEAD credential has no recovery TTL — it stays dead until either
|
||||
(a) an explicit re-auth write-side sync rewrites the tokens, or
|
||||
(b) the manual-prune TTL elapses (covered by separate tests below).
|
||||
This test verifies the core invariant in the recent-entry window.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
# DEAD entry from 2 hours ago — well past the exhausted TTLs (5min/1h)
|
||||
# but well within the 24h manual-prune window.
|
||||
two_hours_ago = time.time() - (2 * 3600)
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-dead",
|
||||
"label": "revoked",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "revoked-at",
|
||||
"refresh_token": "revoked-rt",
|
||||
"last_status": "dead",
|
||||
"last_status_at": two_hours_ago,
|
||||
"last_error_code": 401,
|
||||
"last_error_reason": "token_invalidated",
|
||||
},
|
||||
{
|
||||
"id": "cred-ok",
|
||||
"label": "healthy",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "healthy-at",
|
||||
"refresh_token": "healthy-rt",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool, STATUS_DEAD
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
selected = pool.select()
|
||||
# Should skip the dead entry and pick the healthy one — even though
|
||||
# the dead entry has priority 0 (would normally be picked first) and
|
||||
# plenty of time has passed since it was marked dead.
|
||||
assert selected is not None
|
||||
assert selected.id == "cred-ok"
|
||||
|
||||
# The DEAD entry is still marked dead on disk — not cleared by TTL.
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
dead_entry = next(e for e in auth_payload["credential_pool"]["openai-codex"]
|
||||
if e["id"] == "cred-dead")
|
||||
assert dead_entry["last_status"] == STATUS_DEAD
|
||||
|
||||
|
||||
def test_429_rate_limit_still_uses_exhausted_not_dead(tmp_path, monkeypatch):
|
||||
"""429 rate limits must NOT be treated as terminal.
|
||||
|
||||
They should keep the existing 1-hour TTL cooldown semantics so the
|
||||
credential re-enters rotation once the rate window resets.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-1",
|
||||
"label": "primary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "at-1",
|
||||
"refresh_token": "rt-1",
|
||||
},
|
||||
{
|
||||
"id": "cred-2",
|
||||
"label": "secondary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "at-2",
|
||||
"refresh_token": "rt-2",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool, STATUS_EXHAUSTED
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
assert pool.select().id == "cred-1"
|
||||
|
||||
next_entry = pool.mark_exhausted_and_rotate(
|
||||
status_code=429,
|
||||
error_context={"reason": "rate_limit_exceeded", "message": "Rate limit exceeded"},
|
||||
)
|
||||
assert next_entry is not None
|
||||
assert next_entry.id == "cred-2"
|
||||
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
persisted = auth_payload["credential_pool"]["openai-codex"][0]
|
||||
# 429 stays exhausted (transient) — NOT dead.
|
||||
assert persisted["last_status"] == STATUS_EXHAUSTED
|
||||
assert persisted["last_error_code"] == 429
|
||||
|
||||
|
||||
def test_generic_401_without_terminal_reason_still_uses_exhausted(tmp_path, monkeypatch):
|
||||
"""A 401 with no specific code/reason should keep TTL semantics.
|
||||
|
||||
Only specific terminal reasons (token_invalidated, token_revoked, etc.)
|
||||
transition to DEAD. A generic 401 might be a transient server-side
|
||||
issue worth retrying after the 5-min TTL.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-1",
|
||||
"label": "primary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "at-1",
|
||||
"refresh_token": "rt-1",
|
||||
},
|
||||
{
|
||||
"id": "cred-2",
|
||||
"label": "secondary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "at-2",
|
||||
"refresh_token": "rt-2",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool, STATUS_EXHAUSTED
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
pool.select()
|
||||
|
||||
# 401 with no specific reason — stays exhausted, NOT dead.
|
||||
pool.mark_exhausted_and_rotate(
|
||||
status_code=401,
|
||||
error_context={"message": "Unauthorized"},
|
||||
)
|
||||
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
persisted = auth_payload["credential_pool"]["openai-codex"][0]
|
||||
assert persisted["last_status"] == STATUS_EXHAUSTED
|
||||
assert persisted["last_error_code"] == 401
|
||||
|
||||
|
||||
def test_dead_manual_entry_pruned_after_24h(tmp_path, monkeypatch):
|
||||
"""A DEAD manual entry is removed from the pool after the prune TTL.
|
||||
|
||||
Manual entries (``manual:*``) are independent credentials with no
|
||||
singleton to re-seed from, so we can clean them up after a quiet
|
||||
window without losing recoverability — the user can always re-add
|
||||
via ``hermes auth add``.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
# DEAD entry from > 24h ago
|
||||
long_ago = time.time() - (25 * 3600)
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-old-dead",
|
||||
"label": "ancient-dead",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "stale",
|
||||
"refresh_token": "stale",
|
||||
"last_status": "dead",
|
||||
"last_status_at": long_ago,
|
||||
"last_error_code": 401,
|
||||
"last_error_reason": "token_invalidated",
|
||||
},
|
||||
{
|
||||
"id": "cred-ok",
|
||||
"label": "healthy",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "healthy-at",
|
||||
"refresh_token": "healthy-rt",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
# Trigger _available_entries via select; that runs the prune.
|
||||
selected = pool.select()
|
||||
assert selected is not None
|
||||
assert selected.id == "cred-ok"
|
||||
|
||||
# On-disk pool should have the dead entry removed.
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
persisted = auth_payload["credential_pool"]["openai-codex"]
|
||||
assert len(persisted) == 1
|
||||
assert persisted[0]["id"] == "cred-ok"
|
||||
|
||||
|
||||
def test_dead_manual_entry_kept_within_24h(tmp_path, monkeypatch):
|
||||
"""A DEAD manual entry stays in the pool until the prune TTL elapses.
|
||||
|
||||
Recent DEAD entries are kept so the audit trail (last_error_reason,
|
||||
timestamps) remains visible while the user investigates. They simply
|
||||
don't participate in rotation (covered by the DEAD-skip test above).
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
# DEAD entry from only an hour ago — well within the 24h window
|
||||
recent = time.time() - 3600
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-recent-dead",
|
||||
"label": "recent-dead",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "stale",
|
||||
"refresh_token": "stale",
|
||||
"last_status": "dead",
|
||||
"last_status_at": recent,
|
||||
"last_error_code": 401,
|
||||
"last_error_reason": "token_invalidated",
|
||||
},
|
||||
{
|
||||
"id": "cred-ok",
|
||||
"label": "healthy",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "manual:device_code",
|
||||
"access_token": "healthy-at",
|
||||
"refresh_token": "healthy-rt",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool, STATUS_DEAD
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
selected = pool.select()
|
||||
assert selected is not None
|
||||
assert selected.id == "cred-ok"
|
||||
|
||||
# On-disk pool should still have BOTH entries — recent dead is preserved.
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
persisted = auth_payload["credential_pool"]["openai-codex"]
|
||||
assert len(persisted) == 2
|
||||
dead_entry = next(e for e in persisted if e["id"] == "cred-recent-dead")
|
||||
assert dead_entry["last_status"] == STATUS_DEAD
|
||||
|
||||
|
||||
def test_dead_singleton_seeded_entry_not_pruned(tmp_path, monkeypatch):
|
||||
"""A DEAD ``device_code`` entry must NOT be pruned even after 24h.
|
||||
|
||||
Singleton-seeded entries get re-created by ``_seed_from_singletons`` on
|
||||
every ``load_pool()``, so pruning them is pointless — they reappear
|
||||
immediately with the same stale singleton tokens. Keep them visible
|
||||
with the DEAD marker so the user knows what's broken.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
long_ago = time.time() - (48 * 3600)
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"providers": {
|
||||
"openai-codex": {
|
||||
"tokens": {"access_token": "revoked-at", "refresh_token": "revoked-rt"},
|
||||
"last_refresh": "2026-01-01T00:00:00Z",
|
||||
"auth_mode": "chatgpt",
|
||||
},
|
||||
},
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-seeded-dead",
|
||||
"label": "seeded-dead",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "device_code", # singleton-seeded, NOT manual
|
||||
"access_token": "revoked-at",
|
||||
"refresh_token": "revoked-rt",
|
||||
"last_status": "dead",
|
||||
"last_status_at": long_ago,
|
||||
"last_error_code": 401,
|
||||
"last_error_reason": "token_invalidated",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool, STATUS_DEAD
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
# No healthy entry available; select returns None (pool empty for rotation).
|
||||
assert pool.select() is None
|
||||
|
||||
# On-disk: the singleton-seeded DEAD entry is preserved.
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
persisted = auth_payload["credential_pool"]["openai-codex"]
|
||||
assert len(persisted) == 1
|
||||
assert persisted[0]["id"] == "cred-seeded-dead"
|
||||
assert persisted[0]["last_status"] == STATUS_DEAD
|
||||
|
||||
|
||||
def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
|
||||
|
||||
Reference in New Issue
Block a user