diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 56bd7a084..13b3a222c 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1243,8 +1243,30 @@ def _read_nous_auth() -> Optional[dict]:
 
 
 def _nous_api_key(provider: dict) -> str:
-    """Extract the Nous runtime credential from the compatibility field."""
-    return provider.get("agent_key") or provider.get("access_token", "")
+    """Extract a usable Nous inference JWT from stored auth state."""
+    try:
+        from hermes_cli.auth import _nous_invoke_jwt_is_usable
+    except Exception:
+        _nous_invoke_jwt_is_usable = None
+
+    for token_key, expiry_key in (
+        ("agent_key", "agent_key_expires_at"),
+        ("access_token", "expires_at"),
+    ):
+        token = provider.get(token_key)
+        if not isinstance(token, str) or not token.strip():
+            continue
+        if _nous_invoke_jwt_is_usable is None:
+            if token.count(".") == 2:
+                return token
+            continue
+        if _nous_invoke_jwt_is_usable(
+            token,
+            scope=provider.get("scope"),
+            expires_at=provider.get(expiry_key),
+        ):
+            return token
+    return ""
 
 
 def _nous_base_url() -> str:
@@ -1256,25 +1278,21 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
     """Return fresh Nous runtime credentials when available.
 
     This mirrors the main agent's 401 recovery path and keeps auxiliary
-    clients aligned with the singleton auth store + JWT/mint flow instead of
+    clients aligned with the singleton auth store + JWT refresh flow instead of
     relying only on whatever raw tokens happen to be sitting in auth.json
     or the credential pool.
     """
     try:
         from hermes_cli.auth import (
             NOUS_INFERENCE_AUTH_MODE_AUTO,
-            NOUS_INFERENCE_AUTH_MODE_LEGACY,
             resolve_nous_runtime_credentials,
         )
 
         creds = resolve_nous_runtime_credentials(
             min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
             timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-            inference_auth_mode=(
-                NOUS_INFERENCE_AUTH_MODE_LEGACY
-                if force_refresh
-                else NOUS_INFERENCE_AUTH_MODE_AUTO
-            ),
+            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_AUTO,
+            force_refresh=force_refresh,
         )
     except Exception as exc:
         logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
@@ -1558,13 +1576,9 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
         _mark_provider_unhealthy("nous", ttl=60)
         return None, None
     if runtime is None and nous:
-        # Runtime credential mint failed but stored Nous auth is still present.
-        # Falls back to the raw stored token below; surface a debug line so
-        # operators investigating expired/invalid sessions have a breadcrumb,
-        # without blocking the fallback path the rest of this function relies on.
         logger.debug(
-            "Auxiliary Nous: runtime credential mint failed; falling back to "
-            "stored auth.json token."
+            "Auxiliary Nous: runtime JWT refresh failed; checking stored "
+            "auth.json token."
         )
     global auxiliary_is_nous
     auxiliary_is_nous = True
@@ -1602,6 +1616,13 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
         api_key, base_url = runtime
     else:
         api_key = _nous_api_key(nous or {})
+        if not api_key:
+            logger.warning(
+                "Auxiliary Nous client unavailable: no usable inference JWT found "
+                "(run: hermes auth add nous)."
+            )
+            _mark_provider_unhealthy("nous", ttl=60)
+            return None, None
         base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
     return (
         OpenAI(
@@ -2725,15 +2746,12 @@ def _refresh_provider_credentials(provider: str) -> bool:
             _evict_cached_clients(normalized)
             return True
         if normalized == "nous":
-            from hermes_cli.auth import (
-                NOUS_INFERENCE_AUTH_MODE_LEGACY,
-                resolve_nous_runtime_credentials,
-            )
+            from hermes_cli.auth import resolve_nous_runtime_credentials
 
             creds = resolve_nous_runtime_credentials(
                 min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-                inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
+                force_refresh=True,
             )
             if not str(creds.get("api_key", "") or "").strip():
                 return False
diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index da76af8b1..a6c975be3 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -203,15 +203,13 @@ def _print_billing_or_entitlement_guidance(
 def _try_refresh_nous_paid_entitlement_credentials(agent) -> bool:
     """Refresh Nous runtime credentials after a fresh paid-entitlement check."""
     try:
-        from hermes_cli.auth import NOUS_INFERENCE_AUTH_MODE_LEGACY
         from hermes_cli.nous_account import get_nous_portal_account_info
 
         account_info = get_nous_portal_account_info(force_fresh=True)
         if account_info.paid_service_access is not True:
             return False
         return agent._try_refresh_nous_client_credentials(
-            force=False,
-            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
+            force=True,
         )
     except Exception:
         return False
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 072199ce7..f5ebafd8e 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -203,7 +203,7 @@ class PooledCredential:
     def runtime_api_key(self) -> str:
         if self.provider == "nous":
             # Nous stores the runtime inference credential in agent_key for
-            # compatibility. It may be a NAS invoke JWT or legacy opaque key.
+            # compatibility. It must be a NAS invoke JWT.
             return str(self.agent_key or self.access_token or "")
         return str(self.access_token or "")
 
@@ -919,11 +919,8 @@ class CredentialPool:
                     entry = synced
                 auth_mod.resolve_nous_runtime_credentials(
                     min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-                    inference_auth_mode=(
-                        auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY
-                        if force
-                        else auth_mod.NOUS_INFERENCE_AUTH_MODE_AUTO
-                    ),
+                    inference_auth_mode=auth_mod.NOUS_INFERENCE_AUTH_MODE_AUTO,
+                    force_refresh=force,
                 )
                 updated = self._sync_nous_entry_from_auth_store(entry)
             else:
@@ -1205,7 +1202,7 @@ class CredentialPool:
                 auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
             )
         if self.provider == "nous":
-            # Nous refresh/mint can require network access and should happen when
+            # Nous refresh can require network access and should happen when
             # runtime credentials are actually resolved, not merely when the pool
             # is enumerated for listing, migration, or selection.
             return False
@@ -1748,9 +1745,9 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                     "inference_base_url": state.get("inference_base_url"),
                     "agent_key": state.get("agent_key"),
                     "agent_key_expires_at": state.get("agent_key_expires_at"),
-                    # Carry the mint/refresh timestamps into the pool so
+                    # Carry the refresh timestamps into the pool so
                     # freshness-sensitive consumers (self-heal hooks, pool
-                    # pruning by age) can distinguish just-minted credentials
+                    # pruning by age) can distinguish just-refreshed credentials
                     # from stale ones.  Without these, fresh device_code
                     # entries get obtained_at=None and look older than they
                     # are (#15099).
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 6bbe1473a..38f9e604a 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -9,14 +9,11 @@ Architecture:
 - ProviderConfig registry defines known OAuth providers
 - Auth store (auth.json) holds per-provider credential state
 - resolve_provider() picks the active provider via priority chain
-- resolve_*_runtime_credentials() handles token refresh and key minting
+- resolve_*_runtime_credentials() handles token refresh and runtime keys
 - logout_command() is the CLI entry point for clearing auth
 
-Nous authentication paths:
-- Invoke JWT (preferred): use a scoped access_token directly for inference.
-- Legacy session key (fallback): mint an opaque 24h key when JWT auth is
-  unavailable, or when HERMES_AGENT_USE_LEGACY_SESSION_KEYS is set for
-  debugging or rollback.
+Nous authentication path:
+- Invoke JWT: use a scoped access_token directly for inference.
 """
 
 from __future__ import annotations
@@ -73,22 +70,16 @@ AUTH_LOCK_TIMEOUT_SECONDS = 15.0
 DEFAULT_NOUS_PORTAL_URL = "https://portal.nousresearch.com"
 DEFAULT_NOUS_INFERENCE_URL = "https://inference-api.nousresearch.com/v1"
 DEFAULT_NOUS_CLIENT_ID = "hermes-cli"
-NOUS_LEGACY_AGENT_KEY_SCOPE = "inference:mint_agent_key"
 NOUS_INFERENCE_INVOKE_SCOPE = "inference:invoke"
-DEFAULT_NOUS_SCOPE = f"{NOUS_INFERENCE_INVOKE_SCOPE} {NOUS_LEGACY_AGENT_KEY_SCOPE}"
-NOUS_LEGACY_SESSION_KEYS_ENV = "HERMES_AGENT_USE_LEGACY_SESSION_KEYS"
+DEFAULT_NOUS_SCOPE = NOUS_INFERENCE_INVOKE_SCOPE
 NOUS_DEVICE_CODE_SOURCE = "device_code"
 NOUS_INFERENCE_AUTH_MODE_AUTO = "auto"
 NOUS_INFERENCE_AUTH_MODE_FRESH = "fresh"
-NOUS_INFERENCE_AUTH_MODE_LEGACY = "legacy"
 NOUS_INFERENCE_AUTH_MODES = frozenset({
     NOUS_INFERENCE_AUTH_MODE_AUTO,
     NOUS_INFERENCE_AUTH_MODE_FRESH,
-    NOUS_INFERENCE_AUTH_MODE_LEGACY,
 })
 NOUS_AUTH_PATH_INVOKE_JWT = "invoke_jwt"
-NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE = "legacy_session_key_cache"
-NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT = "legacy_session_key_mint"
 DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 NOUS_INVOKE_JWT_MIN_TTL_SECONDS = ACCESS_TOKEN_REFRESH_SKEW_SECONDS
@@ -1653,12 +1644,11 @@ def _optional_base_url(value: Any) -> Optional[str]:
     return cleaned if cleaned else None
 
 
-# Allowlist of hosts the Nous Portal proxy is willing to forward minted
-# bearer tokens to. The bearer is a long-lived agent_key minted by
-# portal.nousresearch.com — sending it anywhere else would leak it.
+# Allowlist of hosts the Nous Portal proxy is willing to forward inference
+# JWTs to. Sending a bearer anywhere else would leak it.
 #
 # This is consulted only for URLs coming from the NETWORK side (Portal
-# refresh / agent-key-mint responses). User-controlled env-var overrides
+# refresh responses). User-controlled env-var overrides
 # (NOUS_INFERENCE_BASE_URL) bypass validation — that's the documented
 # dev/staging escape hatch and the env source is already trusted (the
 # user set it themselves).
@@ -1676,10 +1666,10 @@ def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[st
     unexpected host — letting the caller fall back to the configured
     default rather than persist or forward a poisoned value.
 
-    Defense-in-depth: a compromised refresh / mint response from the
-    Portal API (MITM, malicious response injection) could otherwise
-    redirect every subsequent proxy request — bearing the user's
-    legitimately-minted agent_key — to an attacker-controlled endpoint.
+    Defense-in-depth: a compromised refresh response from the Portal API
+    (MITM, malicious response injection) could otherwise redirect every
+    subsequent proxy request — bearing the user's inference JWT — to an
+    attacker-controlled endpoint.
     Validating scheme + host at the source closes that loop before the
     poisoned URL ever lands in ``auth.json``.
 
@@ -1743,14 +1733,6 @@ def _scope_values(raw_scope: Any) -> set[str]:
     return scopes
 
 
-def _nous_legacy_session_keys_forced() -> bool:
-    return is_truthy_value(os.getenv(NOUS_LEGACY_SESSION_KEYS_ENV), default=False)
-
-
-def _nous_scope_has_invoke(raw_scope: Any) -> bool:
-    return NOUS_INFERENCE_INVOKE_SCOPE in _scope_values(raw_scope)
-
-
 def _normalize_nous_inference_auth_mode(inference_auth_mode: Optional[str]) -> str:
     mode = str(inference_auth_mode or NOUS_INFERENCE_AUTH_MODE_AUTO).strip().lower()
     if mode not in NOUS_INFERENCE_AUTH_MODES:
@@ -1809,23 +1791,6 @@ def _nous_invoke_jwt_is_usable(
     )
 
 
-def _nous_legacy_session_key_reason(
-    token: Any,
-    *,
-    scope: Any = None,
-    expires_at: Any = None,
-    inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
-) -> str:
-    if inference_auth_mode == NOUS_INFERENCE_AUTH_MODE_LEGACY:
-        return "forced_legacy_session_key"
-    if _nous_legacy_session_keys_forced():
-        return "forced_legacy_session_keys"
-    return (
-        _nous_invoke_jwt_status(token, scope=scope, expires_at=expires_at)
-        or "invoke_jwt_unavailable"
-    )
-
-
 def _choose_nous_inference_auth_path(
     state: Dict[str, Any],
     *,
@@ -1833,34 +1798,29 @@ def _choose_nous_inference_auth_path(
     min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
     inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
 ) -> Tuple[str, Optional[str]]:
-    inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
+    del min_key_ttl_seconds
+    _normalize_nous_inference_auth_mode(inference_auth_mode)
     token = state.get("access_token") if access_token is None else access_token
-    if (
-        not _nous_legacy_session_keys_forced()
-        and inference_auth_mode != NOUS_INFERENCE_AUTH_MODE_LEGACY
-        and _nous_invoke_jwt_is_usable(
-            token,
-            scope=state.get("scope"),
-            expires_at=state.get("expires_at"),
-        )
+    if _nous_invoke_jwt_is_usable(
+        token,
+        scope=state.get("scope"),
+        expires_at=state.get("expires_at"),
     ):
         return NOUS_AUTH_PATH_INVOKE_JWT, None
-    if (
-        inference_auth_mode == NOUS_INFERENCE_AUTH_MODE_AUTO
-        and _agent_key_is_usable(
-            state,
-            max(60, int(min_key_ttl_seconds)),
-        )
-    ):
-        return NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE, None
-    return (
-        NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT,
-        _nous_legacy_session_key_reason(
+    reason = (
+        _nous_invoke_jwt_status(
             token,
             scope=state.get("scope"),
             expires_at=state.get("expires_at"),
-            inference_auth_mode=inference_auth_mode,
-        ),
+        )
+        or "invoke_jwt_unavailable"
+    )
+    raise AuthError(
+        "Nous Portal access token is not a usable inference JWT "
+        f"({reason}). Re-authenticate with: hermes auth add nous",
+        provider="nous",
+        code=reason,
+        relogin_required=True,
     )
 
 
@@ -1877,24 +1837,6 @@ def _log_nous_invoke_jwt_selected(
     )
 
 
-def _log_nous_legacy_session_key_selected(
-    reason: str,
-    *,
-    access_token: Any,
-    sequence_id: Optional[str] = None,
-) -> None:
-    logger.info(
-        "Nous inference auth: using legacy session key path (%s)",
-        reason,
-    )
-    _oauth_trace(
-        "nous_legacy_session_key_selected",
-        sequence_id=sequence_id,
-        reason=reason,
-        access_token_fp=_token_fingerprint(access_token),
-    )
-
-
 def _nous_jwt_expires_at(token: Any, fallback_expires_at: Any = None) -> Optional[str]:
     claims = _decode_jwt_claims(token)
     exp = claims.get("exp")
@@ -4304,85 +4246,6 @@ def _request_device_code(
     return data
 
 
-def _is_nous_invoke_scope_refusal(exc: Exception) -> bool:
-    if not isinstance(exc, httpx.HTTPStatusError):
-        return False
-    response = exc.response
-    if response.status_code not in {400, 401, 403}:
-        return False
-    try:
-        payload = response.json()
-    except Exception:
-        payload = {}
-    text = " ".join(
-        str(value)
-        for value in (
-            payload.get("error") if isinstance(payload, dict) else None,
-            payload.get("error_description") if isinstance(payload, dict) else None,
-            response.text,
-        )
-        if value
-    ).lower()
-    if not text:
-        return False
-    return (
-        "invalid_scope" in text
-        or "unsupported_scope" in text
-        or "scope" in text and NOUS_INFERENCE_INVOKE_SCOPE in text
-    )
-
-
-def _nous_device_scope_with_env_override(
-    requested_scope: Optional[str],
-    *,
-    default_scope: str = DEFAULT_NOUS_SCOPE,
-) -> Tuple[str, bool]:
-    explicit_scope = requested_scope is not None
-    scope = requested_scope or default_scope
-    if _nous_legacy_session_keys_forced():
-        scope = NOUS_LEGACY_AGENT_KEY_SCOPE
-    return scope, explicit_scope
-
-
-def _request_nous_device_code_with_scope_fallback(
-    *,
-    client: httpx.Client,
-    portal_base_url: str,
-    client_id: str,
-    scope: str,
-    allow_legacy_fallback: bool,
-) -> Tuple[Dict[str, Any], str]:
-    try:
-        return (
-            _request_device_code(
-                client=client,
-                portal_base_url=portal_base_url,
-                client_id=client_id,
-                scope=scope,
-            ),
-            scope,
-        )
-    except Exception as exc:
-        if (
-            allow_legacy_fallback
-            and _nous_scope_has_invoke(scope)
-            and _is_nous_invoke_scope_refusal(exc)
-        ):
-            logger.info("Nous inference auth: NAS refused invoke scope, retrying legacy scope")
-            _oauth_trace("nous_device_code_invoke_scope_refused")
-            retry_scope = NOUS_LEGACY_AGENT_KEY_SCOPE
-            return (
-                _request_device_code(
-                    client=client,
-                    portal_base_url=portal_base_url,
-                    client_id=client_id,
-                    scope=retry_scope,
-                ),
-                retry_scope,
-            )
-        raise
-
-
 def _poll_for_token(
     client: httpx.Client,
     portal_base_url: str,
@@ -4433,7 +4296,7 @@ def _poll_for_token(
 
 
 # =============================================================================
-# Nous Portal — token refresh, agent key minting, model discovery
+# Nous Portal — token refresh and model discovery
 # =============================================================================
 
 # -----------------------------------------------------------------------------
@@ -4512,9 +4375,9 @@ def _nous_shared_store_lock(timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS):
     to be held, acquire ``_auth_store_lock`` FIRST. All runtime refresh
     paths follow this order. The one exception is
     ``_try_import_shared_nous_state``, which holds this lock alone for
-    the entire refresh+mint cycle so concurrent imports on sibling
-    profiles can't race on the single-use shared refresh token; that
-    helper must NOT be called with ``_auth_store_lock`` already held.
+    the entire refresh cycle so concurrent imports on sibling profiles
+    can't race on the single-use shared refresh token; that helper must
+    NOT be called with ``_auth_store_lock`` already held.
     """
     try:
         lock_path = _nous_shared_store_path().with_suffix(".lock")
@@ -4574,9 +4437,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
     is a convenience layer; the per-profile auth.json remains the source
     of truth.
 
-    We deliberately omit the runtime ``agent_key`` compatibility field
-    (either an invoke JWT or legacy opaque session key) — only OAuth tokens
-    are cross-profile useful.
+    We deliberately omit the runtime ``agent_key`` compatibility field;
+    only the OAuth tokens are useful across profiles.
     """
     refresh_token = state.get("refresh_token")
     access_token = state.get("access_token")
@@ -4802,9 +4664,9 @@ def _try_import_shared_nous_state(
 ) -> Optional[Dict[str, Any]]:
     """Attempt to rehydrate Nous OAuth state from the shared store.
 
-    Reads the shared file (if present), runs a forced refresh+mint using
-    the stored refresh_token to produce a fresh access_token + agent_key
-    scoped to this profile, and returns the full auth_state dict ready
+    Reads the shared file (if present), runs a forced refresh using the
+    stored refresh_token to produce a fresh inference JWT scoped to this
+    profile, and returns the full auth_state dict ready
     for ``persist_nous_credentials()``.
 
     Returns ``None`` when no shared state is available or the rehydrate
@@ -4820,7 +4682,7 @@ def _try_import_shared_nous_state(
 
             # Build a full state dict so refresh_nous_oauth_from_state has every
             # field it needs. force_refresh=True gets us a fresh access_token
-            # for this profile; fresh auth mode avoids stale cached legacy keys.
+            # for this profile.
             state: Dict[str, Any] = {
                 "access_token": shared.get("access_token"),
                 "refresh_token": shared.get("refresh_token"),
@@ -4927,39 +4789,6 @@ def _refresh_access_token(
     raise AuthError(description, provider="nous", code=code, relogin_required=relogin)
 
 
-def _mint_agent_key(
-    *,
-    client: httpx.Client,
-    portal_base_url: str,
-    access_token: str,
-    min_ttl_seconds: int,
-) -> Dict[str, Any]:
-    """Mint (or reuse) a short-lived inference API key."""
-    response = client.post(
-        f"{portal_base_url}/api/oauth/agent-key",
-        headers={"Authorization": f"Bearer {access_token}"},
-        json={"min_ttl_seconds": max(60, int(min_ttl_seconds))},
-    )
-
-    if response.status_code == 200:
-        payload = response.json()
-        if "api_key" not in payload:
-            raise AuthError("Mint response missing api_key",
-                            provider="nous", code="server_error")
-        return payload
-
-    try:
-        error_payload = response.json()
-    except Exception as exc:
-        raise AuthError("Agent key mint request failed",
-                        provider="nous", code="server_error") from exc
-
-    code = str(error_payload.get("error", "server_error"))
-    description = str(error_payload.get("error_description") or "Agent key mint request failed")
-    relogin = code in {"invalid_token", "invalid_grant"}
-    raise AuthError(description, provider="nous", code=code, relogin_required=relogin)
-
-
 def fetch_nous_models(
     *,
     inference_base_url: str,
@@ -5021,15 +4850,12 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool:
     key = state.get("agent_key")
     if not isinstance(key, str) or not key.strip():
         return False
-    if _decode_jwt_claims(key):
-        if _nous_legacy_session_keys_forced():
-            return False
-        return _nous_invoke_jwt_is_usable(
-            key,
-            scope=state.get("scope"),
-            expires_at=state.get("agent_key_expires_at"),
-        )
-    return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds)
+    return _nous_invoke_jwt_is_usable(
+        key,
+        scope=state.get("scope"),
+        expires_at=state.get("agent_key_expires_at"),
+        min_ttl_seconds=max(0, int(min_ttl_seconds)),
+    )
 
 
 def resolve_nous_access_token(
@@ -5160,11 +4986,11 @@ def refresh_nous_oauth_pure(
 ) -> Dict[str, Any]:
     """Refresh Nous OAuth state without mutating auth.json directly.
 
-    ``on_state_update`` is called after a successful access-token refresh and
-    before any subsequent agent-key mint. Callers that own persistent state can
-    use it to save the newly rotated refresh token before later work can fail.
+    ``on_state_update`` is called after a successful access-token refresh.
+    Callers that own persistent state can use it to save the newly rotated
+    refresh token before later validation can fail.
     """
-    inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
+    _normalize_nous_inference_auth_mode(inference_auth_mode)
     state: Dict[str, Any] = {
         "access_token": access_token,
         "refresh_token": refresh_token,
@@ -5186,33 +5012,39 @@ def refresh_nous_oauth_pure(
     timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
 
     with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-        min_agent_key_ttl = max(60, int(min_key_ttl_seconds))
-        legacy_session_keys = _nous_legacy_session_keys_forced()
-        current_invoke_jwt_usable = (
-            not legacy_session_keys
-            and _nous_invoke_jwt_is_usable(
-                state.get("access_token"),
-                scope=state.get("scope"),
-                expires_at=state.get("expires_at"),
-            )
+        del min_key_ttl_seconds
+        current_invoke_jwt_status = _nous_invoke_jwt_status(
+            state.get("access_token"),
+            scope=state.get("scope"),
+            expires_at=state.get("expires_at"),
         )
-        if (
-            force_refresh
-            or (
-                _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS)
-                and not current_invoke_jwt_usable
-            )
-        ):
+        if force_refresh or current_invoke_jwt_status is not None:
+            refresh_token_value = state.get("refresh_token")
+            if not isinstance(refresh_token_value, str) or not refresh_token_value:
+                if current_invoke_jwt_status is not None:
+                    raise AuthError(
+                        "Nous Portal access token is not a usable inference JWT "
+                        f"({current_invoke_jwt_status}) and no refresh token is available. "
+                        "Re-authenticate with: hermes auth add nous",
+                        provider="nous",
+                        code=current_invoke_jwt_status,
+                        relogin_required=True,
+                    )
+                raise AuthError(
+                    "No refresh token is available for Nous Portal.",
+                    provider="nous",
+                    relogin_required=True,
+                )
             refreshed = _refresh_access_token(
                 client=client,
                 portal_base_url=state["portal_base_url"],
                 client_id=state["client_id"],
-                refresh_token=state["refresh_token"],
+                refresh_token=refresh_token_value,
             )
             now = datetime.now(timezone.utc)
             access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
             state["access_token"] = refreshed["access_token"]
-            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
+            state["refresh_token"] = refreshed.get("refresh_token") or refresh_token_value
             state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
             state["scope"] = refreshed.get("scope") or state.get("scope")
             refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
@@ -5226,34 +5058,12 @@ def refresh_nous_oauth_pure(
             if on_state_update is not None:
                 on_state_update(dict(state), "post_refresh_access_token")
 
-        selected_auth_path, fallback_reason = _choose_nous_inference_auth_path(
+        selected_auth_path, _ = _choose_nous_inference_auth_path(
             state,
-            min_key_ttl_seconds=min_agent_key_ttl,
             inference_auth_mode=inference_auth_mode,
         )
         if selected_auth_path == NOUS_AUTH_PATH_INVOKE_JWT:
             _select_nous_invoke_jwt(state)
-        elif selected_auth_path == NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT:
-            _log_nous_legacy_session_key_selected(
-                fallback_reason or "legacy_session_key_required",
-                access_token=state.get("access_token"),
-            )
-            mint_payload = _mint_agent_key(
-                client=client,
-                portal_base_url=state["portal_base_url"],
-                access_token=state["access_token"],
-                min_ttl_seconds=min_key_ttl_seconds,
-            )
-            now = datetime.now(timezone.utc)
-            state["agent_key"] = mint_payload.get("api_key")
-            state["agent_key_id"] = mint_payload.get("key_id")
-            state["agent_key_expires_at"] = mint_payload.get("expires_at")
-            state["agent_key_expires_in"] = mint_payload.get("expires_in")
-            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
-            state["agent_key_obtained_at"] = now.isoformat()
-            minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
-            if minted_url:
-                state["inference_base_url"] = minted_url
 
     return state
 
@@ -5296,7 +5106,7 @@ def persist_nous_credentials(
     *,
     label: Optional[str] = None,
 ):
-    """Persist minted Nous OAuth credentials as the singleton provider state
+    """Persist Nous OAuth credentials as the singleton provider state
     and ensure the credential pool is in sync.
 
     Nous credentials are read at runtime from two independent locations:
@@ -5307,7 +5117,7 @@ def persist_nous_credentials(
     - ``credential_pool.nous``: used by the runtime ``pool.select()`` path.
 
     Historically ``hermes auth add nous`` wrote a ``manual:device_code`` pool
-    entry only, skipping ``providers.nous``.  When the 24h agent_key TTL
+    entry only, skipping ``providers.nous``. When the runtime credential
     expired, the recovery path read the empty singleton state and raised
     ``AuthError`` silently (``logger.debug`` at INFO level).
 
@@ -5367,16 +5177,16 @@ def resolve_nous_runtime_credentials(
     insecure: Optional[bool] = None,
     ca_bundle: Optional[str] = None,
     inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
+    force_refresh: bool = False,
 ) -> Dict[str, Any]:
     """
     Resolve Nous inference credentials for runtime use.
 
-    Ensures access_token is valid (refreshes if needed) and a short-lived
-    inference key is present with minimum TTL (mints/reuses as needed).
-    Concurrent processes coordinate through the auth store file lock.
+    Ensures access_token is a valid inference-scoped JWT, refreshing it when
+    needed. Concurrent processes coordinate through the auth store file lock.
 
     Returns dict with: provider, base_url, api_key, key_id, expires_at,
-    expires_in, source ("invoke_jwt", "cache", or "portal"), and auth_path.
+    expires_in, source ("invoke_jwt"), and auth_path.
     """
     inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
     min_key_ttl_seconds = max(60, int(min_key_ttl_seconds))
@@ -5456,6 +5266,7 @@ def resolve_nous_runtime_credentials(
             refresh_token_fp=_token_fingerprint(state.get("refresh_token")),
         )
 
+        selected_auth_path = NOUS_AUTH_PATH_INVOKE_JWT
         with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
             access_token = state.get("access_token")
             refresh_token = state.get("refresh_token")
@@ -5464,43 +5275,40 @@ def resolve_nous_runtime_credentials(
                 raise AuthError("No access token found for Nous Portal login.",
                                 provider="nous", relogin_required=True)
 
-            # Step 1: refresh access token if expiring. If the access token
-            # is already a valid invoke JWT, trust its own exp claim even when
-            # older auth.json metadata has a stale/missing expires_at.
-            current_invoke_jwt_usable = (
-                not _nous_legacy_session_keys_forced()
-                and _nous_invoke_jwt_is_usable(
-                    access_token,
-                    scope=state.get("scope"),
-                    expires_at=state.get("expires_at"),
-                )
+            invoke_jwt_status = _nous_invoke_jwt_status(
+                access_token,
+                scope=state.get("scope"),
+                expires_at=state.get("expires_at"),
             )
-            if (
-                _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS)
-                and not current_invoke_jwt_usable
-            ):
+            if force_refresh or invoke_jwt_status is not None:
                 with _nous_shared_store_lock(timeout_seconds=max(timeout_seconds + 5.0, AUTH_LOCK_TIMEOUT_SECONDS)):
                     if _merge_shared_nous_oauth_state(state):
                         access_token = state.get("access_token")
                         refresh_token = state.get("refresh_token")
-                        _persist_state("post_shared_merge_access_expiring")
-
-                    if (
-                        _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS)
-                        and not _nous_invoke_jwt_is_usable(
+                        invoke_jwt_status = _nous_invoke_jwt_status(
                             access_token,
                             scope=state.get("scope"),
                             expires_at=state.get("expires_at"),
                         )
-                    ):
-                        if not isinstance(refresh_token, str) or not refresh_token:
-                            raise AuthError("Session expired and no refresh token is available.",
-                                            provider="nous", relogin_required=True)
+                        _persist_state("post_shared_merge_access_unusable")
 
+                    if force_refresh or invoke_jwt_status is not None:
+                        if not isinstance(refresh_token, str) or not refresh_token:
+                            reason = invoke_jwt_status or "force_refresh"
+                            raise AuthError(
+                                "Nous Portal access token is not a usable inference JWT "
+                                f"({reason}) and no refresh token is available. "
+                                "Re-authenticate with: hermes auth add nous",
+                                provider="nous",
+                                code=reason,
+                                relogin_required=True,
+                            )
+
+                        refresh_reason = "force_refresh" if force_refresh else (invoke_jwt_status or "access_unusable")
                         _oauth_trace(
                             "refresh_start",
                             sequence_id=sequence_id,
-                            reason="access_expiring",
+                            reason=refresh_reason,
                             refresh_token_fp=_token_fingerprint(refresh_token),
                         )
                         try:
@@ -5542,166 +5350,25 @@ def resolve_nous_runtime_credentials(
                         _oauth_trace(
                             "refresh_success",
                             sequence_id=sequence_id,
-                            reason="access_expiring",
+                            reason=refresh_reason,
                             previous_refresh_token_fp=_token_fingerprint(previous_refresh_token),
                             new_refresh_token_fp=_token_fingerprint(refresh_token),
                         )
-                        # Persist immediately so downstream mint failures cannot drop rotated refresh tokens.
-                        _persist_state("post_refresh_access_expiring")
+                        # Persist immediately so validation failures cannot drop rotated refresh tokens.
+                        _persist_state("post_refresh_access_token")
 
-            # Step 2: resolve the compatibility ``agent_key`` field. Preferred
-            # path stores the NAS invoke JWT there; legacy path mints/reuses
-            # the opaque session key.
-            used_cached_key = False
-            mint_payload: Optional[Dict[str, Any]] = None
-            selected_auth_path, fallback_reason = _choose_nous_inference_auth_path(
+            selected_auth_path, _ = _choose_nous_inference_auth_path(
                 state,
                 access_token=access_token,
-                min_key_ttl_seconds=min_key_ttl_seconds,
                 inference_auth_mode=inference_auth_mode,
             )
+            _select_nous_invoke_jwt(
+                state,
+                access_token=access_token,
+                sequence_id=sequence_id,
+            )
 
-            if selected_auth_path == NOUS_AUTH_PATH_INVOKE_JWT:
-                _select_nous_invoke_jwt(
-                    state,
-                    access_token=access_token,
-                    sequence_id=sequence_id,
-                )
-            elif selected_auth_path == NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE:
-                used_cached_key = True
-                logger.info("Nous inference auth: using cached agent_key")
-                _oauth_trace("agent_key_reuse", sequence_id=sequence_id)
-            else:
-                _log_nous_legacy_session_key_selected(
-                    fallback_reason or "legacy_session_key_required",
-                    access_token=access_token,
-                    sequence_id=sequence_id,
-                )
-                try:
-                    _oauth_trace(
-                        "mint_start",
-                        sequence_id=sequence_id,
-                        access_token_fp=_token_fingerprint(access_token),
-                    )
-                    mint_payload = _mint_agent_key(
-                        client=client, portal_base_url=portal_base_url,
-                        access_token=access_token, min_ttl_seconds=min_key_ttl_seconds,
-                    )
-                except AuthError as exc:
-                    _oauth_trace(
-                        "mint_error",
-                        sequence_id=sequence_id,
-                        code=exc.code,
-                    )
-                    # Retry path: access token may be stale server-side despite local checks
-                    latest_refresh_token = state.get("refresh_token")
-                    if (
-                        exc.code in {"invalid_token", "invalid_grant"}
-                        and isinstance(latest_refresh_token, str)
-                        and latest_refresh_token
-                    ):
-                        with _nous_shared_store_lock(timeout_seconds=max(timeout_seconds + 5.0, AUTH_LOCK_TIMEOUT_SECONDS)):
-                            if _merge_shared_nous_oauth_state(state):
-                                access_token = state.get("access_token")
-                                latest_refresh_token = state.get("refresh_token")
-                                _persist_state("post_shared_merge_mint_retry")
-                            else:
-                                _oauth_trace(
-                                    "refresh_start",
-                                    sequence_id=sequence_id,
-                                    reason="mint_retry_after_invalid_token",
-                                    refresh_token_fp=_token_fingerprint(latest_refresh_token),
-                                )
-                                try:
-                                    refreshed = _refresh_access_token(
-                                        client=client, portal_base_url=portal_base_url,
-                                        client_id=client_id, refresh_token=latest_refresh_token,
-                                    )
-                                except AuthError as exc:
-                                    if _is_terminal_nous_refresh_error(exc):
-                                        _quarantine_nous_oauth_state(
-                                            state,
-                                            exc,
-                                            reason="runtime_mint_retry_refresh_failure",
-                                        )
-                                        _quarantine_nous_pool_entries(
-                                            auth_store,
-                                            exc,
-                                            reason="runtime_mint_retry_refresh_failure",
-                                        )
-                                        _persist_state("terminal_runtime_mint_retry_refresh_failure")
-                                    raise
-                                now = datetime.now(timezone.utc)
-                                access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
-                                state["access_token"] = refreshed["access_token"]
-                                state["refresh_token"] = refreshed.get("refresh_token") or latest_refresh_token
-                                state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
-                                state["scope"] = refreshed.get("scope") or state.get("scope")
-                                refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
-                                if refreshed_url:
-                                    inference_base_url = refreshed_url
-                                state["obtained_at"] = now.isoformat()
-                                state["expires_in"] = access_ttl
-                                state["expires_at"] = datetime.fromtimestamp(
-                                    now.timestamp() + access_ttl, tz=timezone.utc
-                                ).isoformat()
-                                access_token = state["access_token"]
-                                refresh_token = state["refresh_token"]
-                                _oauth_trace(
-                                    "refresh_success",
-                                    sequence_id=sequence_id,
-                                    reason="mint_retry_after_invalid_token",
-                                    previous_refresh_token_fp=_token_fingerprint(latest_refresh_token),
-                                    new_refresh_token_fp=_token_fingerprint(refresh_token),
-                                )
-                                # Persist retry refresh immediately for crash safety and cross-process visibility.
-                                _persist_state("post_refresh_mint_retry")
-
-                        retry_inference_auth_mode = (
-                            NOUS_INFERENCE_AUTH_MODE_LEGACY
-                            if inference_auth_mode == NOUS_INFERENCE_AUTH_MODE_LEGACY
-                            else NOUS_INFERENCE_AUTH_MODE_FRESH
-                        )
-                        retry_auth_path, _ = _choose_nous_inference_auth_path(
-                            state,
-                            access_token=access_token,
-                            min_key_ttl_seconds=min_key_ttl_seconds,
-                            inference_auth_mode=retry_inference_auth_mode,
-                        )
-                        if retry_auth_path == NOUS_AUTH_PATH_INVOKE_JWT:
-                            mint_payload = None
-                            selected_auth_path = NOUS_AUTH_PATH_INVOKE_JWT
-                            _select_nous_invoke_jwt(
-                                state,
-                                access_token=access_token,
-                                sequence_id=sequence_id,
-                            )
-                        else:
-                            mint_payload = _mint_agent_key(
-                                client=client, portal_base_url=portal_base_url,
-                                access_token=access_token, min_ttl_seconds=min_key_ttl_seconds,
-                            )
-                    else:
-                        raise
-
-            if mint_payload is not None:
-                now = datetime.now(timezone.utc)
-                state["agent_key"] = mint_payload.get("api_key")
-                state["agent_key_id"] = mint_payload.get("key_id")
-                state["agent_key_expires_at"] = mint_payload.get("expires_at")
-                state["agent_key_expires_in"] = mint_payload.get("expires_in")
-                state["agent_key_reused"] = bool(mint_payload.get("reused", False))
-                state["agent_key_obtained_at"] = now.isoformat()
-                minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
-                if minted_url:
-                    inference_base_url = minted_url
-                _oauth_trace(
-                    "mint_success",
-                    sequence_id=sequence_id,
-                    reused=bool(mint_payload.get("reused", False)),
-                )
-
-            # Persist routing and TLS metadata for non-interactive refresh/mint
+            # Persist routing and TLS metadata for non-interactive refresh.
             state["portal_base_url"] = portal_base_url
             state["inference_base_url"] = inference_base_url
             state["client_id"] = client_id
@@ -5735,11 +5402,7 @@ def resolve_nous_runtime_credentials(
         "key_id": state.get("agent_key_id"),
         "expires_at": expires_at,
         "expires_in": expires_in,
-        "source": (
-            NOUS_AUTH_PATH_INVOKE_JWT
-            if selected_auth_path == NOUS_AUTH_PATH_INVOKE_JWT
-            else ("cache" if used_cached_key else "portal")
-        ),
+        "source": NOUS_AUTH_PATH_INVOKE_JWT,
         "auth_path": selected_auth_path,
     }
 
@@ -5765,8 +5428,7 @@ def _snapshot_nous_pool_status() -> Dict[str, Any]:
     """Best-effort status from the credential pool.
 
     This is a fallback only. The auth-store provider state is the runtime source
-    of truth because it is what ``resolve_nous_runtime_credentials()`` refreshes
-    and mints against.
+    of truth because it is what ``resolve_nous_runtime_credentials()`` refreshes.
     """
     try:
         from agent.credential_pool import load_pool
@@ -5858,7 +5520,7 @@ def get_nous_auth_status() -> Dict[str, Any]:
     """Status snapshot for Nous auth.
 
     Prefer the auth-store provider state, because that is the live source of
-    truth for refresh + mint operations. When provider state exists, validate it
+    truth for refresh operations. When provider state exists, validate it
     by resolving runtime credentials so revoked refresh sessions do not show up
     as a healthy login. If provider state is absent, fall back to the credential
     pool for the just-logged-in / not-yet-promoted case.
@@ -7719,10 +7381,7 @@ def _nous_device_code_login(
         or pconfig.inference_base_url
     ).rstrip("/")
     client_id = client_id or pconfig.client_id
-    scope, explicit_scope = _nous_device_scope_with_env_override(
-        scope,
-        default_scope=pconfig.scope,
-    )
+    scope = scope or pconfig.scope
     timeout = httpx.Timeout(timeout_seconds)
     verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)
 
@@ -7737,12 +7396,11 @@ def _nous_device_code_login(
         print(f"TLS verification: custom CA bundle ({ca_bundle})")
 
     with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-        device_data, scope = _request_nous_device_code_with_scope_fallback(
+        device_data = _request_device_code(
             client=client,
             portal_base_url=portal_base_url,
             client_id=client_id,
             scope=scope,
-            allow_legacy_fallback=not explicit_scope,
         )
 
         verification_url = str(device_data["verification_uri_complete"])
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index c7f41f7c3..96e5161a3 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -3004,7 +3004,6 @@ def _model_flow_nous(config, current_model="", args=None):
     """Nous Portal provider: ensure logged in, then pick model."""
     from hermes_cli.auth import (
         get_provider_auth_state,
-        NOUS_INFERENCE_AUTH_MODE_LEGACY,
         _prompt_model_selection,
         _save_model_choice,
         _update_config_for_provider,
@@ -3107,13 +3106,13 @@ def _model_flow_nous(config, current_model="", args=None):
         try:
             refreshed_creds = resolve_nous_runtime_credentials(
                 min_key_ttl_seconds=5 * 60,
-                inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
+                force_refresh=True,
             )
             if refreshed_creds:
                 creds = refreshed_creds
         except Exception:
             # Runtime inference has its own paid-entitlement recovery path; do
-            # not block model selection if this opportunistic remint fails.
+            # not block model selection if this opportunistic refresh fails.
             pass
 
     # Resolve portal URL early — needed both for upgrade links and for the
diff --git a/hermes_cli/proxy/adapters/base.py b/hermes_cli/proxy/adapters/base.py
index db778e18f..65107b6f1 100644
--- a/hermes_cli/proxy/adapters/base.py
+++ b/hermes_cli/proxy/adapters/base.py
@@ -69,11 +69,11 @@ class UpstreamAdapter(ABC):
 
     @abstractmethod
     def get_credential(self) -> UpstreamCredential:
-        """Return a fresh credential, refreshing/minting if necessary.
+        """Return a fresh credential, refreshing or rotating if necessary.
 
         Implementations should:
           - refresh the access token if it's near expiry
-          - mint/rotate the upstream bearer key if it's near expiry
+          - rotate the upstream bearer key if it's near expiry
           - persist any refreshed state back to disk
 
         Raises:
@@ -90,8 +90,7 @@ class UpstreamAdapter(ABC):
         """Return an alternate credential after an upstream auth failure.
 
         The default is no retry. Providers can override this for one-shot
-        fallback paths, such as switching from a preferred token type to a
-        legacy bearer after the upstream rejects the first request.
+        fallback paths after the upstream rejects the first request.
         """
         _ = failed_credential, status_code
         return None
diff --git a/hermes_cli/proxy/adapters/nous_portal.py b/hermes_cli/proxy/adapters/nous_portal.py
index 57c0a8824..0d06fd545 100644
--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@@ -1,13 +1,8 @@
 """Nous Portal upstream adapter.
 
 Reads the user's Nous OAuth state from ``~/.hermes/auth.json`` through the
-shared runtime resolver, refreshes the access token and resolves the
-``agent_key`` compatibility credential when needed, then exposes the upstream
-base URL plus bearer for the proxy server to forward to.
-
-The ``agent_key`` field may hold either a NAS invoke JWT or the legacy
-opaque session key. The refresh helper handles both — see
-:func:`hermes_cli.auth.resolve_nous_runtime_credentials`.
+shared runtime resolver, validates or refreshes the inference JWT, then exposes
+the upstream base URL plus bearer for the proxy server to forward to.
 """
 
 from __future__ import annotations
@@ -20,7 +15,6 @@ from hermes_cli.auth import (
     AuthError,
     DEFAULT_NOUS_INFERENCE_URL,
     NOUS_INFERENCE_AUTH_MODE_AUTO,
-    NOUS_INFERENCE_AUTH_MODE_LEGACY,
     _load_auth_store,
     _auth_store_lock,
     _is_terminal_nous_refresh_error,
@@ -72,8 +66,8 @@ class NousPortalAdapter(UpstreamAdapter):
         state = self._read_state()
         if state is None:
             return False
-        # We need either a usable agent_key OR (refresh_token + access_token)
-        # to recover. The refresh helper will mint/refresh as needed.
+        # We need either a stored agent_key (only its presence is checked here)
+        # OR (refresh_token + access_token); the refresh helper validates/refreshes.
         return bool(
             state.get("agent_key")
             or (state.get("refresh_token") and state.get("access_token"))
@@ -90,14 +84,8 @@ class NousPortalAdapter(UpstreamAdapter):
         failed_credential: UpstreamCredential,
         status_code: int,
     ) -> Optional[UpstreamCredential]:
-        if status_code != 401:
-            return None
-        if failed_credential.bearer.count(".") != 2:
-            return None
-        logger.info("proxy: Nous upstream rejected bearer; retrying with legacy session key")
-        return self._get_credential(
-            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
-        )
+        _ = failed_credential, status_code
+        return None
 
     def _get_credential(self, *, inference_auth_mode: str) -> UpstreamCredential:
         with self._lock:
@@ -131,10 +119,10 @@ class NousPortalAdapter(UpstreamAdapter):
                     f"Failed to refresh Nous Portal credentials: {exc}"
                 ) from exc
 
-            agent_key = refreshed.get("api_key")
-            if not agent_key:
+            runtime_key = refreshed.get("api_key")
+            if not runtime_key:
                 raise RuntimeError(
-                    "Nous Portal refresh did not return a usable agent_key. "
+                    "Nous Portal refresh did not return a usable inference JWT. "
                     "Try `hermes auth add nous` to re-authenticate."
                 )
 
@@ -145,7 +133,7 @@ class NousPortalAdapter(UpstreamAdapter):
             base_url = base_url.rstrip("/")
 
             return UpstreamCredential(
-                bearer=agent_key,
+                bearer=runtime_key,
                 base_url=base_url,
                 expires_at=refreshed.get("expires_at"),
             )
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index c40316e02..608cce53d 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -1115,10 +1115,17 @@ def _resolve_explicit_runtime(
             explicit_base_url
             or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
         )
-        # Only use the agent_key compatibility field for inference. It may be
-        # either a NAS invoke JWT or a legacy opaque session key; raw OAuth
-        # access_token fallback is handled by resolve_nous_runtime_credentials().
-        api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
+        # Only use the agent_key compatibility field for inference when it
+        # contains a NAS invoke JWT; raw OAuth access_token fallback is handled
+        # by resolve_nous_runtime_credentials().
+        api_key = explicit_api_key or (
+            str(state.get("agent_key") or "").strip()
+            if _agent_key_is_usable(
+                state,
+                max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+            )
+            else ""
+        )
         expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
         if not api_key:
             creds = resolve_nous_runtime_credentials(
@@ -1309,12 +1316,11 @@ def resolve_runtime_provider(
                 or getattr(entry, "access_token", "")
             )
         # For Nous, the pool entry's runtime_api_key is the agent_key
-        # compatibility field: either an invoke JWT or legacy opaque key.
-        # The pool doesn't
+        # compatibility field. It must be an invoke JWT. The pool doesn't
         # refresh it during selection (that would trigger network calls in
         # non-runtime contexts like `hermes auth list`).  If the key is
         # expired, clear pool_api_key so we fall through to
-        # resolve_nous_runtime_credentials() which handles refresh + fallback.
+        # resolve_nous_runtime_credentials() which handles refresh.
         if provider == "nous" and entry is not None and pool_api_key:
             min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
             nous_state = {
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index eaa1b2432..df286fb6d 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -1898,8 +1898,7 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
     """
     if provider_id == "nous":
         from hermes_cli.auth import (
-            _nous_device_scope_with_env_override,
-            _request_nous_device_code_with_scope_fallback,
+            _request_device_code,
             PROVIDER_REGISTRY,
         )
         import httpx
@@ -1910,22 +1909,21 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
             or pconfig.portal_base_url
         ).rstrip("/")
         client_id = pconfig.client_id
-        scope, explicit_scope = _nous_device_scope_with_env_override(
-            None,
-            default_scope=pconfig.scope,
-        )
+        scope = pconfig.scope
 
         def _do_nous_device_request():
             with httpx.Client(
                 timeout=httpx.Timeout(15.0),
                 headers={"Accept": "application/json"},
             ) as client:
-                return _request_nous_device_code_with_scope_fallback(
-                    client=client,
-                    portal_base_url=portal_base_url,
-                    client_id=client_id,
-                    scope=scope,
-                    allow_legacy_fallback=not explicit_scope,
+                return (
+                    _request_device_code(
+                        client=client,
+                        portal_base_url=portal_base_url,
+                        client_id=client_id,
+                        scope=scope,
+                    ),
+                    scope,
                 )
 
         device_data, effective_scope = await asyncio.get_running_loop().run_in_executor(
@@ -2093,7 +2091,7 @@ def _nous_poller(session_id: str) -> None:
                 expires_in=expires_in,
                 poll_interval=interval,
             )
-        # Same post-processing as _nous_device_code_login (mint agent key)
+        # Same post-processing as _nous_device_code_login (validate/refresh JWT)
         now = datetime.now(timezone.utc)
         token_ttl = int(token_data.get("expires_in") or 0)
         auth_state = {
diff --git a/run_agent.py b/run_agent.py
index 96af23158..7c90d0554 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3007,19 +3007,15 @@ class AIAgent:
         try:
             from hermes_cli.auth import (
                 NOUS_INFERENCE_AUTH_MODE_AUTO,
-                NOUS_INFERENCE_AUTH_MODE_LEGACY,
                 resolve_nous_runtime_credentials,
             )
 
-            selected_auth_mode = inference_auth_mode or (
-                NOUS_INFERENCE_AUTH_MODE_LEGACY
-                if force
-                else NOUS_INFERENCE_AUTH_MODE_AUTO
-            )
+            selected_auth_mode = inference_auth_mode or NOUS_INFERENCE_AUTH_MODE_AUTO
             creds = resolve_nous_runtime_credentials(
                 min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
                 inference_auth_mode=selected_auth_mode,
+                force_refresh=force,
             )
         except Exception as exc:
             logger.debug("Nous credential refresh failed: %s", exc)
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index f1f54ae30..66e52b6c1 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -1,5 +1,6 @@
 """Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""
 
+import base64
 import json
 import logging
 import time
@@ -29,6 +30,12 @@ from agent.auxiliary_client import (
 )
 
 
+def _jwt_with_claims(claims: dict) -> str:
+    header = base64.urlsafe_b64encode(b'{"alg":"none","typ":"JWT"}').decode().rstrip("=")
+    payload = base64.urlsafe_b64encode(json.dumps(claims).encode()).decode().rstrip("=")
+    return f"{header}.{payload}.sig"
+
+
 @pytest.fixture(autouse=True)
 def _clean_env(monkeypatch):
     """Strip provider env vars so each test starts clean."""
@@ -887,9 +894,16 @@ class TestVisionClientFallback:
 
 class TestAuxiliaryPoolAwareness:
     def test_try_nous_uses_pool_entry(self):
+        pooled_token = _jwt_with_claims({
+            "scope": "inference:invoke",
+            "exp": int(time.time() + 3600),
+        })
+
         class _Entry:
             access_token = "pooled-access-token"
-            agent_key = "pooled-agent-key"
+            agent_key = pooled_token
+            agent_key_expires_at = "2099-01-01T00:00:00+00:00"
+            scope = "inference:invoke"
             inference_base_url = "https://inference.pool.example/v1"
 
         class _Pool:
@@ -910,7 +924,7 @@ class TestAuxiliaryPoolAwareness:
 
         assert client is not None
         assert model == "google/gemini-3-flash-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key"
+        assert mock_openai.call_args.kwargs["api_key"] == pooled_token
         assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1"
 
     def test_try_nous_uses_portal_recommendation_for_text(self):
diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py
index b783c7ab6..2f46b273b 100644
--- a/tests/agent/test_credential_pool.py
+++ b/tests/agent/test_credential_pool.py
@@ -1225,7 +1225,7 @@ def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch):
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -1252,7 +1252,7 @@ def test_load_pool_mirrors_nous_invoke_jwt_agent_key_runtime_api_key(tmp_path, m
     expires_at = datetime.fromtimestamp(time.time() + 3600, tz=timezone.utc).isoformat()
     token = _jwt_with_claims({
         "sub": "test-user",
-        "scope": ["inference:invoke", "inference:mint_agent_key"],
+        "scope": ["inference:invoke"],
         "exp": int(time.time() + 3600),
     })
     _write_auth_store(
@@ -1266,7 +1266,7 @@ def test_load_pool_mirrors_nous_invoke_jwt_agent_key_runtime_api_key(tmp_path, m
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:invoke inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": token,
                     "refresh_token": "refresh-token",
                     "expires_at": expires_at,
@@ -1307,7 +1307,7 @@ def test_nous_pool_terminal_refresh_removes_device_code_entry(tmp_path, monkeypa
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -1479,7 +1479,7 @@ def test_load_pool_migrates_nous_provider_state_preserves_tls(tmp_path, monkeypa
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2405,7 +2405,7 @@ def test_sync_nous_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypat
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-OLD",
                     "refresh_token": "refresh-OLD",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2435,7 +2435,7 @@ def test_sync_nous_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypat
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-NEW",
                     "refresh_token": "refresh-NEW",
                     "expires_at": "2026-03-24T12:30:00+00:00",
@@ -2467,7 +2467,7 @@ def test_sync_nous_entry_noop_when_tokens_match(tmp_path, monkeypatch):
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2504,7 +2504,7 @@ def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-OLD",
                     "refresh_token": "refresh-OLD",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2541,7 +2541,7 @@ def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-FRESH",
                     "refresh_token": "refresh-FRESH",
                     "expires_at": "2026-03-24T12:30:00+00:00",
diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py
index 801b190cd..371a7080c 100644
--- a/tests/hermes_cli/test_auth_commands.py
+++ b/tests/hermes_cli/test_auth_commands.py
@@ -107,15 +107,15 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
             "portal_base_url": "https://portal.example.com",
             "inference_base_url": "https://inference.example.com/v1",
             "client_id": "hermes-cli",
-            "scope": "inference:invoke inference:mint_agent_key",
+            "scope": "inference:invoke",
             "token_type": "Bearer",
             "access_token": token,
             "refresh_token": "refresh-token",
             "obtained_at": "2026-03-23T10:00:00+00:00",
             "expires_at": "2026-03-23T11:00:00+00:00",
             "expires_in": 3600,
-            "agent_key": "ak-test",
-            "agent_key_id": "ak-id",
+            "agent_key": token,
+            "agent_key_id": None,
             "agent_key_expires_at": "2026-03-23T10:30:00+00:00",
             "agent_key_expires_in": 1800,
             "agent_key_reused": False,
@@ -228,15 +228,15 @@ def test_auth_add_nous_oauth_honors_custom_label(tmp_path, monkeypatch):
             "portal_base_url": "https://portal.example.com",
             "inference_base_url": "https://inference.example.com/v1",
             "client_id": "hermes-cli",
-            "scope": "inference:invoke inference:mint_agent_key",
+            "scope": "inference:invoke",
             "token_type": "Bearer",
             "access_token": token,
             "refresh_token": "refresh-token",
             "obtained_at": "2026-03-23T10:00:00+00:00",
             "expires_at": "2026-03-23T11:00:00+00:00",
             "expires_in": 3600,
-            "agent_key": "ak-test",
-            "agent_key_id": "ak-id",
+            "agent_key": token,
+            "agent_key_id": None,
             "agent_key_expires_at": "2026-03-23T10:30:00+00:00",
             "agent_key_expires_in": 1800,
             "agent_key_reused": False,
diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py
index 32d1c2aa8..453a0d19e 100644
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@@ -1,4 +1,4 @@
-"""Regression tests for Nous OAuth refresh + agent-key mint interactions."""
+"""Regression tests for Nous OAuth refresh and inference JWT interactions."""
 
 import base64
 import json
@@ -126,14 +126,15 @@ class TestResolveVerifyFallback:
 def _setup_nous_auth(
     hermes_home: Path,
     *,
-    access_token: str = "access-old",
+    access_token: str = "",
     refresh_token: str = "refresh-old",
-    scope: str = "inference:mint_agent_key",
+    scope: str = "inference:invoke",
     expires_at: str = "2026-02-01T00:00:00+00:00",
     expires_in: int = 0,
     agent_key: str | None = None,
     agent_key_expires_at: str | None = None,
 ) -> None:
+    access_token = access_token or _invoke_jwt(seconds=3600, scope=scope)
     hermes_home.mkdir(parents=True, exist_ok=True)
     auth_store = {
         "version": 1,
@@ -162,16 +163,6 @@ def _setup_nous_auth(
     (hermes_home / "auth.json").write_text(json.dumps(auth_store, indent=2))
 
 
-def _mint_payload(api_key: str = "agent-key") -> dict:
-    return {
-        "api_key": api_key,
-        "key_id": "key-id-1",
-        "expires_at": datetime.now(timezone.utc).isoformat(),
-        "expires_in": 1800,
-        "reused": False,
-    }
-
-
 def _jwt_with_claims(claims: dict) -> str:
     def _part(payload: dict) -> str:
         raw = json.dumps(payload, separators=(",", ":")).encode("utf-8")
@@ -184,7 +175,7 @@ def _future_iso(seconds: int = 3600) -> str:
     return datetime.fromtimestamp(time.time() + seconds, tz=timezone.utc).isoformat()
 
 
-def _invoke_jwt(*, seconds: int = 3600, scope: object = "inference:invoke inference:mint_agent_key") -> str:
+def _invoke_jwt(*, seconds: int = 3600, scope: object = "inference:invoke") -> str:
     return _jwt_with_claims({
         "sub": "test-user",
         "scope": scope,
@@ -209,11 +200,6 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("legacy agent-key mint should not run for invoke JWT")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
-
     creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
 
     assert creds["api_key"] == token
@@ -278,15 +264,11 @@ def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
     before_mtime = auth_path.stat().st_mtime_ns
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("stable invoke JWT should not mint a legacy key")
-
     def _unexpected_shared_write(*args, **kwargs):
         raise AssertionError("unchanged invoke JWT resolution should not sync shared store")
 
     sync_calls = []
 
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
     monkeypatch.setattr(auth_mod, "_write_shared_nous_state", _unexpected_shared_write)
     monkeypatch.setattr(
         auth_mod,
@@ -330,11 +312,7 @@ def test_resolve_nous_runtime_credentials_trusts_invoke_jwt_exp_over_stale_metad
     def _unexpected_refresh(*args, **kwargs):
         raise AssertionError("valid invoke JWT should not be refreshed because metadata is stale")
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("valid invoke JWT should not fall back to legacy mint")
-
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _unexpected_refresh)
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
 
     creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
 
@@ -347,7 +325,7 @@ def test_resolve_nous_runtime_credentials_trusts_invoke_jwt_exp_over_stale_metad
     assert datetime.fromisoformat(singleton["agent_key_expires_at"]).timestamp() > time.time() + 300
 
 
-def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jwt(
+def test_resolve_nous_runtime_credentials_does_not_apply_agent_key_ttl_to_invoke_jwt(
     tmp_path,
     monkeypatch,
 ):
@@ -364,11 +342,6 @@ def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jw
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("1800s legacy min TTL should not force opaque mint for invoke JWT")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
-
     creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=1800)
 
     assert creds["api_key"] == token
@@ -378,7 +351,56 @@ def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jw
     assert payload["credential_pool"]["nous"][0]["agent_key"] == token
 
 
-def test_legacy_auth_mode_bypasses_usable_invoke_jwt(tmp_path, monkeypatch):
+def test_resolve_nous_runtime_credentials_refreshes_legacy_agent_key_to_invoke_jwt(
+    tmp_path,
+    monkeypatch,
+):
+    import hermes_cli.auth as auth_mod
+
+    hermes_home = tmp_path / "hermes"
+    refreshed_token = _invoke_jwt(seconds=3600)
+    _setup_nous_auth(
+        hermes_home,
+        access_token="legacy-access-token",
+        refresh_token="refresh-old",
+        scope=auth_mod.DEFAULT_NOUS_SCOPE,
+        expires_at=_future_iso(3600),
+        expires_in=3600,
+        agent_key="legacy-opaque-session-key",
+        agent_key_expires_at=_future_iso(3600),
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    refresh_calls = []
+
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        del client, portal_base_url, client_id
+        refresh_calls.append(refresh_token)
+        return {
+            "access_token": refreshed_token,
+            "refresh_token": "refresh-new",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "scope": auth_mod.DEFAULT_NOUS_SCOPE,
+        }
+
+    monkeypatch.setattr(auth_mod, "_refresh_access_token", _fake_refresh_access_token)
+
+    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+
+    assert refresh_calls == ["refresh-old"]
+    assert creds["api_key"] == refreshed_token
+    assert creds["source"] == auth_mod.NOUS_AUTH_PATH_INVOKE_JWT
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    singleton = payload["providers"]["nous"]
+    assert singleton["access_token"] == refreshed_token
+    assert singleton["refresh_token"] == "refresh-new"
+    assert singleton["agent_key"] == refreshed_token
+    assert singleton["agent_key_id"] is None
+    assert payload["credential_pool"]["nous"][0]["agent_key"] == refreshed_token
+
+
+def test_legacy_auth_mode_is_rejected(tmp_path, monkeypatch):
     import hermes_cli.auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
@@ -392,28 +414,14 @@ def test_legacy_auth_mode_bypasses_usable_invoke_jwt(tmp_path, monkeypatch):
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    mint_calls = []
-
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, min_ttl_seconds
-        mint_calls.append(access_token)
-        return _mint_payload(api_key="legacy-after-jwt-401")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
-
-    creds = auth_mod.resolve_nous_runtime_credentials(
-        min_key_ttl_seconds=300,
-        inference_auth_mode=auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY,
-    )
-
-    assert mint_calls == [token]
-    assert creds["api_key"] == "legacy-after-jwt-401"
-    assert creds["auth_path"] == auth_mod.NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT
-    payload = json.loads((hermes_home / "auth.json").read_text())
-    assert payload["providers"]["nous"]["agent_key"] == "legacy-after-jwt-401"
+    with pytest.raises(ValueError, match="Invalid Nous inference auth mode"):
+        auth_mod.resolve_nous_runtime_credentials(
+            min_key_ttl_seconds=300,
+            inference_auth_mode="legacy",
+        )
 
 
-def test_resolve_nous_runtime_credentials_falls_back_when_invoke_scope_missing(
+def test_resolve_nous_runtime_credentials_reauths_when_invoke_scope_missing(
     tmp_path,
     monkeypatch,
 ):
@@ -428,32 +436,24 @@ def test_resolve_nous_runtime_credentials_falls_back_when_invoke_scope_missing(
     _setup_nous_auth(
         hermes_home,
         access_token=token,
-        scope=auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        refresh_token="",
+        scope="inference:mint_agent_key",
         expires_at=_future_iso(3600),
         expires_in=3600,
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    calls = []
+    with pytest.raises(AuthError) as exc:
+        auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, min_ttl_seconds
-        calls.append(access_token)
-        return _mint_payload(api_key="opaque-agent-key")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
-
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-
-    assert calls == [token]
-    assert creds["api_key"] == "opaque-agent-key"
-    assert creds["source"] == "portal"
+    assert exc.value.code == "missing_inference_invoke_scope"
+    assert exc.value.relogin_required is True
     payload = json.loads((hermes_home / "auth.json").read_text())
-    assert payload["providers"]["nous"]["agent_key"] == "opaque-agent-key"
-    assert payload["credential_pool"]["nous"][0]["agent_key"] == "opaque-agent-key"
+    assert payload["providers"]["nous"]["agent_key"] is None
+    assert "credential_pool" not in payload or not payload["credential_pool"].get("nous")
 
 
-def test_nous_device_code_login_retries_legacy_scope_when_invoke_refused(monkeypatch):
+def test_nous_device_code_login_does_not_retry_legacy_scope_when_invoke_refused(monkeypatch):
     import hermes_cli.auth as auth_mod
 
     scopes = []
@@ -461,59 +461,31 @@ def test_nous_device_code_login_retries_legacy_scope_when_invoke_refused(monkeyp
     def _fake_request_device_code(*, client, portal_base_url, client_id, scope):
         del client, portal_base_url, client_id
         scopes.append(scope)
-        if len(scopes) == 1:
-            request = httpx.Request("POST", "https://portal.example.com/api/oauth/device/code")
-            response = httpx.Response(
-                400,
-                json={
-                    "error": "invalid_scope",
-                    "error_description": "unsupported inference:invoke",
-                },
-                request=request,
-            )
-            raise httpx.HTTPStatusError("invalid_scope", request=request, response=response)
-        return {
-            "device_code": "device",
-            "user_code": "user",
-            "verification_uri": "https://portal.example.com/device",
-            "verification_uri_complete": "https://portal.example.com/device?code=user",
-            "expires_in": 600,
-            "interval": 1,
-        }
-
-    def _fake_poll_for_token(**kwargs):
-        del kwargs
-        return {
-            "access_token": "access-legacy",
-            "refresh_token": "refresh-legacy",
-            "expires_in": 900,
-            "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
-        }
-
-    def _fake_refresh(state, **kwargs):
-        del kwargs
-        refreshed = dict(state)
-        refreshed["agent_key"] = "opaque-agent-key"
-        refreshed["agent_key_expires_at"] = _future_iso(1800)
-        return refreshed
+        request = httpx.Request("POST", "https://portal.example.com/api/oauth/device/code")
+        response = httpx.Response(
+            400,
+            json={
+                "error": "invalid_scope",
+                "error_description": "unsupported inference:invoke",
+            },
+            request=request,
+        )
+        raise httpx.HTTPStatusError("invalid_scope", request=request, response=response)
 
     monkeypatch.setattr(auth_mod, "_request_device_code", _fake_request_device_code)
-    monkeypatch.setattr(auth_mod, "_poll_for_token", _fake_poll_for_token)
-    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
 
-    result = auth_mod._nous_device_code_login(
-        portal_base_url="https://portal.example.com",
-        inference_base_url="https://inference.example.com/v1",
-        open_browser=False,
-        timeout_seconds=1,
-    )
+    with pytest.raises(httpx.HTTPStatusError):
+        auth_mod._nous_device_code_login(
+            portal_base_url="https://portal.example.com",
+            inference_base_url="https://inference.example.com/v1",
+            open_browser=False,
+            timeout_seconds=1,
+        )
 
-    assert scopes == [auth_mod.DEFAULT_NOUS_SCOPE, auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
-    assert result["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
-    assert result["agent_key"] == "opaque-agent-key"
+    assert scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
 
 
-def test_forced_legacy_env_skips_invoke_scope_and_jwt_storage(tmp_path, monkeypatch):
+def test_legacy_session_env_is_ignored_for_invoke_scope_and_jwt_storage(tmp_path, monkeypatch):
     import hermes_cli.auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
@@ -526,25 +498,16 @@ def test_forced_legacy_env_skips_invoke_scope_and_jwt_storage(tmp_path, monkeypa
         expires_in=3600,
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    monkeypatch.setenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, "true")
-
-    mint_calls = []
-
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, min_ttl_seconds
-        mint_calls.append(access_token)
-        return _mint_payload(api_key="forced-legacy-key")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
+    monkeypatch.setenv("HERMES_AGENT_USE_LEGACY_SESSION_KEYS", "true")
 
     creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
 
-    assert mint_calls == [token]
-    assert creds["api_key"] == "forced-legacy-key"
+    assert creds["api_key"] == token
     payload = json.loads((hermes_home / "auth.json").read_text())
-    assert payload["providers"]["nous"]["agent_key"] == "forced-legacy-key"
+    assert payload["providers"]["nous"]["agent_key"] == token
 
     requested_scopes = []
+    login_token = _invoke_jwt(seconds=3600)
 
     def _fake_request_device_code(*, client, portal_base_url, client_id, scope):
         del client, portal_base_url, client_id
@@ -561,31 +524,24 @@ def test_forced_legacy_env_skips_invoke_scope_and_jwt_storage(tmp_path, monkeypa
     def _fake_poll_for_token(**kwargs):
         del kwargs
         return {
-            "access_token": "access-legacy",
-            "refresh_token": "refresh-legacy",
+            "access_token": login_token,
+            "refresh_token": "refresh-token",
             "expires_in": 900,
-            "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+            "scope": auth_mod.DEFAULT_NOUS_SCOPE,
         }
 
-    def _fake_refresh(state, **kwargs):
-        del kwargs
-        refreshed = dict(state)
-        refreshed["agent_key"] = "forced-legacy-login-key"
-        refreshed["agent_key_expires_at"] = _future_iso(1800)
-        return refreshed
-
     monkeypatch.setattr(auth_mod, "_request_device_code", _fake_request_device_code)
     monkeypatch.setattr(auth_mod, "_poll_for_token", _fake_poll_for_token)
-    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
 
-    auth_mod._nous_device_code_login(
+    result = auth_mod._nous_device_code_login(
         portal_base_url="https://portal.example.com",
         inference_base_url="https://inference.example.com/v1",
         open_browser=False,
         timeout_seconds=1,
     )
 
-    assert requested_scopes == [auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
+    assert requested_scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
+    assert result["agent_key"] == login_token
 
 
 def test_nous_inference_auth_logs_do_not_include_secret_values(
@@ -596,37 +552,42 @@ def test_nous_inference_auth_logs_do_not_include_secret_values(
     import hermes_cli.auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
-    token = _jwt_with_claims({
-        "sub": "secret-user",
-        "scope": "inference:mint_agent_key",
-        "exp": int(time.time() + 3600),
-    })
+    token = _invoke_jwt(seconds=3600)
+    refreshed_token = _invoke_jwt(seconds=7200)
     refresh_token = "refresh-secret-token"
-    opaque_key = "opaque-secret-agent-key"
     _setup_nous_auth(
         hermes_home,
         access_token=token,
         refresh_token=refresh_token,
-        scope=auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        scope=auth_mod.DEFAULT_NOUS_SCOPE,
         expires_at=_future_iso(3600),
         expires_in=3600,
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, access_token, min_ttl_seconds
-        return _mint_payload(api_key=opaque_key)
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        del client, portal_base_url, client_id, refresh_token
+        return {
+            "access_token": refreshed_token,
+            "refresh_token": "refresh-new",
+            "expires_in": 7200,
+            "token_type": "Bearer",
+            "scope": auth_mod.DEFAULT_NOUS_SCOPE,
+        }
 
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
+    monkeypatch.setattr(auth_mod, "_refresh_access_token", _fake_refresh_access_token)
 
     caplog.set_level(logging.INFO, logger="hermes_cli.auth")
-    auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    auth_mod.resolve_nous_runtime_credentials(
+        min_key_ttl_seconds=300,
+        force_refresh=True,
+    )
 
     logged = caplog.text
-    assert "legacy session key path" in logged
+    assert "using NAS invoke JWT" in logged
     assert token not in logged
+    assert refreshed_token not in logged
     assert refresh_token not in logged
-    assert opaque_key not in logged
 
 
 def test_get_nous_auth_status_checks_credential_pool(tmp_path, monkeypatch):
@@ -805,68 +766,75 @@ def test_get_nous_auth_status_empty_returns_not_logged_in(tmp_path, monkeypatch)
     assert status["logged_in"] is False
 
 
-def test_refresh_token_persisted_when_mint_returns_insufficient_credits(tmp_path, monkeypatch):
+def test_refresh_token_persisted_when_refreshed_jwt_lacks_invoke_scope(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     refresh_calls = []
-    mint_calls = {"count": 0}
+    bad_jwt = _jwt_with_claims({
+        "sub": "test-user",
+        "scope": "profile",
+        "exp": int(time.time() + 3600),
+    })
+    good_jwt = _invoke_jwt(seconds=3600)
 
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         refresh_calls.append(refresh_token)
-        idx = len(refresh_calls)
+        if len(refresh_calls) == 1:
+            token = bad_jwt
+        else:
+            token = good_jwt
         return {
-            "access_token": f"access-{idx}",
-            "refresh_token": f"refresh-{idx}",
-            "expires_in": 0,
+            "access_token": token,
+            "refresh_token": f"refresh-{len(refresh_calls)}",
+            "expires_in": 3600,
             "token_type": "Bearer",
+            "scope": "profile" if len(refresh_calls) == 1 else "inference:invoke",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        mint_calls["count"] += 1
-        if mint_calls["count"] == 1:
-            raise AuthError("credits exhausted", provider="nous", code="insufficient_credits")
-        return _mint_payload(api_key="agent-key-2")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
     with pytest.raises(AuthError) as exc:
         resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-    assert exc.value.code == "insufficient_credits"
+    assert exc.value.code == "missing_inference_invoke_scope"
 
     state_after_failure = get_provider_auth_state("nous")
     assert state_after_failure is not None
     assert state_after_failure["refresh_token"] == "refresh-1"
-    assert state_after_failure["access_token"] == "access-1"
+    assert state_after_failure["access_token"] == bad_jwt
 
     creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-    assert creds["api_key"] == "agent-key-2"
+    assert creds["api_key"] == good_jwt
     assert refresh_calls == ["refresh-old", "refresh-1"]
 
 
-def test_refresh_token_persisted_when_mint_times_out(tmp_path, monkeypatch):
+def test_refresh_token_persisted_when_refreshed_token_is_not_jwt(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         return {
             "access_token": "access-1",
             "refresh_token": "refresh-1",
-            "expires_in": 0,
+            "expires_in": 3600,
             "token_type": "Bearer",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        raise httpx.ReadTimeout("mint timeout")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
-    with pytest.raises(httpx.ReadTimeout):
+    with pytest.raises(AuthError) as exc:
         resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    assert exc.value.code == "access_token_not_jwt"
 
     state_after_failure = get_provider_auth_state("nous")
     assert state_after_failure is not None
@@ -881,7 +849,11 @@ def test_terminal_refresh_failure_quarantines_tokens(
     from hermes_cli import auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
     from agent.credential_pool import load_pool
 
@@ -967,35 +939,36 @@ def test_managed_access_token_refresh_failure_quarantines_tokens(
     assert refresh_calls == ["refresh-old"]
 
 
-def test_mint_retry_uses_latest_rotated_refresh_token(tmp_path, monkeypatch):
+def test_unusable_access_token_refresh_uses_latest_rotated_refresh_token(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     refresh_calls = []
-    mint_calls = {"count": 0}
+    good_jwt = _invoke_jwt(seconds=3600)
 
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         refresh_calls.append(refresh_token)
-        idx = len(refresh_calls)
+        token = "access-still-not-jwt" if len(refresh_calls) == 1 else good_jwt
         return {
-            "access_token": f"access-{idx}",
-            "refresh_token": f"refresh-{idx}",
-            "expires_in": 0,
+            "access_token": token,
+            "refresh_token": f"refresh-{len(refresh_calls)}",
+            "expires_in": 3600,
             "token_type": "Bearer",
+            "scope": "inference:invoke",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        mint_calls["count"] += 1
-        if mint_calls["count"] == 1:
-            raise AuthError("stale access token", provider="nous", code="invalid_token")
-        return _mint_payload(api_key="agent-key")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
+    with pytest.raises(AuthError) as exc:
+        resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    assert exc.value.code == "access_token_not_jwt"
     creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-    assert creds["api_key"] == "agent-key"
+    assert creds["api_key"] == good_jwt
     assert refresh_calls == ["refresh-old", "refresh-1"]
 
 
@@ -1170,21 +1143,23 @@ class TestLoginNousSkipKeepsCurrent:
 def _full_state_fixture() -> dict:
     """Shape of the dict returned by _nous_device_code_login /
     refresh_nous_oauth_from_state. Used as helper input."""
+    token = _invoke_jwt(seconds=3600)
+    expires_at = _future_iso(3600)
     return {
         "portal_base_url": "https://portal.example.com",
         "inference_base_url": "https://inference.example.com/v1",
         "client_id": "hermes-cli",
-        "scope": "inference:mint_agent_key",
+        "scope": "inference:invoke",
         "token_type": "Bearer",
-        "access_token": "access-tok",
+        "access_token": token,
         "refresh_token": "refresh-tok",
         "obtained_at": "2026-04-17T22:00:00+00:00",
-        "expires_at": "2026-04-17T22:15:00+00:00",
-        "expires_in": 900,
-        "agent_key": "agent-key-value",
-        "agent_key_id": "ak-id",
-        "agent_key_expires_at": "2026-04-18T22:00:00+00:00",
-        "agent_key_expires_in": 86400,
+        "expires_at": expires_at,
+        "expires_in": 3600,
+        "agent_key": token,
+        "agent_key_id": None,
+        "agent_key_expires_at": expires_at,
+        "agent_key_expires_in": 3600,
         "agent_key_reused": False,
         "agent_key_obtained_at": "2026-04-17T22:00:10+00:00",
         "tls": {"insecure": False, "ca_bundle": None},
@@ -1210,7 +1185,8 @@ def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monke
     }))
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    entry = persist_nous_credentials(_full_state_fixture())
+    state = _full_state_fixture()
+    entry = persist_nous_credentials(state)
 
     assert entry is not None
     assert entry.provider == "nous"
@@ -1220,17 +1196,17 @@ def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monke
 
     # providers.nous populated with the full state (new behaviour)
     singleton = payload["providers"]["nous"]
-    assert singleton["access_token"] == "access-tok"
+    assert singleton["access_token"] == state["access_token"]
     assert singleton["refresh_token"] == "refresh-tok"
-    assert singleton["agent_key"] == "agent-key-value"
-    assert singleton["agent_key_expires_at"] == "2026-04-18T22:00:00+00:00"
+    assert singleton["agent_key"] == state["agent_key"]
+    assert singleton["agent_key_expires_at"] == state["agent_key_expires_at"]
 
     # credential_pool.nous has exactly one canonical device_code entry
     pool_entries = payload["credential_pool"]["nous"]
     assert len(pool_entries) == 1, pool_entries
     pool_entry = pool_entries[0]
     assert pool_entry["source"] == NOUS_DEVICE_CODE_SOURCE
-    assert pool_entry["agent_key"] == "agent-key-value"
+    assert pool_entry["agent_key"] == state["agent_key"]
     assert pool_entry["inference_base_url"] == "https://inference.example.com/v1"
 
 
@@ -1243,7 +1219,6 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
     providers.nous was empty.
     """
     from hermes_cli.auth import (
-        NOUS_INFERENCE_AUTH_MODE_FRESH,
         persist_nous_credentials,
         resolve_nous_runtime_credentials,
     )
@@ -1256,29 +1231,27 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     persist_nous_credentials(_full_state_fixture())
+    new_jwt = _invoke_jwt(seconds=3600)
 
     # Stub the network-touching steps so we don't actually contact the
     # portal — the point of this test is that state lookup succeeds and
     # doesn't raise "Hermes is not logged into Nous Portal".
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         return {
-            "access_token": "access-new",
+            "access_token": new_jwt,
             "refresh_token": "refresh-new",
-            "expires_in": 900,
+            "expires_in": 3600,
             "token_type": "Bearer",
+            "scope": "inference:invoke",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        return _mint_payload(api_key="new-agent-key")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
     creds = resolve_nous_runtime_credentials(
         min_key_ttl_seconds=300,
-        inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
+        force_refresh=True,
     )
-    assert creds["api_key"] == "new-agent-key"
+    assert creds["api_key"] == new_jwt
 
 
 def test_persist_nous_credentials_idempotent_no_duplicate_pool_entries(tmp_path, monkeypatch):
@@ -1304,21 +1277,23 @@ def test_persist_nous_credentials_idempotent_no_duplicate_pool_entries(tmp_path,
     persist_nous_credentials(first)
 
     second = _full_state_fixture()
-    second["access_token"] = "access-second"
-    second["agent_key"] = "agent-key-second"
+    second_token = _invoke_jwt(seconds=7200)
+    second["access_token"] = second_token
+    second["agent_key"] = second_token
+    second["agent_key_expires_at"] = _future_iso(7200)
     persist_nous_credentials(second)
 
     payload = json.loads((hermes_home / "auth.json").read_text())
 
     # providers.nous reflects the latest write (singleton semantics)
-    assert payload["providers"]["nous"]["access_token"] == "access-second"
-    assert payload["providers"]["nous"]["agent_key"] == "agent-key-second"
+    assert payload["providers"]["nous"]["access_token"] == second_token
+    assert payload["providers"]["nous"]["agent_key"] == second_token
 
     # credential_pool.nous has exactly one entry, carrying the latest agent_key
     pool_entries = payload["credential_pool"]["nous"]
     assert len(pool_entries) == 1, pool_entries
     assert pool_entries[0]["source"] == NOUS_DEVICE_CODE_SOURCE
-    assert pool_entries[0]["agent_key"] == "agent-key-second"
+    assert pool_entries[0]["agent_key"] == second_token
     # And no stray `manual:device_code` / `manual:dashboard_device_code` rows
     assert not any(
         e["source"].startswith("manual:") for e in pool_entries
@@ -1339,13 +1314,14 @@ def test_persist_nous_credentials_reloads_pool_after_singleton_write(tmp_path, m
     }))
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    entry = persist_nous_credentials(_full_state_fixture())
+    state = _full_state_fixture()
+    entry = persist_nous_credentials(state)
     assert entry is not None
     assert entry.source == NOUS_DEVICE_CODE_SOURCE
     # Label derived by _seed_from_singletons via label_from_token; we don't
     # assert its exact value, just that the helper returned a real entry.
-    assert entry.access_token == "access-tok"
-    assert entry.agent_key == "agent-key-value"
+    assert entry.access_token == state["access_token"]
+    assert entry.agent_key == state["agent_key"]
 
 
 def test_persist_nous_credentials_embeds_custom_label(tmp_path, monkeypatch):
@@ -1658,7 +1634,8 @@ def test_shared_store_write_and_read_roundtrip(shared_store_env):
         _write_shared_nous_state,
     )
 
-    _write_shared_nous_state(_full_state_fixture())
+    state = _full_state_fixture()
+    _write_shared_nous_state(state)
 
     path = _nous_shared_store_path()
     assert path.is_file()
@@ -1670,7 +1647,7 @@ def test_shared_store_write_and_read_roundtrip(shared_store_env):
     loaded = _read_shared_nous_state()
     assert loaded is not None
     assert loaded["refresh_token"] == "refresh-tok"
-    assert loaded["access_token"] == "access-tok"
+    assert loaded["access_token"] == state["access_token"]
     assert loaded["portal_base_url"] == "https://portal.example.com"
     assert loaded["inference_base_url"] == "https://inference.example.com/v1"
     # Volatile agent_key MUST NOT be persisted to the shared store
@@ -1760,12 +1737,12 @@ def test_try_import_shared_returns_none_on_refresh_failure(
     assert auth_mod._read_shared_nous_state() is None
 
 
-def test_try_import_shared_persists_rotated_token_when_mint_fails(
+def test_try_import_shared_persists_rotated_token_when_jwt_validation_fails(
     shared_store_env, monkeypatch,
 ):
-    """A forced shared import refresh rotates the single-use token before minting.
+    """A forced shared import refresh rotates the single-use token before validation.
 
-    If the later agent-key mint fails, the shared store must still keep the
+    If the later inference-JWT validation fails, the shared store must still keep the
     rotated refresh token; otherwise the next import attempt replays the
     consumed token and trips refresh-token reuse.
     """
@@ -1785,12 +1762,7 @@ def test_try_import_shared_persists_rotated_token_when_mint_fails(
             "token_type": "Bearer",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        assert access_token == "access-new"
-        raise AuthError("credits exhausted", provider="nous", code="insufficient_credits")
-
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
 
     assert auth_mod._try_import_shared_nous_state() is None
 
@@ -1801,16 +1773,17 @@ def test_try_import_shared_persists_rotated_token_when_mint_fails(
 
 
 def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
-    """Happy path: stored refresh_token is accepted, forced refresh+mint
-    returns a fresh access_token + agent_key, and the returned dict has
+    """Happy path: stored refresh_token is accepted, forced refresh
+    returns a fresh access_token JWT, and the returned dict has
     every field persist_nous_credentials() needs.
     """
     from hermes_cli import auth as auth_mod
 
     auth_mod._write_shared_nous_state(_full_state_fixture())
+    fresh_jwt = _invoke_jwt(seconds=7200)
 
     def _fake_refresh(state, **kwargs):
-        # Simulate portal returning fresh tokens + a new agent_key
+        # Simulate portal returning a fresh inference JWT.
         assert kwargs.get("force_refresh") is True
         assert (
             kwargs.get("inference_auth_mode")
@@ -1818,10 +1791,10 @@ def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
         )
         return {
             **state,
-            "access_token": "fresh-access-tok",
+            "access_token": fresh_jwt,
             "refresh_token": "fresh-refresh-tok",  # rotated
-            "agent_key": "new-agent-key",
-            "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
+            "agent_key": fresh_jwt,
+            "agent_key_expires_at": _future_iso(7200),
         }
 
     monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
@@ -1829,9 +1802,9 @@ def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
     result = auth_mod._try_import_shared_nous_state()
 
     assert result is not None
-    assert result["access_token"] == "fresh-access-tok"
+    assert result["access_token"] == fresh_jwt
     assert result["refresh_token"] == "fresh-refresh-tok"
-    assert result["agent_key"] == "new-agent-key"
+    assert result["agent_key"] == fresh_jwt
     # Preserved from shared state
     assert result["portal_base_url"] == "https://portal.example.com"
     assert result["client_id"] == "hermes-cli"
@@ -1878,13 +1851,15 @@ def test_shared_store_survives_across_profile_switch(
     assert shared["refresh_token"] == "refresh-tok"
 
     # And a successful rehydrate + persist lands nous into profile B
+    b_jwt = _invoke_jwt(seconds=7200)
+
     def _fake_refresh(state, **kwargs):
         return {
             **state,
-            "access_token": "b-access-tok",
+            "access_token": b_jwt,
             "refresh_token": "b-refresh-tok",
-            "agent_key": "b-agent-key",
-            "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
+            "agent_key": b_jwt,
+            "agent_key_expires_at": _future_iso(7200),
         }
 
     monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
@@ -1924,35 +1899,29 @@ def test_runtime_refresh_uses_newer_shared_token_before_local_stale_token(
     monkeypatch.setenv("HERMES_HOME", str(profile_b))
 
     shared_state = _full_state_fixture()
-    shared_state["access_token"] = "shared-fresh-access"
+    shared_token = _invoke_jwt(seconds=3600)
+    shared_state["access_token"] = shared_token
     shared_state["refresh_token"] = "shared-fresh-refresh"
     shared_state["expires_at"] = "2099-01-01T00:00:00+00:00"
+    shared_state["scope"] = "inference:invoke"
     auth_mod._write_shared_nous_state(shared_state)
 
     def _refresh_should_not_happen(**_kwargs):
         raise AssertionError("stale profile-local refresh token was used")
 
-    minted_with: list[str] = []
-
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        minted_with.append(access_token)
-        return _mint_payload(api_key="agent-key-from-shared-token")
-
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _refresh_should_not_happen)
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
 
     creds = auth_mod.resolve_nous_runtime_credentials(
         min_key_ttl_seconds=300,
         inference_auth_mode=auth_mod.NOUS_INFERENCE_AUTH_MODE_FRESH,
     )
 
-    assert creds["api_key"] == "agent-key-from-shared-token"
-    assert minted_with == ["shared-fresh-access"]
+    assert creds["api_key"] == shared_token
 
     profile_state = auth_mod.get_provider_auth_state("nous")
     assert profile_state is not None
     assert profile_state["refresh_token"] == "shared-fresh-refresh"
-    assert profile_state["access_token"] == "shared-fresh-access"
+    assert profile_state["access_token"] == shared_token
 
 
 def test_managed_gateway_access_token_uses_newer_shared_token(
diff --git a/tests/hermes_cli/test_proxy.py b/tests/hermes_cli/test_proxy.py
index edc1425b5..a9eca5b6d 100644
--- a/tests/hermes_cli/test_proxy.py
+++ b/tests/hermes_cli/test_proxy.py
@@ -144,7 +144,7 @@ def test_nous_adapter_get_credential_uses_runtime_resolver(tmp_path, monkeypatch
     assert cred.token_type == "Bearer"
 
 
-def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch):
+def test_nous_adapter_retry_credential_does_not_fallback_on_jwt_401(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     _write_auth_store(tmp_path, {
         "access_token": "jwt-access",
@@ -155,15 +155,8 @@ def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch)
         "agent_key": "jwt-access",
     })
 
-    refreshed_state = {
-        "api_key": "legacy-bearer",
-        "base_url": "https://inference-api.nousresearch.com/v1",
-        "expires_at": "2099-01-01T00:00:00Z",
-    }
-
     with patch(
         "hermes_cli.proxy.adapters.nous_portal.resolve_nous_runtime_credentials",
-        return_value=refreshed_state,
     ) as mock_resolve:
         adapter = NousPortalAdapter()
         cred = adapter.get_retry_credential(
@@ -174,9 +167,8 @@ def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch)
             status_code=401,
         )
 
-    assert cred is not None
-    assert cred.bearer == "legacy-bearer"
-    assert mock_resolve.call_args.kwargs["inference_auth_mode"] == "legacy"
+    assert cred is None
+    mock_resolve.assert_not_called()
 
 
 def test_nous_adapter_retry_credential_skips_opaque_bearer(tmp_path, monkeypatch):
diff --git a/tests/hermes_cli/test_web_oauth_dispatch.py b/tests/hermes_cli/test_web_oauth_dispatch.py
index b9ee20cca..0c6b902f7 100644
--- a/tests/hermes_cli/test_web_oauth_dispatch.py
+++ b/tests/hermes_cli/test_web_oauth_dispatch.py
@@ -25,6 +25,7 @@ from datetime import datetime, timezone
 from unittest.mock import patch
 
 import httpx
+import pytest
 from fastapi.testclient import TestClient
 
 from hermes_cli.web_server import _SESSION_TOKEN, app
@@ -99,7 +100,7 @@ def test_minimax_login_does_not_launch_anthropic_flow():
     assert body["expires_in"] == 600
 
 
-def test_nous_dashboard_device_flow_honors_legacy_scope_override(monkeypatch):
+def test_nous_dashboard_device_flow_ignores_legacy_scope_override(monkeypatch):
     from hermes_cli import auth as auth_mod
     from hermes_cli import web_server as ws
 
@@ -109,24 +110,24 @@ def test_nous_dashboard_device_flow_honors_legacy_scope_override(monkeypatch):
         requested_scopes.append(kwargs["scope"])
         return _fake_nous_device_data()
 
-    monkeypatch.setenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, "true")
+    monkeypatch.setenv("HERMES_AGENT_USE_LEGACY_SESSION_KEYS", "true")
     monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
     monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)
 
     result = asyncio.run(ws._start_device_code_flow("nous"))
     try:
-        assert requested_scopes == [auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
+        assert requested_scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
         assert result["flow"] == "device_code"
         assert result["user_code"] == "NOUS-1234"
         assert (
             ws._oauth_sessions[result["session_id"]]["scope"]
-            == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+            == auth_mod.DEFAULT_NOUS_SCOPE
         )
     finally:
         ws._oauth_sessions.pop(result["session_id"], None)
 
 
-def test_nous_dashboard_device_flow_retries_legacy_scope_on_invoke_refusal(monkeypatch):
+def test_nous_dashboard_device_flow_does_not_retry_legacy_scope_on_invoke_refusal(monkeypatch):
     from hermes_cli import auth as auth_mod
     from hermes_cli import web_server as ws
 
@@ -134,26 +135,15 @@ def test_nous_dashboard_device_flow_retries_legacy_scope_on_invoke_refusal(monke
 
     def fake_request_device_code(**kwargs):
         requested_scopes.append(kwargs["scope"])
-        if len(requested_scopes) == 1:
-            raise _invoke_scope_refusal()
-        return _fake_nous_device_data()
+        raise _invoke_scope_refusal()
 
-    monkeypatch.delenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, raising=False)
+    monkeypatch.delenv("HERMES_AGENT_USE_LEGACY_SESSION_KEYS", raising=False)
     monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
     monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)
 
-    result = asyncio.run(ws._start_device_code_flow("nous"))
-    try:
-        assert requested_scopes == [
-            auth_mod.DEFAULT_NOUS_SCOPE,
-            auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
-        ]
-        assert (
-            ws._oauth_sessions[result["session_id"]]["scope"]
-            == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
-        )
-    finally:
-        ws._oauth_sessions.pop(result["session_id"], None)
+    with pytest.raises(httpx.HTTPStatusError):
+        asyncio.run(ws._start_device_code_flow("nous"))
+    assert requested_scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
 
 
 def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(monkeypatch):
@@ -173,13 +163,13 @@ def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(
         "device_code": "device-code",
         "interval": 5,
         "expires_at": time.time() + 600,
-        "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        "scope": auth_mod.DEFAULT_NOUS_SCOPE,
     }
     captured_state = {}
 
     def fake_refresh_nous_oauth_from_state(state, **kwargs):
         captured_state.update(state)
-        return {**state, "agent_key": "legacy-agent-key"}
+        return {**state, "agent_key": "jwt-agent-key"}
 
     monkeypatch.setattr(
         auth_mod,
@@ -200,7 +190,7 @@ def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(
 
     try:
         ws._nous_poller(session_id)
-        assert captured_state["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+        assert captured_state["scope"] == auth_mod.DEFAULT_NOUS_SCOPE
         assert ws._oauth_sessions[session_id]["status"] == "approved"
     finally:
         ws._oauth_sessions.pop(session_id, None)
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 20aec422c..41e2fe19e 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -4063,7 +4063,8 @@ class TestNousCredentialRefresh:
 
         assert ok is True
         assert closed["value"] is True
-        assert captured["inference_auth_mode"] == "legacy"
+        assert captured["inference_auth_mode"] == "auto"
+        assert captured["force_refresh"] is True
         assert rebuilt["kwargs"]["api_key"] == "new-nous-key"
         assert (
             rebuilt["kwargs"]["base_url"] == "https://inference-api.nousresearch.com/v1"
@@ -4092,11 +4093,12 @@ class TestNousCredentialRefresh:
         with patch("run_agent.OpenAI", return_value=MagicMock()):
             ok = agent._try_refresh_nous_client_credentials(
                 force=False,
-                inference_auth_mode="legacy",
+                inference_auth_mode="fresh",
             )
 
         assert ok is True
-        assert captured["inference_auth_mode"] == "legacy"
+        assert captured["inference_auth_mode"] == "fresh"
+        assert captured["force_refresh"] is False
 
 
 class TestCredentialPoolRecovery: