From 439f53cab8e0e1048b1da0b1fc31aa7256888bc1 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 5 Jun 2026 10:22:26 +1000 Subject: [PATCH] fix(desktop): gate OAuth remote connect on AT-or-RT, not access token alone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The desktop OAuth remote-gateway path gated connectivity on hasOauthSessionCookie(), which checks only the access-token cookie (hermes_session_at, ~15 min TTL). The moment that cookie's Max-Age lapsed, Electron's cookie jar dropped it and both resolveRemoteBackend() and sanitizeDesktopConnectionConfig() reported "not signed in" — forcing a full IDP re-login every ~15 min — even though a valid 24h refresh-token cookie (hermes_session_rt) was sitting in the same jar. The desktop OAuth code (2026-06-04) was written against the obsolete "contract v1 issues no refresh token" model, two days after #37247 re-introduced server-side transparent refresh: Portal now issues a 24h rotating, reuse-detected refresh token, and the gateway middleware (_attempt_refresh) rotates a fresh AT from the RT on the next authenticated request. So an expired-AT/live-RT session is fully connectable — the desktop just never let the request through. Fix: - connection-config.cjs: add RT_COOKIE_VARIANTS + cookiesHaveLiveSession() (true when EITHER a live AT or RT cookie is present). Keep cookiesHaveSession() AT-only for callers that need that specific signal. - main.cjs: add hasLiveOauthSession(); resolveRemoteBackend()'s oauth branch now early-outs only when NEITHER cookie is present, otherwise uses the ws-ticket mint as the authoritative liveness probe (that POST carries the RT cookie and triggers the server-side AT rotation). A real 401 still surfaces as needsOauthLogin. Settings indicator + oauth-logout report against the same AT-or-RT notion. - Remove the stale "contract v1 / NO refresh token" docstrings in cookies.py and the verify_session comments in the Nous provider that contradicted #37247. Tests: +57 lines in connection-config.test.cjs covering the RT-only "still connectable" case. node --test: 32/32. dashboard-auth + nous-provider Python suites: 223/223. Note: server-side files (hermes_cli/dashboard_auth/, plugins/dashboard_auth/) are comment/docstring-only here, but this touches outside apps/desktop/ so it needs Teknium review. --- apps/desktop/electron/connection-config.cjs | 48 +++++++++++- .../electron/connection-config.test.cjs | 57 +++++++++++++- apps/desktop/electron/main.cjs | 78 ++++++++++++++++--- hermes_cli/dashboard_auth/cookies.py | 51 +++++++----- plugins/dashboard_auth/nous/__init__.py | 12 +-- 5 files changed, 208 insertions(+), 38 deletions(-) diff --git a/apps/desktop/electron/connection-config.cjs b/apps/desktop/electron/connection-config.cjs index c10a657d1..44ffb3d7a 100644 --- a/apps/desktop/electron/connection-config.cjs +++ b/apps/desktop/electron/connection-config.cjs @@ -18,11 +18,24 @@ * this via the public `/api/status` field `auth_required: true`. */ -// Bare + prefixed variants of the access-token cookie the gateway may set, +// Bare + prefixed variants of the session cookies the gateway may set, // depending on its deploy shape (HTTPS direct → __Host-, behind a path prefix // → __Secure-, loopback HTTP → bare). Mirrors // hermes_cli/dashboard_auth/cookies.py. +// +// Two cookies are in play (see that module): +// - hermes_session_at: the OAuth access token. Short-lived (~15 min); its +// Max-Age tracks the access-token TTL, so the cookie jar drops it the +// instant the AT expires. +// - hermes_session_rt: the OAuth refresh token. Long-lived (24h rotating, +// reuse-detected — Portal NAS #293 / hermes #37247). When the AT cookie +// has lapsed but the RT cookie is still present, the gateway middleware +// transparently rotates a fresh AT on the next authenticated request +// (POST /api/auth/ws-ticket), so the session is still LIVE even with no +// AT cookie. A liveness check that looked only at the AT cookie would +// force a needless full re-login every ~15 min — hence cookiesHaveLiveSession. const AT_COOKIE_VARIANTS = ['__Host-hermes_session_at', '__Secure-hermes_session_at', 'hermes_session_at'] +const RT_COOKIE_VARIANTS = ['__Host-hermes_session_rt', '__Secure-hermes_session_rt', 'hermes_session_rt'] function normalizeRemoteBaseUrl(rawUrl) { const value = String(rawUrl || '').trim() @@ -150,21 +163,52 @@ function resolveAuthMode(inputAuthMode, existingAuthMode) { } /** - * True if any cookie in `cookies` is a hermes session access-token cookie + * True if any cookie in `cookies` is a hermes session ACCESS-token cookie * with a non-empty value. `cookies` is an array of {name, value} (the shape * Electron's session.cookies.get returns). + * + * Note: this is AT-only. A session whose AT cookie has lapsed but whose RT + * cookie is still alive is STILL connectable (the gateway refreshes the AT on + * the next request) — use `cookiesHaveLiveSession` for a connectivity/display + * check. `cookiesHaveSession` remains exported for callers that specifically + * need to know whether an unexpired access token is present right now. */ function cookiesHaveSession(cookies) { if (!Array.isArray(cookies)) return false return cookies.some(c => c && AT_COOKIE_VARIANTS.includes(c.name) && c.value) } +/** + * True if the cookie jar holds a credential that can yield an authenticated + * request — EITHER a live access-token cookie OR a refresh-token cookie. The + * RT cookie outlives the AT cookie (24h vs ~15min), and the gateway middleware + * transparently rotates a fresh AT from the RT on the next authenticated + * request. Gating connectivity on the AT alone would force a full IDP + * re-login every ~15 min even though a valid 24h RT is sitting in the jar. + * + * This answers "should we even attempt to connect / show as signed in?", not + * "is the access token unexpired?". The authoritative liveness check is still + * the actual ws-ticket mint at connect time (which surfaces a true 401 when + * the RT is also dead/revoked). + */ +function cookiesHaveLiveSession(cookies) { + if (!Array.isArray(cookies)) return false + return cookies.some( + c => + c && + c.value && + (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)) + ) +} + module.exports = { AT_COOKIE_VARIANTS, + RT_COOKIE_VARIANTS, authModeFromStatus, buildGatewayWsUrl, buildGatewayWsUrlWithTicket, cookiesHaveSession, + cookiesHaveLiveSession, normalizeRemoteBaseUrl, resolveAuthMode, resolveTestWsUrl, diff --git a/apps/desktop/electron/connection-config.test.cjs b/apps/desktop/electron/connection-config.test.cjs index 33f81f3b6..6819e6407 100644 --- a/apps/desktop/electron/connection-config.test.cjs +++ b/apps/desktop/electron/connection-config.test.cjs @@ -15,10 +15,12 @@ const assert = require('node:assert/strict') const { AT_COOKIE_VARIANTS, + RT_COOKIE_VARIANTS, authModeFromStatus, buildGatewayWsUrl, buildGatewayWsUrlWithTicket, cookiesHaveSession, + cookiesHaveLiveSession, normalizeRemoteBaseUrl, resolveAuthMode, resolveTestWsUrl, @@ -131,7 +133,10 @@ test('cookiesHaveSession is false for an empty value', () => { assert.equal(cookiesHaveSession([{ name: 'hermes_session_at', value: '' }]), false) }) -test('cookiesHaveSession ignores unrelated cookies', () => { +test('cookiesHaveSession ignores unrelated cookies (AT-only by design)', () => { + // cookiesHaveSession is deliberately access-token-only — a lone RT cookie + // is NOT an access token, so this returns false. Connectivity callers must + // use cookiesHaveLiveSession instead (see below). assert.equal(cookiesHaveSession([{ name: 'hermes_session_rt', value: 'x' }]), false) assert.equal(cookiesHaveSession([{ name: 'other', value: 'x' }]), false) }) @@ -146,6 +151,56 @@ test('AT_COOKIE_VARIANTS covers all three deploy shapes', () => { assert.deepEqual(AT_COOKIE_VARIANTS, ['__Host-hermes_session_at', '__Secure-hermes_session_at', 'hermes_session_at']) }) +test('RT_COOKIE_VARIANTS covers all three deploy shapes', () => { + assert.deepEqual(RT_COOKIE_VARIANTS, ['__Host-hermes_session_rt', '__Secure-hermes_session_rt', 'hermes_session_rt']) +}) + +// --- cookiesHaveLiveSession (AT or RT — the connectivity check) --- + +test('cookiesHaveLiveSession is true for a live access-token cookie', () => { + assert.equal(cookiesHaveLiveSession([{ name: 'hermes_session_at', value: 'x' }]), true) + assert.equal(cookiesHaveLiveSession([{ name: '__Host-hermes_session_at', value: 'x' }]), true) + assert.equal(cookiesHaveLiveSession([{ name: '__Secure-hermes_session_at', value: 'x' }]), true) +}) + +test('cookiesHaveLiveSession is true for an RT cookie even with NO access-token cookie', () => { + // This is the bug-fix case: the AT cookie has lapsed (dropped from the jar) + // but the 24h RT cookie is still alive. The session is still connectable — + // the gateway rotates a fresh AT from the RT on the next request. + assert.equal(cookiesHaveLiveSession([{ name: 'hermes_session_rt', value: 'x' }]), true) + assert.equal(cookiesHaveLiveSession([{ name: '__Host-hermes_session_rt', value: 'x' }]), true) + assert.equal(cookiesHaveLiveSession([{ name: '__Secure-hermes_session_rt', value: 'x' }]), true) +}) + +test('cookiesHaveLiveSession is true when both AT and RT are present', () => { + assert.equal( + cookiesHaveLiveSession([ + { name: 'hermes_session_at', value: 'a' }, + { name: 'hermes_session_rt', value: 'r' } + ]), + true + ) +}) + +test('cookiesHaveLiveSession is false for empty values', () => { + assert.equal(cookiesHaveLiveSession([{ name: 'hermes_session_at', value: '' }]), false) + assert.equal(cookiesHaveLiveSession([{ name: 'hermes_session_rt', value: '' }]), false) + assert.equal( + cookiesHaveLiveSession([ + { name: 'hermes_session_at', value: '' }, + { name: 'hermes_session_rt', value: '' } + ]), + false + ) +}) + +test('cookiesHaveLiveSession is false for unrelated cookies and non-arrays', () => { + assert.equal(cookiesHaveLiveSession([{ name: 'other', value: 'x' }]), false) + assert.equal(cookiesHaveLiveSession(null), false) + assert.equal(cookiesHaveLiveSession(undefined), false) + assert.equal(cookiesHaveLiveSession([]), false) +}) + // --- tokenPreview --- test('tokenPreview returns null for empty', () => { diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index 0a877719e..a78260b81 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -33,6 +33,7 @@ const { buildGatewayWsUrl, buildGatewayWsUrlWithTicket, cookiesHaveSession, + cookiesHaveLiveSession, normalizeRemoteBaseUrl, resolveAuthMode, resolveTestWsUrl, @@ -3167,8 +3168,16 @@ function installMediaPermissions() { // * WebSocket upgrades require a single-use ``?ticket=`` minted at // ``POST /api/auth/ws-ticket`` (cookie-authed). The legacy ``?token=`` // path is unconditionally rejected by gated gateways. -// * Nous Portal contract v1 issues NO refresh token; the access cookie has -// a ~15-min TTL. On 401 we must re-run the login round trip. +// * Nous Portal now issues a 24h ROTATING, reuse-detected refresh token +// alongside the ~15-min access token (Portal NAS #293 / hermes #37247). +// Both are set as HttpOnly cookies (``hermes_session_at`` ~15 min, +// ``hermes_session_rt`` 24h). When the AT cookie lapses but the RT cookie +// is still alive, the gateway middleware transparently rotates a fresh AT +// on the next authenticated request — so connectivity must NOT be gated on +// the AT cookie alone. We probe liveness by actually minting a ws-ticket +// (which triggers that server-side refresh) and treat a real 401 as +// "needs re-login"; the AT-or-RT cookie presence check is only a cheap +// "is the user signed in at all?" gate / display signal. // --------------------------------------------------------------------------- const OAUTH_SESSION_PARTITION = 'persist:hermes-remote-oauth' @@ -3179,8 +3188,9 @@ function getOauthSession() { return oauthSession } -// Bare + prefixed variants of the access-token cookie live in -// connection-config.cjs (cookiesHaveSession). See that module for details. +// Bare + prefixed variants of the session cookies live in +// connection-config.cjs (cookiesHaveSession / cookiesHaveLiveSession). See +// that module for details. async function hasOauthSessionCookie(baseUrl) { const sess = getOauthSession() @@ -3201,6 +3211,30 @@ async function hasOauthSessionCookie(baseUrl) { } } +// Like hasOauthSessionCookie, but returns true when EITHER a live access-token +// cookie OR a (longer-lived) refresh-token cookie is present. This is the right +// "is the user signed in at all?" check: an expired AT with a live RT is still +// a connectable session because the gateway rotates a fresh AT server-side on +// the next authenticated request. Gating on the AT alone forces a needless full +// re-login every ~15 min. Used for the Settings "connected" indicator and as a +// cheap early-out before attempting a network round-trip in resolveRemoteBackend. +async function hasLiveOauthSession(baseUrl) { + const sess = getOauthSession() + if (!sess) return false + const parsed = new URL(baseUrl) + try { + const cookies = await sess.cookies.get({ url: baseUrl }) + return cookiesHaveLiveSession(cookies) + } catch { + try { + const cookies = await sess.cookies.get({ domain: parsed.hostname }) + return cookiesHaveLiveSession(cookies) + } catch { + return false + } + } +} + async function clearOauthSession(baseUrl) { const sess = getOauthSession() if (!sess) return @@ -3536,7 +3570,11 @@ async function sanitizeDesktopConnectionConfig(config = readDesktopConnectionCon let remoteOauthConnected = false if (authMode === 'oauth' && remoteUrl) { try { - remoteOauthConnected = await hasOauthSessionCookie(remoteUrl) + // Display signal: treat a live RT cookie as "connected" even if the AT + // cookie has lapsed — the gateway refreshes the AT on the next request, + // so the session is still usable. The authoritative liveness check is + // the ws-ticket mint in resolveRemoteBackend at actual connect time. + remoteOauthConnected = await hasLiveOauthSession(remoteUrl) } catch { remoteOauthConnected = false } @@ -3621,11 +3659,22 @@ async function resolveRemoteBackend() { const authMode = config.remote?.authMode === 'oauth' ? 'oauth' : 'token' if (authMode === 'oauth') { - // OAuth gateway: auth comes from the session cookie in the OAuth partition. - // Verify the cookie is present, then mint a single-use WS ticket (the - // gateway rejects ?token= in gated mode). A missing cookie / 401 means the - // user needs to (re-)log in via Settings → Gateway. - if (!(await hasOauthSessionCookie(baseUrl))) { + // OAuth gateway: auth comes from the session cookies in the OAuth + // partition. Liveness is NOT "is the access-token cookie present?" — + // Portal issues a 24h rotating refresh token (hermes #37247), and the + // gateway middleware transparently rotates a fresh ~15-min access token + // from it on the next authenticated request. So a session with an expired + // AT cookie but a live RT cookie is still perfectly connectable. + // + // We therefore: + // 1. cheap early-out ONLY when the jar holds NEITHER an AT nor an RT + // cookie — a genuinely signed-out user — to avoid a pointless network + // round-trip and give a clear "sign in" message. + // 2. otherwise probe liveness by actually minting a ws-ticket. That POST + // carries the cookie jar (incl. the RT cookie); the gateway refreshes + // the AT server-side and returns a ticket. A real 401 here means the + // RT is also dead/revoked → genuine re-login needed. + if (!(await hasLiveOauthSession(baseUrl))) { const err = new Error( 'Remote Hermes gateway uses OAuth, but you are not signed in. ' + 'Open Settings → Gateway and click "Sign in", or switch back to Local.' @@ -3636,6 +3685,10 @@ async function resolveRemoteBackend() { let ticket try { + // This mint is the authoritative liveness check. If only the RT cookie + // is alive, the gateway rotates a fresh AT cookie back onto the partition + // via Set-Cookie (Electron's persistent session absorbs it), so the very + // next request is already re-authed — no user-visible re-login. ticket = await mintGatewayWsTicket(baseUrl) } catch (error) { const err = new Error( @@ -4388,7 +4441,10 @@ ipcMain.handle('hermes:connection-config:oauth-login', async (_event, rawUrl) => ipcMain.handle('hermes:connection-config:oauth-logout', async (_event, rawUrl) => { const baseUrl = rawUrl ? normalizeRemoteBaseUrl(rawUrl) : '' await clearOauthSession(baseUrl || undefined) - return { ok: true, connected: baseUrl ? await hasOauthSessionCookie(baseUrl) : false } + // Report against the SAME liveness notion the Settings indicator uses + // (AT-or-RT) so a logout that left any session cookie behind is reflected + // as still-connected rather than silently signed-out. + return { ok: true, connected: baseUrl ? await hasLiveOauthSession(baseUrl) : false } }) ipcMain.handle('hermes:connection-config:save', async (_event, payload) => { const config = coerceDesktopConnectionConfig(payload) diff --git a/hermes_cli/dashboard_auth/cookies.py b/hermes_cli/dashboard_auth/cookies.py index f8fc77f24..90c7cf34d 100644 --- a/hermes_cli/dashboard_auth/cookies.py +++ b/hermes_cli/dashboard_auth/cookies.py @@ -2,13 +2,18 @@ Three cookies in play: - hermes_session_at: the OAuth access token - (HttpOnly, lifetime = token TTL) + (HttpOnly, lifetime = token TTL, ~15 min) - hermes_session_rt: the OAuth refresh token - (HttpOnly, lifetime = 30 days) - **DEPRECATED in OAuth contract v1** — Nous Portal - does not issue refresh tokens; we keep the cookie - name and clear semantics for forward compatibility - and to flush stale cookies from old browsers. + (HttpOnly, lifetime = 24h, ROTATING + reuse-detected) + Nous Portal issues a rotating refresh token for the + dashboard auth-code grant (Portal NAS #293 / hermes + #37247). ``set_session_cookies`` writes this cookie + whenever the provider returns a non-empty + ``refresh_token``; the middleware uses it to rotate a + fresh access token transparently on AT expiry. A + provider that omits the refresh token (empty string) + degrades gracefully to access-token-only sessions — + the RT cookie is simply not written. - hermes_session_pkce: short-lived PKCE state + CSRF nonce + provider hint (HttpOnly, lifetime = 10 minutes) @@ -39,13 +44,15 @@ The setters and readers BOTH consult the active prefix because the cookie *name* changes — a reader that looked up the bare name when the setter wrote ``__Secure-hermes_session_at`` would never find the value. -.. deprecated:: contract v1 - ``set_session_cookies`` accepts ``refresh_token=""`` (the contract-v1 - default) and silently skips writing the RT cookie in that case. - ``clear_session_cookies`` still emits a Max-Age=0 deletion for the RT - cookie so users carrying a stale cookie from an earlier deployment get - it cleared on logout / session expiry. The full refresh-flow machinery - was rewritten as "401 → redirect to /login" in Phase 6. +Refresh-token handling: + ``set_session_cookies`` accepts ``refresh_token=""`` (provider omitted + it) and silently skips writing the RT cookie in that case, so a + refresh-token-less provider degrades to access-token-only sessions. + ``clear_session_cookies`` always emits a Max-Age=0 deletion for the RT + cookie on logout / session expiry so a stale cookie from an earlier + deployment gets cleared. The transparent rotation flow ("expired AT + + live RT → rotate server-side, else 401 → /login") lives in + ``middleware._attempt_refresh``. """ from __future__ import annotations @@ -66,7 +73,13 @@ PKCE_COOKIE = "hermes_session_pkce" # practice — a single request emits exactly one variant). _NAME_VARIANTS = ("__Host-", "__Secure-", "") -# 30 days — matches Portal's REFRESH_TOKEN_TTL_SECONDS +# RT cookie Max-Age. Kept at 30 days as a generous upper bound on the cookie's +# browser lifetime; Portal's actual refresh-token TTL (24h, rotating) is the +# real authority — once the RT itself expires/rotates out, a refresh attempt +# returns 400 → RefreshExpiredError → clean re-login, regardless of how long +# the cookie lingers. (Not tightened to 24h here to avoid coupling the cookie +# lifetime to a server-side TTL that can change independently; revisit if the +# stale-cookie refresh churn ever matters.) _RT_MAX_AGE = 30 * 24 * 60 * 60 _PKCE_MAX_AGE = 10 * 60 @@ -126,11 +139,11 @@ def set_session_cookies( ``access_token_expires_in`` is in seconds. Use the provider's reported TTL for the access token. - ``refresh_token`` is accepted for backward / forward compatibility but - SKIPPED when empty — Nous Portal contract v1 issues no refresh tokens - so a ``Session.refresh_token == ""`` from the provider means we don't - persist anything. If a future contract revision starts emitting refresh - tokens, this helper will write the RT cookie again with no other change. + ``refresh_token`` is written as the RT cookie when non-empty. Nous Portal + issues a 24h rotating refresh token (hermes #37247); a provider that + omits it returns ``Session.refresh_token == ""`` and we simply don't + persist the RT cookie — the session then behaves as access-token-only + until the AT expires. No other branch changes between the two cases. ``prefix`` is the normalised X-Forwarded-Prefix value (e.g. ``/hermes``) or ``""`` for a direct deploy. It influences both the cookie name diff --git a/plugins/dashboard_auth/nous/__init__.py b/plugins/dashboard_auth/nous/__init__.py index 4133b18f5..480617916 100644 --- a/plugins/dashboard_auth/nous/__init__.py +++ b/plugins/dashboard_auth/nous/__init__.py @@ -348,9 +348,10 @@ class NousDashboardAuthProvider(DashboardAuthProvider): def verify_session(self, *, access_token: str) -> Optional[Session]: - # Contract: returns None on expiry/invalidity (middleware then - # triggers redirect-to-login since refresh_session can never succeed - # under V1); raises ProviderError if the IDP is unreachable. + # Contract: returns None on expiry/invalidity (the middleware then + # tries refresh_session with the RT cookie, falling back to + # redirect-to-login if that also fails); raises ProviderError if the + # IDP is unreachable. try: claims = self._verify_jwt(access_token) except InvalidCodeError: @@ -359,8 +360,9 @@ class NousDashboardAuthProvider(DashboardAuthProvider): except ProviderError: # JWKS unreachable, etc. Bubble up so middleware emits 503. raise - # verify_session has no access to the original refresh_token; pass - # "" because in contract V1 there is none anyway. + # verify_session validates the AT in isolation and has no access to the + # refresh token (it lives in a separate cookie the middleware reads); + # pass "" here — the RT-driven rotation path is middleware's job. return self._session_from_claims(access_token, "", claims) def revoke_session(self, *, refresh_token: str) -> None: