From a8bdf5608bf7a6aebb3cffd447704907b44a0028 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 26 May 2026 12:34:28 -0700 Subject: [PATCH] fix(csrf): self-heal missing csrf_token cookie on valid session + always send Bearer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #65 fixed csrf_failed for legacy-JWT users by adding a server-side Bearer-bypass in csrfGuard. Two follow-up edges still produced csrf_failed on POST /api/account/coolify-webhook-secret/rotate (and every other re-auth-gated mutation) for users in the field: 1. Cookie-auth users in the drift state: `__Host-remo_sid` cookie valid, `csrf_token` cookie missing (cleared by a privacy extension, expired separately, or never set on a session that pre-dated the double-submit pattern and survived across deploys via sliding-idle touch). csrfGuard had no header to compare against and 403'd with no client-side recovery short of logout+login. Fix: when csrf cookie is missing but the session cookie validates against the DAL, csrfGuard re-issues a fresh csrf_token cookie and allows the current request through. Safe because the session cookie's SameSite=Lax already blocks cross-site mutating requests, and the session token is verified before self-heal triggers. DAL errors fall through to the normal 403 so self-heal never masks real CSRF rejections or crashes on infra issues. 2. Legacy-JWT users with a stale csrf_token cookie still hanging around (e.g. they previously magic-link-logged-in, then logged out and re-logged in via bcrypt — the cookie didn't always get cleared). hubFetch's old logic only attached `Authorization: Bearer` when the csrf cookie was ABSENT, so the stale cookie caused us to send an X-CSRF-Token instead of the Bearer. PR #65's server-side Bearer bypass therefore never fired → double-submit enforcement → 403. Fix: hubFetch now always attaches `Authorization: Bearer ` whenever a real legacy token is in localStorage (sentinel 'cookie' excluded), independent of csrf cookie state. That guarantees PR #65's Bearer bypass actually triggers for legacy users. Tests: 2 new csrf middleware regression tests cover (a) bogus session token + no csrf cookie → still 403 (DAL miss falls through), and (b) session cookie present + DB unavailable → 403, never 500. All 29 csrf tests pass. The positive self-heal path (valid session token → re-issue + allow) is covered by hub/test/coolify-webhook-secret.test.ts when REMO_E2E_DB_URL is set. Docs: docs/auth.md "CSRF model" section updated to document both the client-side always-Bearer rule and the server-side self-heal branch. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/auth.md | 3 ++- hub/src/csrf.ts | 37 +++++++++++++++++++++++++++++++++++++ hub/test/csrf.test.ts | 28 ++++++++++++++++++++++++++++ web/src/lib/api.ts | 16 ++++++++++++---- 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/docs/auth.md b/docs/auth.md index fdc4677..09f21e0 100644 --- a/docs/auth.md +++ b/docs/auth.md @@ -67,7 +67,8 @@ All state-changing routes (`POST`/`PATCH`/`PUT`/`DELETE`) check **double-submit - On any cookie-session request, the hub sets a `csrf_token` cookie (non-`HttpOnly`, `SameSite=Lax`) with a random 32-byte value. - The web client reads `csrf_token` from `document.cookie` and forwards it as `X-CSRF-Token: ` on every mutation. - The hub compares the header to the cookie using `crypto.timingSafeEqual`. Mismatch = 403. -- Routes called only with the bearer-token legacy path skip the check (legacy clients never had it). +- Routes called only with the bearer-token legacy path skip the check (legacy clients never had it). The web client always attaches `Authorization: Bearer ` whenever a legacy JWT is present in localStorage, even if a stale `csrf_token` cookie is also present — this guarantees `csrfGuard`'s Bearer-bypass fires for legacy-auth users who logged in via magic-link previously and still have the cookie around. +- **Self-heal:** if a request carries a valid `__Host-remo_sid` cookie but no `csrf_token` cookie (drift state — csrf cookie cleared, expired, or never set), `csrfGuard` re-issues a fresh `csrf_token` in the response and allows the current request through. Safe because the session cookie's `SameSite=Lax` already blocks cross-site mutating requests, and the session token is verified against the DAL before self-heal triggers. Without this branch, the very next mutating call (e.g. `POST /api/account/coolify-webhook-secret/rotate`) would 403 with no in-app recovery short of logout+login. ### Re-auth (step-up) gate diff --git a/hub/src/csrf.ts b/hub/src/csrf.ts index 3caebbb..64d0997 100644 --- a/hub/src/csrf.ts +++ b/hub/src/csrf.ts @@ -16,6 +16,7 @@ import type { Context, Next } from 'hono'; import { getCookie, setCookie, deleteCookie } from 'hono/cookie'; import { createHmac, randomBytes, timingSafeEqual } from 'crypto'; import { config } from './config'; +import { readSessionCookie, verifyAuthSessionToken } from './session'; export const CSRF_COOKIE_NAME = 'csrf_token'; export const CSRF_HEADER_NAME = 'X-CSRF-Token'; @@ -135,6 +136,42 @@ export function csrfGuard() { const cookieValue = readCsrfCookie(c); const headerValue = c.req.header(CSRF_HEADER_NAME) || c.req.header(CSRF_HEADER_NAME.toLowerCase()); + + // Self-heal: cookie users can drift into a "valid `__Host-remo_sid` but + // no `csrf_token` cookie" state when the csrf cookie expires, is cleared + // by a privacy extension, or was never set (e.g. session created before + // the double-submit pattern was added, then survived across deploys via + // sliding-idle touch). Without this branch, the very next mutating call + // (e.g. POST /api/account/coolify-webhook-secret/rotate) fails 403 and + // there is NO client-side path to recover short of logout+login. + // + // This branch is safe to allow-through because: + // 1. The session cookie itself is `SameSite=Lax` — browsers do NOT + // attach it on cross-site POST/PUT/PATCH/DELETE, so the CSRF + // attacker can't even reach this code path with a valid session. + // 2. We verify the session token against the DAL row (idle/expiry/ + // user-exists checks all pass) before issuing — a forged/stale + // session cookie never gets self-healed. + // 3. We re-issue a fresh `csrf_token` cookie in the same response so + // subsequent requests use the normal double-submit path again. + if (!cookieValue) { + const sessionToken = readSessionCookie(c); + if (sessionToken) { + try { + const sessionCtx = await verifyAuthSessionToken(sessionToken); + if (sessionCtx) { + const fresh = issueCsrfToken(sessionToken); + setCsrfCookie(c, fresh); + return next(); + } + } catch { + // DB unavailable or transient — fall through to normal 403. + // Self-heal is a best-effort recovery; never let it mask a real + // CSRF rejection or crash on infra issues. + } + } + } + if (!verifyCsrfPair(cookieValue, headerValue ?? null)) { return c.json({ error: 'csrf_failed' }, 403); } diff --git a/hub/test/csrf.test.ts b/hub/test/csrf.test.ts index da0ec0d..9fae3ba 100644 --- a/hub/test/csrf.test.ts +++ b/hub/test/csrf.test.ts @@ -202,4 +202,32 @@ describe('csrfGuard middleware', () => { }); expect(res.status).toBe(200); }); + + // Self-heal regression: a request that has a `__Host-remo_sid` session + // cookie but NO `csrf_token` cookie (the drift state that produced + // `csrf_failed` on POST /api/account/coolify-webhook-secret/rotate) must + // not 403 outright. The DAL lookup for the bogus token fails closed → we + // fall through to the normal 403. The positive self-heal (valid session + // token → re-issue + allow) is covered by the e2e test in + // hub/test/coolify-webhook-secret.test.ts when REMO_E2E_DB_URL is set. + test('POST with session cookie but BOGUS session token + no csrf → 403 (DB miss falls through)', async () => { + const res = await buildApp().request('/api/foo', { + method: 'POST', + headers: { cookie: `__Host-remo_sid=remo_bogus_token_will_not_resolve_in_DAL` }, + }); + expect(res.status).toBe(403); + const body = await res.json(); + expect((body as any).error).toBe('csrf_failed'); + }); + + test('POST with session cookie + DB unavailable → does not crash, returns 403', async () => { + // The csrf middleware swallows DAL errors. With no DATABASE_URL test + // setup, the call throws inside verifyAuthSessionToken — must end as 403, + // never a 500. + const res = await buildApp().request('/api/foo', { + method: 'POST', + headers: { cookie: `__Host-remo_sid=remo_anything` }, + }); + expect(res.status).toBe(403); + }); }); diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index c3041fc..8255d3b 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -98,10 +98,18 @@ export async function hubFetch( finalHeaders['X-CSRF-Token'] = csrf } - // Soak-window fallback ONLY: if no csrf cookie present (i.e. user is still - // on a legacy bearer-token session) and a token was passed, attach it. Post - // cutover this branch is dead and `token` callers can pass null. - if (!csrf && token) { + // Soak-window: attach Authorization: Bearer whenever a legacy token is + // available AND it isn't the cookie-session sentinel. This must run + // INDEPENDENTLY of csrf-cookie presence — a legacy-JWT user can also have + // a stale `csrf_token` cookie hanging around (e.g. from a prior magic-link + // session that was later replaced by a bcrypt login). Without the Bearer + // header in that case, hub-side `csrfGuard` has no way to know this is a + // non-CSRF-eligible Bearer-authed request and falls back to double-submit + // enforcement, which fails because the stale `X-CSRF-Token` no longer + // matches the server cookie. PR #65 added the server-side Bearer bypass — + // this is the client-side half: always send the Bearer when we have a + // real legacy token so the bypass actually fires. + if (token && token !== 'cookie') { finalHeaders['Authorization'] = `Bearer ${token}` }