From 15e7cb138a59da073d162cb4dd020d35aea236ce Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 26 May 2026 11:36:53 -0700 Subject: [PATCH] fix(hub): soften license gate to permissive when JWKS unavailable Root cause: GET /api/profile returned 402 in prod because the license gate ran for every /api/* route, hit a stale cache, called validateLicenseKey which then called verifyLicenseJwt -> jose's createRemoteJWKSet got a 404 from Keygen's JWKS endpoint, jose threw a non-mapped error, mapJoseError fell through to TitaniumVerifyError('malformed', 'Unknown verify error: ...'), and refreshLicense flipped the cached ACTIVE status to INVALID -> 402. Two surgical fixes: 1. LICENSE_REQUIRED env flag (default true). When false, requireActiveLicense short-circuits to next() and logs a one-shot warning. Escape hatch for the JWKS-is-down period without redeploying schema. 2. refreshLicense now treats TitaniumVerifyError of kind 'network'/'malformed' (or any message containing /jwks/i) as TRANSIENT: preserves cached status, does NOT persist a flip to INVALID. Matches the existing TitaniumApiError semantics ("decoupled-for-read: a healthy ACTIVE user must not be locked out by a transient verify failure"). Tests: 15 existing license-gate cases still pass + 2 new cases covering the permissive bypass and the JWKS-transient preservation. The 5 failing tests in insert-run-started-at.test.ts are pre-existing on main (unrelated). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- hub/src/config.ts | 5 ++++ hub/src/license-gate.ts | 36 +++++++++++++++++++++++- hub/test/license-gate.test.ts | 52 ++++++++++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/hub/src/config.ts b/hub/src/config.ts index d776fb6..aeea7c3 100644 --- a/hub/src/config.ts +++ b/hub/src/config.ts @@ -58,6 +58,10 @@ const titaniumAdminToken = const magicLinkSecret = requireMinLenIfSet("MAGIC_LINK_SECRET", process.env.MAGIC_LINK_SECRET, 32); const sessionSecret = requireMinLenIfSet("SESSION_SECRET", process.env.SESSION_SECRET, 32); const allowLegacyLogin = parseBool(process.env.ALLOW_LEGACY_LOGIN, true); +// Phase 07-D escape hatch: when LICENSE_REQUIRED=false, requireActiveLicense +// short-circuits to permissive mode (logs a one-shot warning on the first +// request, not at boot). Used while the Keygen JWKS endpoint is unhealthy so identity/REST routes don't 402. +const licenseRequired = parseBool(process.env.LICENSE_REQUIRED, true); // Optional Titanium -> hub webhook for license-state changes. Inert (route // returns 503) until Titanium ships the webhook and the secret is provisioned. const titaniumWebhookSecret = requireMinLenIfSet( @@ -94,4 +98,5 @@ export const config = { sessionSecret, allowLegacyLogin, titaniumWebhookSecret, + licenseRequired, }; diff --git a/hub/src/license-gate.ts b/hub/src/license-gate.ts index 47188b0..39631f4 100644 --- a/hub/src/license-gate.ts +++ b/hub/src/license-gate.ts @@ -46,6 +46,10 @@ import { const GRACE_DAYS_DEFAULT = 7; const LICENSE_CHECK_FAILED_THROTTLE_MS = 60_000; +// One-shot warning when LICENSE_REQUIRED=false. Logged on the first request, +// not on every request, to keep prod logs scannable. +let permissiveWarned = false; + // Per-user throttle of `license_check_failed` audit writes — at most one // every 60s to keep the log scannable during a burst of mutating requests. 
const lastLogged = new Map(); @@ -154,7 +158,25 @@ async function refreshLicense( }; } if (err instanceof TitaniumVerifyError) { - // Map verify errors to a sensible state and persist. + // Transient verify failures (JWKS endpoint down / network blip surfaced + // as a verify error rather than TitaniumApiError) MUST NOT flip ACTIVE + // → INVALID. Treat 'network', 'malformed' (covers jose's + // JWKSInvalid / fetch errors), and any verify error whose message matches /jwks/i as + // transient: keep cached value, log, continue with grace logic. + const transient = + err.kind === "network" || + err.kind === "malformed" || + /jwks/i.test(err.message); + if (transient) { + console.warn( + `[license-gate] transient verify error (${err.kind}: ${err.message}) — preserving cached status=${fields.license_status ?? "NONE"}`, + ); + return { + status: fields.license_status ?? "NONE", + license_id: fields.license_id, + }; + } + // Map definitive verify errors to a sensible state and persist. const kindStatus = err.kind === "expired" ? "EXPIRED" : err.kind === "blocked" ? "BANNED" @@ -205,6 +227,18 @@ export function requireActiveLicense( const readOnlyOk = opts.readOnlyOk ?? false; return async (c: Context, next: Next) => { + // Escape hatch: when LICENSE_REQUIRED=false, bypass the gate entirely. + // Used while Keygen JWKS is unhealthy so REST identity routes don't 402. + if (!config.licenseRequired) { + if (!permissiveWarned) { + permissiveWarned = true; + console.warn( + "[license-gate] LICENSE_REQUIRED=false → permissive mode (all authed requests pass)", + ); + } + return next(); + } + const userId = c.get("userId") as string | undefined; if (!userId) { // Auth missing — defer to the upstream 401 shape. 
diff --git a/hub/test/license-gate.test.ts b/hub/test/license-gate.test.ts index b435143..4bb3405 100644 --- a/hub/test/license-gate.test.ts +++ b/hub/test/license-gate.test.ts @@ -13,7 +13,7 @@ import { __resetDalForTesting, } from "../src/license-gate"; import { config } from "../src/config"; -import { BlockedSubjectError } from "../src/titanium-client"; +import { BlockedSubjectError, TitaniumVerifyError } from "../src/titanium-client"; type Fields = { license_status: string | null; @@ -299,3 +299,53 @@ describe("requireActiveLicense — audit log on denial", () => { expect(recordCalls.length).toBe(0); }); }); + +describe("requireActiveLicense — escape hatches", () => { + test("LICENSE_REQUIRED=false → bypass gate entirely", async () => { + const orig = (config as any).licenseRequired; + (config as any).licenseRequired = false; + // No DAL stubs needed — gate must not consult them. + __setDalForTesting({ + getUserLicenseFields: async () => { + throw new Error("should not be called in permissive mode"); + }, + }); + const app = makeApp(); + const res = await app.request("/", { method: "POST" }); + (config as any).licenseRequired = orig; + expect(res.status).toBe(200); + }); + + test("JWKS-unreachable verify error → preserve cached ACTIVE (no flip to INVALID)", async () => { + const origTtl = config.titanium.licenseCacheTtlSeconds; + // Force cache stale so refreshLicense runs. 
+ (config.titanium as any).licenseCacheTtlSeconds = 1; + __setDalForTesting({ + getUserLicenseFields: async () => ({ + license_status: "ACTIVE", + license_id: "lic-1", + license_checked_at: daysAgo(1), // stale + titanium_subject: SUBJECT, + }), + updateLicenseStatus: async (uid, status, lid) => { + updateCalls.push([uid, status, lid]); + }, + recordAuthEvent: async (opts: any) => { + recordCalls.push(opts); + }, + validateLicenseKey: async () => { + throw new TitaniumVerifyError( + "malformed", + "Unknown verify error: JWKS fetch failed: 404 Not Found", + ); + }, + assertNotBlocked: async () => {}, + }); + const app = makeApp({ readOnlyOk: true }); + const res = await app.request("/", { method: "GET" }); + (config.titanium as any).licenseCacheTtlSeconds = origTtl; + expect(res.status).toBe(200); + // Critical: must NOT have persisted INVALID to the DB. + expect(updateCalls).toEqual([]); + }); +});