From 48b11c9ba5b17e9763c4403ce854799485ca8935 Mon Sep 17 00:00:00 2001 From: Scott Lovegrove Date: Tue, 19 May 2026 13:02:36 +0100 Subject: [PATCH 1/4] =?UTF-8?q?feat(auth):=20two-phase=20migrate=20write?= =?UTF-8?q?=20=E2=80=94=20atomic=20existence=20check=20+=20fallback-first?= =?UTF-8?q?=20record?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR2 of the refresh-token series. Hardens `migrateLegacyAuth` against two classes of issue exposed by PR1's bundle contract: 1. Retry-after-failure clobbers a freshly-logged-in v2 record. Pre-existing behaviour used `upsert` (replace-not-merge), which would destroy any newer state if a previous run's `markMigrated()` had failed and the user has since logged in afresh between attempts. 2. A partial migration leaves the user without working credentials. The keyring-first write meant a crash between `setSecret` and `upsert` left a keyring orphan and no record — the CLI couldn't read the token until the next migration attempt completed. Both are solved by restructuring the migration write into two phases: - **Phase 1 (record-first)**: write the v2 record with `fallbackToken: legacyToken` and `hasRefreshToken: false`. Atomic via the new `UserRecordStore.tryInsert?` hook (added in PR1) when the store implements it; falls back to list-then-upsert otherwise (narrow TOCTOU window, tolerable for a one-time migration). Returns `false` when a v2 record for the account already exists — Phase 2 is then skipped to preserve the live state. - **Phase 2 (keyring move)**: only when Phase 1 wrote the record. Delegates to `writeRecordWithKeyringFallback`, which moves the token to the per-user keyring slot and upserts a clean record without `fallbackToken`. Best-effort: a failure leaves the Phase 1 fallback in place and the CLI continues to work via the plaintext fallback; a later `set()` or `setBundle()` upgrades the credential naturally. Behaviour preserved for all existing callers: - The contract surface (`migrateLegacyAuth` signature, `MigrateAuthResult`, `MigrateSkipReason`) is unchanged. - Single-token records still end up with `hasRefreshToken: false` (PR1's `writeRecordWithKeyringFallback` already writes this). - Migrated records get `setDefaultId(account.id)` best-effort unless another default is already pinned (existing behaviour). - `markMigrated` is still the one-way gate; cleanup still runs concurrently. Test surface (382 → 386): - `tryInsert` path exercised when the store opts in. - Pre-existing v2 record skips Phase 2 (both atomic and legacy paths). - Phase 2 keyring failure is benign — Phase 1 fallback survives, marker + cleanup still run. - Existing legacy-keyring / WSL / cleanup tests updated for the new two-upsert happy-path call count. - `buildUserRecords` test harness now accepts `withTryInsert: true` so tests can pick the atomic vs legacy code path. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/migrate.test.ts | 130 +++++++++++++++++++++++++++++- src/auth/keyring/migrate.ts | 99 ++++++++++++++++++----- src/test-support/keyring-mocks.ts | 22 ++++- 3 files changed, 229 insertions(+), 22 deletions(-) diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 0204295..5c2ed51 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -127,7 +127,9 @@ describe('migrateLegacyAuth', () => { expect(km.slots.get(LEGACY)?.secret).toBeNull() expect(state.records.get('99')?.fallbackToken).toBeUndefined() expect(state.defaultId).toBe('99') - expect(harness.upsertSpy).toHaveBeenCalledTimes(1) + // Phase 1 writes the fallback record; Phase 2 upserts the clean + // keyring-backed record. Two upserts is the expected steady state. + expect(harness.upsertSpy).toHaveBeenCalledTimes(2) expect(cleanup).toHaveBeenCalledTimes(1) expect(markMigrated).toHaveBeenCalledTimes(1) expect(marker.migrated).toBe(true) @@ -298,6 +300,132 @@ describe('migrateLegacyAuth', () => { expect(km.slots.get('user-99')?.secret).toBeUndefined() }) + it('uses tryInsert (atomic) when the store implements it', async () => { + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + mockedCreateSecureStore.mockImplementation(km.create) + const harness = buildUserRecords({ withTryInsert: true }) + const marker = { migrated: false } + + const result = await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => marker.migrated, + markMigrated: async () => { + marker.migrated = true + }, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + silent: true, + }) + + expect(result.status).toBe('migrated') + expect(harness.tryInsertSpy).toHaveBeenCalledTimes(1) + // Phase 2 still calls upsert once to clear `fallbackToken`. + expect(harness.upsertSpy).toHaveBeenCalledTimes(1) + expect(km.slots.get('user-99')?.secret).toBe('legacy_tok') + }) + + it('skips Phase 2 when a v2 record already exists (tryInsert returns false)', async () => { + // Retry scenario: a previous run wrote the v2 record + setDefaultId + // and persisted no marker; the user then logged in afresh, which + // mutated the record. On retry, the existence check (via tryInsert) + // returns false and we MUST NOT clobber the live record. + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + km.slots.set('user-99', { secret: 'fresh_login_tok' }) + mockedCreateSecureStore.mockImplementation(km.create) + const harness = buildUserRecords({ withTryInsert: true }) + // Pre-existing v2 record from the fresh login. + const existing: UserRecord = { + account: { id: '99', email: 'me@x.io', label: 'updated-label' }, + hasRefreshToken: false, + } + harness.state.records.set('99', existing) + const marker = { migrated: false } + + const result = await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => marker.migrated, + markMigrated: async () => { + marker.migrated = true + }, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + silent: true, + }) + + expect(result.status).toBe('migrated') + expect(harness.tryInsertSpy).toHaveBeenCalledTimes(1) + // Phase 2 must NOT run — the existing record (and its keyring slot) + // are preserved untouched. upsert never fires. + expect(harness.upsertSpy).not.toHaveBeenCalled() + expect(harness.state.records.get('99')).toBe(existing) + expect(km.slots.get('user-99')?.secret).toBe('fresh_login_tok') + // Cleanup still runs — the legacy state is being retired. + expect(km.slots.get(LEGACY)?.secret).toBeNull() + expect(marker.migrated).toBe(true) + }) + + it('skips Phase 2 when a v2 record already exists (no tryInsert path)', async () => { + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + km.slots.set('user-99', { secret: 'fresh_login_tok' }) + mockedCreateSecureStore.mockImplementation(km.create) + const harness = buildUserRecords() + const existing: UserRecord = { + account: { id: '99', email: 'me@x.io' }, + hasRefreshToken: false, + } + harness.state.records.set('99', existing) + const marker = { migrated: false } + + const result = await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => marker.migrated, + markMigrated: async () => { + marker.migrated = true + }, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + silent: true, + }) + + expect(result.status).toBe('migrated') + expect(harness.upsertSpy).not.toHaveBeenCalled() + expect(harness.state.records.get('99')).toBe(existing) + expect(km.slots.get('user-99')?.secret).toBe('fresh_login_tok') + }) + + it('still returns migrated when Phase 2 keyring write fails (Phase 1 fallback survives)', async () => { + // Phase 1 writes the fallback record; Phase 2 fails to move the + // token into the keyring slot. The Phase 1 record continues to + // serve reads via `fallbackToken`, so the migration is still + // functionally complete — marker is set, cleanup runs. + const { km, state, result, marker } = await runMigration({ + slots: { + [LEGACY]: { secret: 'legacy_tok' }, + 'user-99': { setErr: new Error('keyring write blocked') }, + }, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + }, + }) + + expect(result.status).toBe('migrated') + // Phase 2 setSecret threw — the Phase 1 fallback remains in place. + expect(state.records.get('99')?.fallbackToken).toBe('legacy_tok') + expect(km.slots.get('user-99')?.secret).toBeNull() + // Marker + cleanup still ran. + expect(marker.migrated).toBe(true) + expect(km.slots.get(LEGACY)?.secret).toBeNull() + }) + it('prefers the legacy keyring token over the plaintext fallback when both are populated', async () => { // Locks the keyring-first precedence — a refactor that flipped the // order would silently surface a stale plaintext token even when a diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 9b407ba..0aec09e 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -7,7 +7,7 @@ import { type SecureStore, SecureStoreUnavailableError, } from './secure-store.js' -import type { UserRecordStore } from './types.js' +import type { UserRecord, UserRecordStore } from './types.js' export type MigrateLegacyAuthOptions = { serviceName: string @@ -97,17 +97,30 @@ type LegacyTokenResult = * the user, …) leaves the v1 state untouched so the consumer's runtime * fallback can keep serving the legacy token until the next attempt. * - * Order of operations is deliberate so the migration is genuinely one-way: + * Order of operations is deliberate so the migration is genuinely one-way + * AND safe under retry: * * 1. `hasMigrated()` short-circuits if the durable marker is already set. * 2. Read the v1 token (legacy keyring slot first, then plaintext). * 3. `identifyAccount(token)` resolves the v2 `account` shape. - * 4. `writeRecordWithKeyringFallback` writes the v2 record. - * 5. Best-effort `setDefaultId(account.id)` so the new record is active. - * 6. `markMigrated()` persists the marker. **If this fails, we return + * 4. **Phase 1 (record-first)**: ensure a v2 record exists for the + * account, written with `fallbackToken: legacyToken` so reads work + * even before Phase 2 lands. Atomic via `UserRecordStore.tryInsert?` + * when available; otherwise list-then-upsert (narrow TOCTOU window, + * tolerable for one-time migration). When a v2 record for the + * account already exists, Phase 1 is a no-op and Phase 2 is + * skipped — preserving any later state that may have landed via a + * fresh login. + * 5. **Phase 2 (keyring move)**: only when Phase 1 wrote the record. + * `setSecret` moves the token into the per-user keyring slot, then + * a clean `upsert` clears `fallbackToken`. Best-effort throughout — + * a failure leaves the Phase 1 fallback in place and reads continue + * to work; a later `set()` or `setBundle()` upgrades the credential. + * 6. Best-effort `setDefaultId(account.id)` so the new record is active. + * 7. `markMigrated()` persists the marker. **If this fails, we return * `skipped` even though the v2 record is on disk** — the marker is * what prevents re-migration on the next run. - * 7. Best-effort legacy keyring delete + `cleanupLegacyConfig()` run + * 8. Best-effort legacy keyring delete + `cleanupLegacyConfig()` run * concurrently. Failures here are harmless because the marker is set. */ export async function migrateLegacyAuth( @@ -153,23 +166,37 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'identify-failed', getErrorMessage(error)) } - // `writeRecordWithKeyringFallback` swallows `SecureStoreUnavailableError` - // internally (writing to `fallbackToken` instead), so any error here is - // a non-keyring failure — typically a `userRecords.upsert` rejection. + // Phase 1: ensure a v2 record exists. Either we write a self-sufficient + // fallback-bearing record (the token survives a Phase 2 crash), or a + // v2 record was already there from a fresh login and we leave it alone. + let phase1Wrote: boolean try { - await writeRecordWithKeyringFallback({ - secureStore: createSecureStore({ - serviceName, - account: accountForUser(account.id), - }), - userRecords, - account, - token: legacyToken.token, - }) + phase1Wrote = await ensureV2Record(userRecords, account, legacyToken.token) } catch (error) { return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) } + // Phase 2: move the token from `fallbackToken` to the per-user keyring + // slot. Best-effort — `writeRecordWithKeyringFallback` already degrades + // to a fallback record on `SecureStoreUnavailableError`, so this branch + // covers the rare non-keyring failure too. The Phase 1 record keeps + // serving reads regardless. + if (phase1Wrote) { + try { + await writeRecordWithKeyringFallback({ + secureStore: createSecureStore({ + serviceName, + account: accountForUser(account.id), + }), + userRecords, + account, + token: legacyToken.token, + }) + } catch { + // best-effort + } + } + // Default promotion is best-effort and **only fires when nothing is // already pinned**. A retry after a previous `markMigrated()` failure // can land on a store where the user has since logged in to a different @@ -217,6 +244,42 @@ export async function migrateLegacyAuth( return { status: 'migrated', account } } +/** + * Phase 1 of the migration write. Atomic when the store exposes + * `tryInsert?`; otherwise list-then-upsert. + * + * Returns `true` when this call wrote the record (Phase 2 should proceed); + * `false` when a v2 record for the account already existed (Phase 2 must be + * skipped to preserve any later state). + * + * The Phase 1 shape is deliberately `fallbackToken`-bearing rather than + * keyring-first: a crash between Phase 1 and Phase 2 leaves a record that + * already works for reads. Phase 2 promotes the credential to the keyring; + * if it crashes too, the next migration retry will pick up where this one + * left off. + */ +async function ensureV2Record( + userRecords: UserRecordStore, + account: TAccount, + legacyToken: string, +): Promise { + const record: UserRecord = { + account, + fallbackToken: legacyToken, + hasRefreshToken: false, + } + if (userRecords.tryInsert) { + return userRecords.tryInsert(record) + } + // Non-atomic fallback. Narrow TOCTOU window between `list()` and + // `upsert()`; tolerable for a one-time migration since concurrent + // runs would write the same shape anyway. + const existing = await userRecords.list() + if (existing.some((r) => r.account.id === account.id)) return false + await userRecords.upsert(record) + return true +} + async function readLegacyToken( legacyStore: SecureStore, loadLegacyPlaintextToken: () => Promise, diff --git a/src/test-support/keyring-mocks.ts b/src/test-support/keyring-mocks.ts index 4d34a7d..8a49680 100644 --- a/src/test-support/keyring-mocks.ts +++ b/src/test-support/keyring-mocks.ts @@ -116,10 +116,18 @@ type UserRecordsHarness = { upsertSpy: ReturnType removeSpy: ReturnType setDefaultSpy: ReturnType + /** Defined only when the harness was built with `withTryInsert: true`. */ + tryInsertSpy?: ReturnType } -/** In-memory `UserRecordStore` with spies on the mutating methods. */ -export function buildUserRecords(): UserRecordsHarness { +/** + * In-memory `UserRecordStore` with spies on the mutating methods. Opt into + * the atomic `tryInsert` path via `withTryInsert: true` — the default + * preserves the legacy list-then-upsert shape exercised by older tests. + */ +export function buildUserRecords( + options: { withTryInsert?: boolean } = {}, +): UserRecordsHarness { const state = { records: new Map>(), defaultId: null as string | null, @@ -133,6 +141,13 @@ export function buildUserRecords(): UserRecordsHar const setDefaultSpy = vi.fn(async (id: string | null) => { state.defaultId = id }) + const tryInsertSpy = options.withTryInsert + ? vi.fn(async (record: UserRecord) => { + if (state.records.has(record.account.id)) return false + state.records.set(record.account.id, record) + return true + }) + : undefined const store: UserRecordStore = { async list() { return [...state.records.values()] @@ -143,6 +158,7 @@ export function buildUserRecords(): UserRecordsHar return state.defaultId }, setDefaultId: setDefaultSpy, + ...(tryInsertSpy ? { tryInsert: tryInsertSpy } : {}), } - return { store, state, upsertSpy, removeSpy, setDefaultSpy } + return { store, state, upsertSpy, removeSpy, setDefaultSpy, tryInsertSpy } } From 57b8b6258d524ae8f53fe33c84de6b9f101459dc Mon Sep 17 00:00:00 2001 From: Scott Lovegrove Date: Tue, 19 May 2026 13:53:27 +0100 Subject: [PATCH 2/4] fix(auth): address PR review on the two-phase migrate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 8 findings on #38. All resolved. P1 — correctness - Verify the existing v2 record is readable before retiring the legacy state. A clean record from a prior `set()` / `setBundle()` has no `fallbackToken` and depends on the keyring being reachable; if it isn't, cleaning up the legacy slot would brick the user. New `verifyExistingRecordReadable` short-circuits with `skipped(legacy-keyring-unreachable)` so the next retry has a working credential to fall back to. P2 — correctness - Inline Phase 2 instead of delegating to `writeRecordWithKeyringFallback`. The delegated path double-upserted the same fallback record on the offline branch (Phase 1 had already written it). Phase 2 now calls `setSecret` directly, falls through on `SecureStoreUnavailableError`, and only upserts the clean shape when the keyring write actually succeeded. - Narrow the Phase 2 catch. Only `SecureStoreUnavailableError` is swallowed; other failures surface as `skipped(user-record-write-failed)` so operators see transient issues and the marker stays unset for retry. P2 — refactor / DRY - Extract `buildSingleTokenRecord(account, fallbackToken?)` into `record-write.ts`. Migration Phase 1 and `writeBundleWithKeyringFallback` both use it — no more drift between the two record-shape sites. - `runMigration` test helper takes `withTryInsert?: boolean` and threads it through `buildUserRecords`. The three new manually-wired-up tests collapse into the standard helper, and pre-existing v2 record cases use `it.each([true, false])` to cover both paths with one body. P2 — tests - Replace the "exact upsert count" assertion with end-state assertions (token in keyring, no plaintext residue, legacy slot empty). - New `listSpy` on `buildUserRecords` so the `tryInsert` test can assert `list()` was never consulted — the TOCTOU race the atomic path is meant to prevent. - New regression: existing v2 record + keyring offline returns `skipped(legacy-keyring-unreachable)` without touching legacy state. - New regression: existing v2 record with `fallbackToken` skips the readability probe entirely (record is self-sufficient). - New regression: Phase 2 `SecureStoreUnavailable` falls back silently (Phase 1 record survives, marker still set, cleanup runs). - New regression: Phase 2 non-keyring `setSecret` failure surfaces as `skipped(user-record-write-failed)`. P3 — comment hygiene - Drop the "Phase 1 + Phase 2 = two upserts" restatement comment. - Drop the meta-comment justifying end-state assertions. - JSDoc on `ensureV2Record` now spells out "time-of-check, time-of-use" rather than the bare TOCTOU acronym. 386 → 389 tests pass. Check / type-check / build all clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/migrate.test.ts | 197 ++++++++++++++++-------------- src/auth/keyring/migrate.ts | 123 +++++++++++++++---- src/auth/keyring/record-write.ts | 19 +++ src/test-support/keyring-mocks.ts | 8 +- 4 files changed, 228 insertions(+), 119 deletions(-) diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 5c2ed51..35537b2 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -42,6 +42,8 @@ async function runMigration( seedRecords?: Record> seedDefaultId?: string marker?: MarkerState + /** Opt the harness into the atomic `tryInsert?` path. */ + withTryInsert?: boolean options?: Partial> } = {}, ) { @@ -51,7 +53,7 @@ async function runMigration( } mockedCreateSecureStore.mockImplementation(km.create) - const harness = buildUserRecords() + const harness = buildUserRecords({ withTryInsert: opts.withTryInsert }) for (const [id, rec] of Object.entries(opts.seedRecords ?? {})) { harness.state.records.set(id, rec) } @@ -110,7 +112,7 @@ describe('migrateLegacyAuth', () => { it('migrates a legacy keyring token into a per-user slot and clears the legacy entry', async () => { const cleanup = vi.fn(async () => undefined) - const { km, state, harness, result, marker, markMigrated } = await runMigration({ + const { km, state, result, marker, markMigrated } = await runMigration({ slots: { [LEGACY]: { secret: 'legacy_tok' } }, options: { identifyAccount: async (token) => { @@ -127,9 +129,6 @@ describe('migrateLegacyAuth', () => { expect(km.slots.get(LEGACY)?.secret).toBeNull() expect(state.records.get('99')?.fallbackToken).toBeUndefined() expect(state.defaultId).toBe('99') - // Phase 1 writes the fallback record; Phase 2 upserts the clean - // keyring-backed record. Two upserts is the expected steady state. - expect(harness.upsertSpy).toHaveBeenCalledTimes(2) expect(cleanup).toHaveBeenCalledTimes(1) expect(markMigrated).toHaveBeenCalledTimes(1) expect(marker.migrated).toBe(true) @@ -300,117 +299,114 @@ describe('migrateLegacyAuth', () => { expect(km.slots.get('user-99')?.secret).toBeUndefined() }) - it('uses tryInsert (atomic) when the store implements it', async () => { - const km = buildKeyringMap() - km.slots.set(LEGACY, { secret: 'legacy_tok' }) - mockedCreateSecureStore.mockImplementation(km.create) - const harness = buildUserRecords({ withTryInsert: true }) - const marker = { migrated: false } - - const result = await migrateLegacyAuth({ - serviceName: SERVICE, - legacyAccount: LEGACY, - userRecords: harness.store, - hasMigrated: async () => marker.migrated, - markMigrated: async () => { - marker.migrated = true + it('uses tryInsert (atomic) and never calls list() when the store implements it', async () => { + // Atomic path must avoid the list-then-upsert TOCTOU race entirely. + const { km, harness, result } = await runMigration({ + slots: { [LEGACY]: { secret: 'legacy_tok' } }, + withTryInsert: true, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), }, - loadLegacyPlaintextToken: async () => null, - identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), - silent: true, }) expect(result.status).toBe('migrated') expect(harness.tryInsertSpy).toHaveBeenCalledTimes(1) - // Phase 2 still calls upsert once to clear `fallbackToken`. + // Phase 2 promotes the record to the clean (no-fallback) shape. expect(harness.upsertSpy).toHaveBeenCalledTimes(1) + // Atomic insert means `list()` is never consulted on Phase 1. + expect(harness.listSpy).not.toHaveBeenCalled() expect(km.slots.get('user-99')?.secret).toBe('legacy_tok') }) - it('skips Phase 2 when a v2 record already exists (tryInsert returns false)', async () => { - // Retry scenario: a previous run wrote the v2 record + setDefaultId - // and persisted no marker; the user then logged in afresh, which - // mutated the record. On retry, the existence check (via tryInsert) - // returns false and we MUST NOT clobber the live record. - const km = buildKeyringMap() - km.slots.set(LEGACY, { secret: 'legacy_tok' }) - km.slots.set('user-99', { secret: 'fresh_login_tok' }) - mockedCreateSecureStore.mockImplementation(km.create) - const harness = buildUserRecords({ withTryInsert: true }) - // Pre-existing v2 record from the fresh login. + it.each([true, false])( + 'skips Phase 2 when a v2 record already exists (withTryInsert=%s)', + async (withTryInsert) => { + const existing: UserRecord = { + account: { id: '99', email: 'me@x.io', label: 'updated-label' }, + hasRefreshToken: false, + } + const { km, state, result, marker, harness } = await runMigration({ + slots: { + [LEGACY]: { secret: 'legacy_tok' }, + 'user-99': { secret: 'fresh_login_tok' }, + }, + seedRecords: { '99': existing }, + withTryInsert, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + }, + }) + + expect(result.status).toBe('migrated') + expect(harness.upsertSpy).not.toHaveBeenCalled() + expect(state.records.get('99')).toBe(existing) + expect(km.slots.get('user-99')?.secret).toBe('fresh_login_tok') + expect(km.slots.get(LEGACY)?.secret).toBeNull() + expect(marker.migrated).toBe(true) + }, + ) + + it('returns skipped(legacy-keyring-unreachable) when an existing v2 record cannot be read', async () => { + // A clean v2 record from a prior set/setBundle has no fallbackToken + // and depends on the keyring being reachable. With the keyring + // offline, cleaning up the legacy state would brick the user — we + // must abort the migration and leave the legacy token in place. const existing: UserRecord = { - account: { id: '99', email: 'me@x.io', label: 'updated-label' }, + account: { id: '99', email: 'me@x.io' }, hasRefreshToken: false, } - harness.state.records.set('99', existing) - const marker = { migrated: false } - - const result = await migrateLegacyAuth({ - serviceName: SERVICE, - legacyAccount: LEGACY, - userRecords: harness.store, - hasMigrated: async () => marker.migrated, - markMigrated: async () => { - marker.migrated = true + const { km, state, result, marker } = await runMigration({ + slots: { + [LEGACY]: { secret: 'legacy_tok' }, + 'user-99': { getErr: new SecureStoreUnavailableError('no dbus') }, + }, + seedRecords: { '99': existing }, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), }, - loadLegacyPlaintextToken: async () => null, - identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), - silent: true, }) - expect(result.status).toBe('migrated') - expect(harness.tryInsertSpy).toHaveBeenCalledTimes(1) - // Phase 2 must NOT run — the existing record (and its keyring slot) - // are preserved untouched. upsert never fires. - expect(harness.upsertSpy).not.toHaveBeenCalled() - expect(harness.state.records.get('99')).toBe(existing) - expect(km.slots.get('user-99')?.secret).toBe('fresh_login_tok') - // Cleanup still runs — the legacy state is being retired. - expect(km.slots.get(LEGACY)?.secret).toBeNull() - expect(marker.migrated).toBe(true) + expect(result).toMatchObject({ + status: 'skipped', + reason: 'legacy-keyring-unreachable', + }) + expect(marker.migrated).toBe(false) + expect(km.slots.get(LEGACY)?.secret).toBe('legacy_tok') + expect(state.records.get('99')).toBe(existing) }) - it('skips Phase 2 when a v2 record already exists (no tryInsert path)', async () => { - const km = buildKeyringMap() - km.slots.set(LEGACY, { secret: 'legacy_tok' }) - km.slots.set('user-99', { secret: 'fresh_login_tok' }) - mockedCreateSecureStore.mockImplementation(km.create) - const harness = buildUserRecords() + it('skips the readability check when an existing v2 record carries fallbackToken', async () => { + // Self-sufficient record — even if the keyring is offline, the + // fallbackToken serves reads. Migration proceeds with cleanup. const existing: UserRecord = { account: { id: '99', email: 'me@x.io' }, + fallbackToken: 'external_plaintext', hasRefreshToken: false, } - harness.state.records.set('99', existing) - const marker = { migrated: false } - - const result = await migrateLegacyAuth({ - serviceName: SERVICE, - legacyAccount: LEGACY, - userRecords: harness.store, - hasMigrated: async () => marker.migrated, - markMigrated: async () => { - marker.migrated = true + const { km, result, marker } = await runMigration({ + slots: { + [LEGACY]: { secret: 'legacy_tok' }, + 'user-99': { getErr: new SecureStoreUnavailableError('no dbus') }, + }, + seedRecords: { '99': existing }, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), }, - loadLegacyPlaintextToken: async () => null, - identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), - silent: true, }) expect(result.status).toBe('migrated') - expect(harness.upsertSpy).not.toHaveBeenCalled() - expect(harness.state.records.get('99')).toBe(existing) - expect(km.slots.get('user-99')?.secret).toBe('fresh_login_tok') + expect(marker.migrated).toBe(true) + expect(km.slots.get(LEGACY)?.secret).toBeNull() }) - it('still returns migrated when Phase 2 keyring write fails (Phase 1 fallback survives)', async () => { - // Phase 1 writes the fallback record; Phase 2 fails to move the - // token into the keyring slot. The Phase 1 record continues to - // serve reads via `fallbackToken`, so the migration is still - // functionally complete — marker is set, cleanup runs. - const { km, state, result, marker } = await runMigration({ + it('Phase 2 SecureStoreUnavailable falls back silently; Phase 1 record survives', async () => { + // Headless / WSL: the per-user slot can't accept a write. Phase 1's + // fallback record is self-sufficient, so the migration completes + // with the token in plaintext (until a future login upgrades it). + const { km, state, result, marker, harness } = await runMigration({ slots: { [LEGACY]: { secret: 'legacy_tok' }, - 'user-99': { setErr: new Error('keyring write blocked') }, + 'user-99': { setErr: new SecureStoreUnavailableError('no dbus') }, }, options: { identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), @@ -418,12 +414,35 @@ describe('migrateLegacyAuth', () => { }) expect(result.status).toBe('migrated') - // Phase 2 setSecret threw — the Phase 1 fallback remains in place. expect(state.records.get('99')?.fallbackToken).toBe('legacy_tok') expect(km.slots.get('user-99')?.secret).toBeNull() - // Marker + cleanup still ran. expect(marker.migrated).toBe(true) expect(km.slots.get(LEGACY)?.secret).toBeNull() + // No clean Phase 2 upsert in this branch — Phase 1 wrote the only + // record, with `fallbackToken` still populated. + expect(harness.upsertSpy).toHaveBeenCalledTimes(1) + }) + + it('Phase 2 non-keyring setSecret failure surfaces as skipped (marker stays unset)', async () => { + // A transient backend error must not be silently swallowed — the + // operator needs visibility, and the marker stays unset so a future + // retry can re-attempt Phase 2. + const { result, marker, km } = await runMigration({ + slots: { + [LEGACY]: { secret: 'legacy_tok' }, + 'user-99': { setErr: new Error('keyring backend exploded') }, + }, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + }, + }) + + expect(result).toMatchObject({ + status: 'skipped', + reason: 'user-record-write-failed', + }) + expect(marker.migrated).toBe(false) + expect(km.slots.get(LEGACY)?.secret).toBe('legacy_tok') }) it('prefers the legacy keyring token over the plaintext fallback when both are populated', async () => { diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 0aec09e..18011ab 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -1,13 +1,13 @@ import { getErrorMessage } from '../../errors.js' import type { AuthAccount } from '../types.js' -import { writeRecordWithKeyringFallback } from './record-write.js' +import { buildSingleTokenRecord } from './record-write.js' import { createSecureStore, DEFAULT_ACCOUNT_FOR_USER, type SecureStore, SecureStoreUnavailableError, } from './secure-store.js' -import type { UserRecord, UserRecordStore } from './types.js' +import type { UserRecordStore } from './types.js' export type MigrateLegacyAuthOptions = { serviceName: string @@ -176,24 +176,56 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) } - // Phase 2: move the token from `fallbackToken` to the per-user keyring - // slot. Best-effort — `writeRecordWithKeyringFallback` already degrades - // to a fallback record on `SecureStoreUnavailableError`, so this branch - // covers the rare non-keyring failure too. The Phase 1 record keeps - // serving reads regardless. + const userSlot = createSecureStore({ + serviceName, + account: accountForUser(account.id), + }) + if (phase1Wrote) { + // Phase 2: move the token from `fallbackToken` into the per-user + // keyring slot. Inlined (rather than delegating to + // `writeRecordWithKeyringFallback`) so the offline-keyring path + // doesn't double-upsert — Phase 1 already wrote the fallback + // record. Only `SecureStoreUnavailableError` is swallowed; other + // failures leave the marker unset so the operator can retry. + let keyringStored = false try { - await writeRecordWithKeyringFallback({ - secureStore: createSecureStore({ - serviceName, - account: accountForUser(account.id), - }), - userRecords, - account, - token: legacyToken.token, - }) - } catch { - // best-effort + await userSlot.setSecret(legacyToken.token) + keyringStored = true + } catch (error) { + if (!(error instanceof SecureStoreUnavailableError)) { + return skipped( + silent, + logPrefix, + 'user-record-write-failed', + getErrorMessage(error), + ) + } + // Phase 1's fallback record already serves reads. Fall through. + } + if (keyringStored) { + // Promote the record to the clean shape (no fallback) so the + // plaintext copy doesn't outlive the secure-store write. + try { + await userRecords.upsert(buildSingleTokenRecord(account)) + } catch (error) { + return skipped( + silent, + logPrefix, + 'user-record-write-failed', + getErrorMessage(error), + ) + } + } + } else { + // Existing v2 record: verify it's readable in the current + // environment before retiring the legacy state. A record from a + // prior `set()` / `setBundle()` has no `fallbackToken` and depends + // on the keyring being reachable; if it isn't, cleaning up legacy + // would brick the user. + const readabilityError = await verifyExistingRecordReadable(userRecords, account, userSlot) + if (readabilityError) { + return skipped(silent, logPrefix, readabilityError.reason, readabilityError.detail) } } @@ -263,23 +295,62 @@ async function ensureV2Record( account: TAccount, legacyToken: string, ): Promise { - const record: UserRecord = { - account, - fallbackToken: legacyToken, - hasRefreshToken: false, - } + const record = buildSingleTokenRecord(account, legacyToken) if (userRecords.tryInsert) { return userRecords.tryInsert(record) } - // Non-atomic fallback. Narrow TOCTOU window between `list()` and - // `upsert()`; tolerable for a one-time migration since concurrent - // runs would write the same shape anyway. + // Non-atomic fallback. Narrow race window between `list()` and + // `upsert()` (time-of-check, time-of-use): a concurrent writer could + // sneak a record in between the two calls and our upsert would + // replace-not-merge it. Tolerable for one-time migration — concurrent + // migration runs would write the same shape anyway, and the user + // typically isn't running other auth writes simultaneously. const existing = await userRecords.list() if (existing.some((r) => r.account.id === account.id)) return false await userRecords.upsert(record) return true } +/** + * After Phase 1 returns `false` (existing v2 record), verify the record is + * actually readable in the current environment before retiring the legacy + * state. A clean v2 record from a prior `set()` / `setBundle()` has no + * `fallbackToken` and depends on the keyring being reachable; if it isn't, + * cleaning up the legacy slot would brick the user. + * + * Returns `null` when the existing record is safely readable. Returns a + * skip descriptor when it isn't — the caller surfaces the migration as + * `skipped` without touching the legacy state, so the next retry has a + * working credential to fall back to. + */ +async function verifyExistingRecordReadable( + userRecords: UserRecordStore, + account: TAccount, + userSlot: SecureStore, +): Promise<{ reason: MigrateSkipReason; detail: string } | null> { + const existing = (await userRecords.list()).find((r) => r.account.id === account.id) + if (existing?.fallbackToken?.trim()) return null + try { + const raw = await userSlot.getSecret() + if (raw?.trim()) return null + return { + reason: 'user-record-write-failed', + detail: 'existing v2 record has no fallback token and its keyring slot is empty', + } + } catch (error) { + if (error instanceof SecureStoreUnavailableError) { + return { + reason: 'legacy-keyring-unreachable', + detail: 'existing v2 record has no fallback token and the per-user keyring slot is unreachable', + } + } + return { + reason: 'user-record-write-failed', + detail: getErrorMessage(error), + } + } +} + async function readLegacyToken( legacyStore: SecureStore, loadLegacyPlaintextToken: () => Promise, diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index 40039ca..a46f047 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -179,6 +179,25 @@ export async function writeBundleWithKeyringFallback( + account: TAccount, + fallbackToken?: string, +): UserRecord { + return { + account, + ...(fallbackToken ? { fallbackToken } : {}), + hasRefreshToken: false, + } +} + const NOOP_SECURE_STORE: SecureStore = { async getSecret() { return null diff --git a/src/test-support/keyring-mocks.ts b/src/test-support/keyring-mocks.ts index 8a49680..c471daf 100644 --- a/src/test-support/keyring-mocks.ts +++ b/src/test-support/keyring-mocks.ts @@ -113,6 +113,7 @@ type UserRecordsHarness = { records: Map> defaultId: string | null } + listSpy: ReturnType upsertSpy: ReturnType removeSpy: ReturnType setDefaultSpy: ReturnType @@ -132,6 +133,7 @@ export function buildUserRecords( records: new Map>(), defaultId: null as string | null, } + const listSpy = vi.fn(async () => [...state.records.values()]) const upsertSpy = vi.fn(async (record: UserRecord) => { state.records.set(record.account.id, record) }) @@ -149,9 +151,7 @@ export function buildUserRecords( }) : undefined const store: UserRecordStore = { - async list() { - return [...state.records.values()] - }, + list: listSpy, upsert: upsertSpy, remove: removeSpy, async getDefaultId() { @@ -160,5 +160,5 @@ export function buildUserRecords( setDefaultId: setDefaultSpy, ...(tryInsertSpy ? { tryInsert: tryInsertSpy } : {}), } - return { store, state, upsertSpy, removeSpy, setDefaultSpy, tryInsertSpy } + return { store, state, listSpy, upsertSpy, removeSpy, setDefaultSpy, tryInsertSpy } } From 2dd77fad49b062ef2dc0eab9ddbc93d769e59c00 Mon Sep 17 00:00:00 2001 From: Scott Lovegrove Date: Tue, 19 May 2026 14:08:48 +0100 Subject: [PATCH 3/4] =?UTF-8?q?chore(auth):=20trim=20migrate.ts=20JSDoc/co?= =?UTF-8?q?mment=20bloat=20+=20rename=20phase1Wrote=20=E2=86=92=20phase1Wr?= =?UTF-8?q?itten?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No behaviour change. Two passes: 1. JSDoc audit — collapsed restated detail across the main function's numbered list, the two helper JSDocs, and the inline comments. The main `migrateLegacyAuth` block goes from 32 lines to 19; helper docs compress to 6 lines each; inline comments either drop (when they duplicated JSDoc) or trim to the WHY only. 2. Naming nit (per Scott on #38): `phase1Wrote` → `phase1Written`. Past participle reads cleanly as a boolean flag. Net -52 LOC. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/migrate.ts | 142 ++++++++++++------------------------ 1 file changed, 45 insertions(+), 97 deletions(-) diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 18011ab..a3d0649 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -93,35 +93,23 @@ type LegacyTokenResult = /** * One-time migration of a v1 single-user auth state into a v2 multi-user - * shape. Best-effort: any failure (offline keyring, network error fetching - * the user, …) leaves the v1 state untouched so the consumer's runtime - * fallback can keep serving the legacy token until the next attempt. + * shape. Best-effort: any failure leaves v1 untouched so the runtime + * fallback keeps serving the legacy token until the next attempt. * - * Order of operations is deliberate so the migration is genuinely one-way - * AND safe under retry: + * Order is deliberate so the migration is one-way AND safe under retry: * - * 1. `hasMigrated()` short-circuits if the durable marker is already set. - * 2. Read the v1 token (legacy keyring slot first, then plaintext). - * 3. `identifyAccount(token)` resolves the v2 `account` shape. - * 4. **Phase 1 (record-first)**: ensure a v2 record exists for the - * account, written with `fallbackToken: legacyToken` so reads work - * even before Phase 2 lands. Atomic via `UserRecordStore.tryInsert?` - * when available; otherwise list-then-upsert (narrow TOCTOU window, - * tolerable for one-time migration). When a v2 record for the - * account already exists, Phase 1 is a no-op and Phase 2 is - * skipped — preserving any later state that may have landed via a - * fresh login. - * 5. **Phase 2 (keyring move)**: only when Phase 1 wrote the record. - * `setSecret` moves the token into the per-user keyring slot, then - * a clean `upsert` clears `fallbackToken`. Best-effort throughout — - * a failure leaves the Phase 1 fallback in place and reads continue - * to work; a later `set()` or `setBundle()` upgrades the credential. - * 6. Best-effort `setDefaultId(account.id)` so the new record is active. - * 7. `markMigrated()` persists the marker. **If this fails, we return - * `skipped` even though the v2 record is on disk** — the marker is - * what prevents re-migration on the next run. - * 8. Best-effort legacy keyring delete + `cleanupLegacyConfig()` run - * concurrently. Failures here are harmless because the marker is set. + * 1. `hasMigrated()` short-circuits when the marker is set. + * 2. Read the v1 token (legacy keyring first, then plaintext). + * 3. `identifyAccount(token)` resolves the v2 `account`. + * 4. **Phase 1** — `ensureV2Record` writes a fallback-bearing record (or + * no-ops when a v2 record already exists). + * 5. **Phase 2** — when Phase 1 wrote: move the token to the per-user + * keyring slot and upsert the clean record. When Phase 1 didn't: + * verify the existing record is readable before retiring legacy. + * 6. Best-effort `setDefaultId(account.id)`. + * 7. `markMigrated()` — the one-way gate. Failure here surfaces as + * `skipped(marker-write-failed)` so the caller retries. + * 8. Best-effort legacy cleanup runs concurrently. */ export async function migrateLegacyAuth( options: MigrateLegacyAuthOptions, @@ -144,8 +132,7 @@ export async function migrateLegacyAuth( return { status: 'already-migrated' } } - // One legacy-keyring handle covers both the initial read and the - // post-success cleanup delete. + // One handle for both the initial read and the cleanup delete. const legacyStore = createSecureStore({ serviceName, account: legacyAccount }) const legacyToken = await readLegacyToken(legacyStore, loadLegacyPlaintextToken) @@ -166,12 +153,9 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'identify-failed', getErrorMessage(error)) } - // Phase 1: ensure a v2 record exists. Either we write a self-sufficient - // fallback-bearing record (the token survives a Phase 2 crash), or a - // v2 record was already there from a fresh login and we leave it alone. - let phase1Wrote: boolean + let phase1Written: boolean try { - phase1Wrote = await ensureV2Record(userRecords, account, legacyToken.token) + phase1Written = await ensureV2Record(userRecords, account, legacyToken.token) } catch (error) { return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) } @@ -181,13 +165,12 @@ export async function migrateLegacyAuth( account: accountForUser(account.id), }) - if (phase1Wrote) { - // Phase 2: move the token from `fallbackToken` into the per-user - // keyring slot. Inlined (rather than delegating to - // `writeRecordWithKeyringFallback`) so the offline-keyring path - // doesn't double-upsert — Phase 1 already wrote the fallback - // record. Only `SecureStoreUnavailableError` is swallowed; other - // failures leave the marker unset so the operator can retry. + if (phase1Written) { + // Phase 2 inlined (not via `writeRecordWithKeyringFallback`) so the + // offline-keyring branch doesn't double-upsert the same fallback + // record Phase 1 just wrote. Only `SecureStoreUnavailableError` is + // swallowed — other failures leave the marker unset so a retry + // re-attempts the keyring move. let keyringStored = false try { await userSlot.setSecret(legacyToken.token) @@ -201,11 +184,9 @@ export async function migrateLegacyAuth( getErrorMessage(error), ) } - // Phase 1's fallback record already serves reads. Fall through. } if (keyringStored) { - // Promote the record to the clean shape (no fallback) so the - // plaintext copy doesn't outlive the secure-store write. + // Clear `fallbackToken` so plaintext doesn't outlive the keyring write. try { await userRecords.upsert(buildSingleTokenRecord(account)) } catch (error) { @@ -218,22 +199,14 @@ export async function migrateLegacyAuth( } } } else { - // Existing v2 record: verify it's readable in the current - // environment before retiring the legacy state. A record from a - // prior `set()` / `setBundle()` has no `fallbackToken` and depends - // on the keyring being reachable; if it isn't, cleaning up legacy - // would brick the user. const readabilityError = await verifyExistingRecordReadable(userRecords, account, userSlot) if (readabilityError) { return skipped(silent, logPrefix, readabilityError.reason, readabilityError.detail) } } - // Default promotion is best-effort and **only fires when nothing is - // already pinned**. A retry after a previous `markMigrated()` failure - // can land on a store where the user has since logged in to a different - // account and picked it as default — blindly setting the legacy account - // back as default would silently switch the user. + // Only promote when nothing is pinned — a retry must not overwrite a + // default the user chose between attempts. try { const existingDefault = await userRecords.getDefaultId() if (!existingDefault) { @@ -243,33 +216,24 @@ export async function migrateLegacyAuth( // best-effort } - // Persist the one-way marker BEFORE legacy cleanup. If this fails, the - // v2 record is already written but the gate is unset — surface as - // `skipped` so the caller retries. Without this ordering, a later - // `logout` could let the next run re-migrate the stale v1 token. + // Marker BEFORE cleanup: the gate, not cleanup, is what prevents the + // next run from re-migrating after a later `logout`. try { await markMigrated() } catch (error) { return skipped(silent, logPrefix, 'marker-write-failed', getErrorMessage(error)) } - // Best-effort legacy cleanup — runs concurrently so we don't pay - // keyring latency followed by config-write latency on every install. - // The marker is already set, so a failure here can't cause - // re-migration on the next run. The `Promise.resolve().then(...)` - // wrappers convert any *synchronous* throw from a consumer-supplied - // `cleanupLegacyConfig` (or an oddly-implemented `SecureStore`) into - // a rejected promise that `Promise.allSettled` can swallow. + // `Promise.resolve().then(...)` converts any *synchronous* throw from + // a consumer's `cleanupLegacyConfig` into a rejection that + // `allSettled` can swallow. await Promise.allSettled([ Promise.resolve().then(() => legacyStore.deleteSecret()), Promise.resolve().then(() => cleanupLegacyConfig?.()), ]) if (!silent) { - // No identifier in the log line — `account.id` is typed as `string` - // but consumers can legitimately use an email or other PII there. - // Callers that need richer telemetry can compose it from the - // returned `account`. + // Account id may carry PII (email, etc.) — keep it out of logs. console.error(`${logPrefix}: migrated existing token to multi-user store.`) } @@ -277,18 +241,10 @@ export async function migrateLegacyAuth( } /** - * Phase 1 of the migration write. Atomic when the store exposes - * `tryInsert?`; otherwise list-then-upsert. - * - * Returns `true` when this call wrote the record (Phase 2 should proceed); - * `false` when a v2 record for the account already existed (Phase 2 must be - * skipped to preserve any later state). - * - * The Phase 1 shape is deliberately `fallbackToken`-bearing rather than - * keyring-first: a crash between Phase 1 and Phase 2 leaves a record that - * already works for reads. Phase 2 promotes the credential to the keyring; - * if it crashes too, the next migration retry will pick up where this one - * left off. + * Phase 1. Writes a `fallbackToken`-bearing record so a crash before + * Phase 2 still leaves a working credential. Returns `true` when this + * call wrote (Phase 2 proceeds), `false` when a v2 record already existed + * (Phase 2 skipped — preserves later state). */ async function ensureV2Record( userRecords: UserRecordStore, @@ -299,12 +255,9 @@ async function ensureV2Record( if (userRecords.tryInsert) { return userRecords.tryInsert(record) } - // Non-atomic fallback. Narrow race window between `list()` and - // `upsert()` (time-of-check, time-of-use): a concurrent writer could - // sneak a record in between the two calls and our upsert would - // replace-not-merge it. Tolerable for one-time migration — concurrent - // migration runs would write the same shape anyway, and the user - // typically isn't running other auth writes simultaneously. + // Non-atomic path. Narrow time-of-check, time-of-use race between + // `list()` and `upsert()`; acceptable for one-time migration since + // concurrent runs would write the same shape. const existing = await userRecords.list() if (existing.some((r) => r.account.id === account.id)) return false await userRecords.upsert(record) @@ -312,16 +265,11 @@ async function ensureV2Record( } /** - * After Phase 1 returns `false` (existing v2 record), verify the record is - * actually readable in the current environment before retiring the legacy - * state. A clean v2 record from a prior `set()` / `setBundle()` has no - * `fallbackToken` and depends on the keyring being reachable; if it isn't, - * cleaning up the legacy slot would brick the user. - * - * Returns `null` when the existing record is safely readable. Returns a - * skip descriptor when it isn't — the caller surfaces the migration as - * `skipped` without touching the legacy state, so the next retry has a - * working credential to fall back to. + * Before retiring legacy state for an existing v2 record, confirm reads + * still work. A record from a prior `set()` / `setBundle()` has no + * `fallbackToken` and needs the keyring online; if it's offline, cleaning + * up would brick the user. Returns `null` when safe, or a skip descriptor + * the caller surfaces (leaving legacy state intact for retry). */ async function verifyExistingRecordReadable( userRecords: UserRecordStore, From 588391b019b784f5407c98d05f08bf143e9a0c9c Mon Sep 17 00:00:00 2001 From: Scott Lovegrove Date: Tue, 19 May 2026 14:29:03 +0100 Subject: [PATCH 4/4] fix(auth): address PR review round 2 on the two-phase migrate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 10 findings on #38. All resolved. P1 — correctness - Phase 2 retry on existing fallback-bearing record. When the existence check returns false AND the existing record's `fallbackToken` matches our legacy token, treat it as a prior incomplete migration and finish Phase 2 (move to keyring, clear fallback). Without this, a transient Phase 2 failure stuck the user on plaintext fallback indefinitely; future retries skipped Phase 2 because "record exists". - `userRecords.list()` failures inside the readability branch are now caught — the existence/readability path runs through the outer `migrateLegacyAuth` try/catch via `ensureV2Record`'s richer return, and `runPhase2` similarly bubbles errors to the caller as `skipped(user-record-write-failed)`. P2 — refactor / DRY - New `keyring/internal.ts` with `readAccessTokenForRecord(record, secureStore)` returning a structured `ReadAccessTokenOutcome` union. Single source for "fallback first, then keyring slot, then error variants". `KeyringTokenStore.active()` and the migration's readability probe both call it; consumers map the outcome to their own error contract (typed `CliError` vs. `MigrateSkipReason`). - `ensureV2Record` returns `{ written: true } | { written: false; existing }` so the orchestration doesn't pay a second `list()` to re-fetch the existing record. Phase 2 retry and the external-record readability branch both reuse the returned record. - Phase 2 extracted into `runPhase2(userRecords, userSlot, account, legacyToken)` so the new "initial write" and "retry" paths share one implementation. P2 — observability - New `user-keyring-unreachable` skip reason. The per-user slot being offline is now distinct from the legacy slot being offline; operators no longer see "legacy credential unreachable" when the legacy slot was just read successfully. Both reasons stay retryable. P2 — docs - `buildSingleTokenRecord` JSDoc no longer claims `writeBundleWithKeyringFallback` uses it. The bundle path carries expiry fields the single-token shape doesn't, so they're intentionally separate; the helper captures only the `hasRefreshToken: false` + optional `fallbackToken` pair that the migration paths share. P3 — tests - Dropped the `upsertSpy` count assertion in the Phase 2 SecureStoreUnavailable test (coupled to non-atomic Phase 1). - New regression: existing clean v2 record + per-user slot empty → `skipped(user-record-write-failed)`, marker stays unset. - New regression: existing record with external `fallbackToken` skips Phase 2 AND never probes the per-user slot (`km.getCalls` assertion locks the no-IPC behaviour). - New regression: existing record carrying our legacy token retries Phase 2 cleanly (setSecret + clean upsert; final state matches a one-shot migration). - Dropped the "One handle for both…" restated comment per the repo's "comments explain why, not what" guidance. 386 → 391 tests pass. Check / type-check / build all clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/internal.ts | 44 ++++++++++ src/auth/keyring/migrate.test.ts | 73 ++++++++++++++-- src/auth/keyring/migrate.ts | 140 +++++++++++++++---------------- src/auth/keyring/record-write.ts | 13 +-- src/auth/keyring/token-store.ts | 44 +++------- 5 files changed, 196 insertions(+), 118 deletions(-) create mode 100644 src/auth/keyring/internal.ts diff --git a/src/auth/keyring/internal.ts b/src/auth/keyring/internal.ts new file mode 100644 index 0000000..8145f75 --- /dev/null +++ b/src/auth/keyring/internal.ts @@ -0,0 +1,44 @@ +import { getErrorMessage } from '../../errors.js' +import type { AuthAccount } from '../types.js' +import { type SecureStore, SecureStoreUnavailableError } from './secure-store.js' +import type { UserRecord } from './types.js' + +/** + * Outcome of resolving the access token for a record. Callers map the + * structured failure variants to whatever error contract they expose — + * `KeyringTokenStore.active()` throws `CliError('AUTH_STORE_READ_FAILED', …)`; + * `migrateLegacyAuth` translates each variant into a `MigrateSkipReason`. + */ +export type ReadAccessTokenOutcome = + | { ok: true; token: string } + | { ok: false; reason: 'slot-empty' | 'slot-unavailable' | 'slot-error'; detail: string } + +/** + * `fallbackToken` first (so an offline-keyring write is preferred over a + * stale slot), then the keyring slot. Single-source for "is this record + * readable in the current environment" — `KeyringTokenStore.active()` and + * `migrateLegacyAuth`'s readability probe both call this. + */ +export async function readAccessTokenForRecord( + record: UserRecord, + secureStore: SecureStore, +): Promise { + const fallback = record.fallbackToken?.trim() + if (fallback) return { ok: true, token: fallback } + + try { + const raw = await secureStore.getSecret() + const trimmed = raw?.trim() + if (trimmed) return { ok: true, token: trimmed } + return { + ok: false, + reason: 'slot-empty', + detail: 'keyring slot returned no credential', + } + } catch (error) { + if (error instanceof SecureStoreUnavailableError) { + return { ok: false, reason: 'slot-unavailable', detail: error.message } + } + return { ok: false, reason: 'slot-error', detail: getErrorMessage(error) } + } +} diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 35537b2..233eaaa 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -346,7 +346,7 @@ describe('migrateLegacyAuth', () => { }, ) - it('returns skipped(legacy-keyring-unreachable) when an existing v2 record cannot be read', async () => { + it('returns skipped(user-keyring-unreachable) when an existing v2 record cannot be read', async () => { // A clean v2 record from a prior set/setBundle has no fallbackToken // and depends on the keyring being reachable. With the keyring // offline, cleaning up the legacy state would brick the user — we @@ -368,16 +368,46 @@ describe('migrateLegacyAuth', () => { expect(result).toMatchObject({ status: 'skipped', - reason: 'legacy-keyring-unreachable', + reason: 'user-keyring-unreachable', }) expect(marker.migrated).toBe(false) expect(km.slots.get(LEGACY)?.secret).toBe('legacy_tok') expect(state.records.get('99')).toBe(existing) }) - it('skips the readability check when an existing v2 record carries fallbackToken', async () => { - // Self-sufficient record — even if the keyring is offline, the - // fallbackToken serves reads. Migration proceeds with cleanup. + it('returns skipped(user-record-write-failed) when an existing v2 record has empty fallback AND empty slot', async () => { + // Corrupted state: the record carries no plaintext fallback and + // the per-user keyring slot is empty (deleted out of band). We + // must NOT cleanup legacy — the user has no readable credential. + const existing: UserRecord = { + account: { id: '99', email: 'me@x.io' }, + hasRefreshToken: false, + } + const { km, state, result, marker } = await runMigration({ + slots: { + [LEGACY]: { secret: 'legacy_tok' }, + // No `secret` and no `getErr` → getSecret returns null. + 'user-99': {}, + }, + seedRecords: { '99': existing }, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + }, + }) + + expect(result).toMatchObject({ + status: 'skipped', + reason: 'user-record-write-failed', + }) + expect(marker.migrated).toBe(false) + expect(km.slots.get(LEGACY)?.secret).toBe('legacy_tok') + expect(state.records.get('99')).toBe(existing) + }) + + it('skips the readability check when an existing v2 record carries an external fallbackToken', async () => { + // External record's fallback differs from the legacy token → not a + // prior-run Phase 1 → don't attempt Phase 2. The fallback alone + // makes the record readable, so the per-user slot is never probed. const existing: UserRecord = { account: { id: '99', email: 'me@x.io' }, fallbackToken: 'external_plaintext', @@ -397,13 +427,41 @@ describe('migrateLegacyAuth', () => { expect(result.status).toBe('migrated') expect(marker.migrated).toBe(true) expect(km.slots.get(LEGACY)?.secret).toBeNull() + // Per-user slot must NEVER have been read — the fallback short-circuits. + expect(km.getCalls.has('user-99')).toBe(false) + }) + + it('retries Phase 2 when the existing record carries our legacy token (prior incomplete migration)', async () => { + // Scenario: a previous run wrote the Phase 1 fallback record but + // Phase 2 failed (transient backend error). Marker stayed unset. + // On retry, the existence check finds the record, recognises the + // fallback token as our own legacy token, and finishes Phase 2. + const existing: UserRecord = { + account: { id: '99', email: 'me@x.io' }, + fallbackToken: 'legacy_tok', + hasRefreshToken: false, + } + const { km, state, result, marker, harness } = await runMigration({ + slots: { [LEGACY]: { secret: 'legacy_tok' } }, + seedRecords: { '99': existing }, + options: { + identifyAccount: async () => ({ id: '99', email: 'me@x.io' }), + }, + }) + + expect(result.status).toBe('migrated') + expect(km.slots.get('user-99')?.secret).toBe('legacy_tok') + expect(state.records.get('99')?.fallbackToken).toBeUndefined() + expect(marker.migrated).toBe(true) + // Phase 2 ran exactly once: setSecret + clean upsert. + expect(harness.upsertSpy).toHaveBeenCalledTimes(1) }) it('Phase 2 SecureStoreUnavailable falls back silently; Phase 1 record survives', async () => { // Headless / WSL: the per-user slot can't accept a write. Phase 1's // fallback record is self-sufficient, so the migration completes // with the token in plaintext (until a future login upgrades it). - const { km, state, result, marker, harness } = await runMigration({ + const { km, state, result, marker } = await runMigration({ slots: { [LEGACY]: { secret: 'legacy_tok' }, 'user-99': { setErr: new SecureStoreUnavailableError('no dbus') }, @@ -418,9 +476,6 @@ describe('migrateLegacyAuth', () => { expect(km.slots.get('user-99')?.secret).toBeNull() expect(marker.migrated).toBe(true) expect(km.slots.get(LEGACY)?.secret).toBeNull() - // No clean Phase 2 upsert in this branch — Phase 1 wrote the only - // record, with `fallbackToken` still populated. - expect(harness.upsertSpy).toHaveBeenCalledTimes(1) }) it('Phase 2 non-keyring setSecret failure surfaces as skipped (marker stays unset)', async () => { diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index a3d0649..488c2fa 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -1,5 +1,6 @@ import { getErrorMessage } from '../../errors.js' import type { AuthAccount } from '../types.js' +import { readAccessTokenForRecord } from './internal.js' import { buildSingleTokenRecord } from './record-write.js' import { createSecureStore, @@ -7,7 +8,7 @@ import { type SecureStore, SecureStoreUnavailableError, } from './secure-store.js' -import type { UserRecordStore } from './types.js' +import type { UserRecord, UserRecordStore } from './types.js' export type MigrateLegacyAuthOptions = { serviceName: string @@ -59,18 +60,21 @@ export type MigrateLegacyAuthOptions = { } /** - * Stable skip reasons. `legacy-keyring-unreachable` is retryable (a later - * run with the keyring online would succeed); the others are diagnostic. + * Stable skip reasons. `*-keyring-unreachable` variants are retryable (a + * later run with the keyring online would succeed); the others are + * diagnostic. */ export type MigrateSkipReason = | 'identify-failed' | 'legacy-keyring-unreachable' + | 'user-keyring-unreachable' | 'user-record-write-failed' | 'marker-write-failed' const SKIP_REASON_MESSAGES: Record = { 'identify-failed': 'could not identify user', 'legacy-keyring-unreachable': 'legacy credential is unreachable (keyring offline)', + 'user-keyring-unreachable': 'per-user credential slot is unreachable (keyring offline)', 'user-record-write-failed': 'failed to update user records', 'marker-write-failed': 'failed to persist migration marker', } @@ -132,7 +136,6 @@ export async function migrateLegacyAuth( return { status: 'already-migrated' } } - // One handle for both the initial read and the cleanup delete. const legacyStore = createSecureStore({ serviceName, account: legacyAccount }) const legacyToken = await readLegacyToken(legacyStore, loadLegacyPlaintextToken) @@ -153,9 +156,9 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'identify-failed', getErrorMessage(error)) } - let phase1Written: boolean + let phase1: Phase1Result try { - phase1Written = await ensureV2Record(userRecords, account, legacyToken.token) + phase1 = await ensureV2Record(userRecords, account, legacyToken.token) } catch (error) { return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) } @@ -165,43 +168,27 @@ export async function migrateLegacyAuth( account: accountForUser(account.id), }) - if (phase1Written) { - // Phase 2 inlined (not via `writeRecordWithKeyringFallback`) so the - // offline-keyring branch doesn't double-upsert the same fallback - // record Phase 1 just wrote. Only `SecureStoreUnavailableError` is - // swallowed — other failures leave the marker unset so a retry - // re-attempts the keyring move. - let keyringStored = false - try { - await userSlot.setSecret(legacyToken.token) - keyringStored = true - } catch (error) { - if (!(error instanceof SecureStoreUnavailableError)) { - return skipped( - silent, - logPrefix, - 'user-record-write-failed', - getErrorMessage(error), - ) - } - } - if (keyringStored) { - // Clear `fallbackToken` so plaintext doesn't outlive the keyring write. - try { - await userRecords.upsert(buildSingleTokenRecord(account)) - } catch (error) { - return skipped( - silent, - logPrefix, - 'user-record-write-failed', - getErrorMessage(error), - ) - } + // Run Phase 2 when EITHER Phase 1 just wrote the fallback record OR + // the existing record's fallback matches our legacy token — that's a + // prior-run Phase 1 we owe an upgrade. Other existing records are + // external state and get a readability check instead. + const isOurPriorPhase1 = + !phase1.written && phase1.existing.fallbackToken?.trim() === legacyToken.token + if (phase1.written || isOurPriorPhase1) { + const phase2Error = await runPhase2(userRecords, userSlot, account, legacyToken.token) + if (phase2Error) { + return skipped(silent, logPrefix, phase2Error.reason, phase2Error.detail) } } else { - const readabilityError = await verifyExistingRecordReadable(userRecords, account, userSlot) - if (readabilityError) { - return skipped(silent, logPrefix, readabilityError.reason, readabilityError.detail) + // External record — cleaning up legacy is safe only if it can be + // read in the current environment. + const outcome = await readAccessTokenForRecord(phase1.existing, userSlot) + if (!outcome.ok) { + const reason: MigrateSkipReason = + outcome.reason === 'slot-unavailable' + ? 'user-keyring-unreachable' + : 'user-record-write-failed' + return skipped(silent, logPrefix, reason, outcome.detail) } } @@ -240,63 +227,72 @@ export async function migrateLegacyAuth( return { status: 'migrated', account } } +type Phase1Result = + | { written: true } + | { written: false; existing: UserRecord } + /** * Phase 1. Writes a `fallbackToken`-bearing record so a crash before - * Phase 2 still leaves a working credential. Returns `true` when this - * call wrote (Phase 2 proceeds), `false` when a v2 record already existed - * (Phase 2 skipped — preserves later state). + * Phase 2 still leaves a working credential. Returns `{ written: true }` + * when this call wrote, or `{ written: false, existing }` when a v2 + * record already existed — the existing record is returned so callers + * decide whether to upgrade it (Phase 2 retry) or treat it as external + * state, without paying a second `list()`. */ async function ensureV2Record( userRecords: UserRecordStore, account: TAccount, legacyToken: string, -): Promise { +): Promise> { const record = buildSingleTokenRecord(account, legacyToken) if (userRecords.tryInsert) { - return userRecords.tryInsert(record) + const wrote = await userRecords.tryInsert(record) + if (wrote) return { written: true } + const existing = (await userRecords.list()).find((r) => r.account.id === account.id) + if (!existing) { + throw new Error('tryInsert returned false but no matching record was listed') + } + return { written: false, existing } } // Non-atomic path. Narrow time-of-check, time-of-use race between // `list()` and `upsert()`; acceptable for one-time migration since // concurrent runs would write the same shape. - const existing = await userRecords.list() - if (existing.some((r) => r.account.id === account.id)) return false + const all = await userRecords.list() + const existing = all.find((r) => r.account.id === account.id) + if (existing) return { written: false, existing } await userRecords.upsert(record) - return true + return { written: true } } /** - * Before retiring legacy state for an existing v2 record, confirm reads - * still work. A record from a prior `set()` / `setBundle()` has no - * `fallbackToken` and needs the keyring online; if it's offline, cleaning - * up would brick the user. Returns `null` when safe, or a skip descriptor - * the caller surfaces (leaving legacy state intact for retry). + * Phase 2: move the legacy token into the per-user keyring slot and + * upsert a clean (no `fallbackToken`) record. Inlined rather than + * delegating to `writeRecordWithKeyringFallback` so the offline-keyring + * branch doesn't double-upsert the same fallback record Phase 1 just + * wrote. Returns `null` on success (including the silently-handled + * SecureStoreUnavailable case); a skip descriptor when a non-keyring + * failure leaves the marker unset for retry. */ -async function verifyExistingRecordReadable( +async function runPhase2( userRecords: UserRecordStore, - account: TAccount, userSlot: SecureStore, + account: TAccount, + legacyToken: string, ): Promise<{ reason: MigrateSkipReason; detail: string } | null> { - const existing = (await userRecords.list()).find((r) => r.account.id === account.id) - if (existing?.fallbackToken?.trim()) return null try { - const raw = await userSlot.getSecret() - if (raw?.trim()) return null - return { - reason: 'user-record-write-failed', - detail: 'existing v2 record has no fallback token and its keyring slot is empty', - } + await userSlot.setSecret(legacyToken) } catch (error) { if (error instanceof SecureStoreUnavailableError) { - return { - reason: 'legacy-keyring-unreachable', - detail: 'existing v2 record has no fallback token and the per-user keyring slot is unreachable', - } - } - return { - reason: 'user-record-write-failed', - detail: getErrorMessage(error), + return null // Phase 1 fallback record continues to serve reads. } + return { reason: 'user-record-write-failed', detail: getErrorMessage(error) } + } + try { + await userRecords.upsert(buildSingleTokenRecord(account)) + } catch (error) { + return { reason: 'user-record-write-failed', detail: getErrorMessage(error) } } + return null } async function readLegacyToken( diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index a46f047..4c82d39 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -181,11 +181,14 @@ export async function writeBundleWithKeyringFallback( account: TAccount, diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index a2cbd47..8fb61bd 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -1,12 +1,12 @@ -import { CliError, getErrorMessage } from '../../errors.js' +import { CliError } from '../../errors.js' import type { AccountRef, AuthAccount, TokenBundle, TokenStore } from '../types.js' import { accountNotFoundError } from '../user-flag.js' +import { readAccessTokenForRecord } from './internal.js' import { writeBundleWithKeyringFallback, writeRecordWithKeyringFallback } from './record-write.js' import { createSecureStore, DEFAULT_ACCOUNT_FOR_USER, SECURE_STORE_DESCRIPTION, - SecureStoreUnavailableError, type SecureStore, } from './secure-store.js' import { refreshAccountSlot } from './slot-naming.js' @@ -223,36 +223,16 @@ export function createKeyringTokenStore( // returns the pre-PR1 snapshot shape — a future bundle-aware // read path lights up the refresh slot only when callers // actually need it (silent refresh). - const fallback = record.fallbackToken?.trim() - if (fallback) return { token: fallback, account: record.account } - - let raw: string | null - try { - raw = await secureStoreFor(record.account).getSecret() - } catch (error) { - if (error instanceof SecureStoreUnavailableError) { - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `${SECURE_STORE_DESCRIPTION} unavailable; could not read stored token (${error.message})`, - ) - } - // Non-keyring backend failures wrap into the typed code too — - // a raw exception escaping `active()` would crash the CLI - // with no useful exit signal. - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `Access-slot read failed (${getErrorMessage(error)})`, - ) - } - - const token = raw?.trim() - if (token) return { token, account: record.account } - - // Record exists, no `fallbackToken`, slot empty — corruption. - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.`, - ) + const outcome = await readAccessTokenForRecord(record, secureStoreFor(record.account)) + if (outcome.ok) return { token: outcome.token, account: record.account } + // Map structured outcomes to the typed error contract. + const message = + outcome.reason === 'slot-empty' + ? `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.` + : outcome.reason === 'slot-unavailable' + ? `${SECURE_STORE_DESCRIPTION} unavailable; could not read stored token (${outcome.detail})` + : `Access-slot read failed (${outcome.detail})` + throw new CliError('AUTH_STORE_READ_FAILED', message) }, async set(account, token) {