From a9b9cd00d09204e32ad317948c6fa91ae7dbc1d5 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Fri, 26 Jun 2026 11:43:19 +0000
Subject: [PATCH 1/3] fix(debug-files): exit non-zero when files are dropped
 for size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A partial size-drop during upload silently exited 0. The all-dropped
cases already fail loudly (filterBySize throws ValidationError; the
scan-time path exits via doNothingToUpload), but a partial drop did
not: oversized files left no result entry, so they were never counted
as failures.

- uploadDebugFiles now returns oversized (filterBySize-dropped) files as
  `error` results — most relevant for in-memory --include-sources
  bundles, which bypass the scan-time gate.
- doUpload now receives oversizedCount/maxFileSize and exits non-zero
  when scan-time oversized files were dropped, matching doNothingToUpload.

Surfaces Warden finding 9LL-87A on #1140.
---
 src/commands/debug-files/upload.ts       | 27 +++++++++++-
 src/lib/api/debug-files.ts               | 56 ++++++++++++++++++++----
 test/commands/debug-files/upload.test.ts | 46 +++++++++++++++++++
 test/lib/api/debug-files.test.ts         | 14 ++++--
 4 files changed, 129 insertions(+), 14 deletions(-)

diff --git a/src/commands/debug-files/upload.ts b/src/commands/debug-files/upload.ts
index 079a8e868..9a6fde14c 100644
--- a/src/commands/debug-files/upload.ts
+++ b/src/commands/debug-files/upload.ts
@@ -372,7 +372,11 @@ function* doNothingToUpload(
 
 /**
  * Perform the upload, yield the result, and return a hint. Non-terminal states
- * (error, not_found) set the exit code and return a descriptive hint.
+ * (error, not_found) set the exit code and return a descriptive hint. Files
+ * skipped for exceeding the maximum file size — at scan time (`oversizedCount`)
+ * or at upload time (returned as `error` results by `uploadDebugFiles`) — also
+ * set a non-zero exit, so a partial size-drop is never reported as a clean
+ * success (consistent with the all-dropped path in `doNothingToUpload`).
  * Also honors `--require-all` against the requested `--id` values.
  */
 async function* doUpload(
@@ -385,6 +389,8 @@ async function* doUpload(
     maxWaitMs: number;
     missingRequestedIds: string[];
     requireAll: boolean;
+    oversizedCount: number;
+    maxFileSize: number;
     serverOptions?: ChunkServerOptions;
   }
 ) {
@@ -403,6 +409,14 @@ async function* doUpload(
     filesUploaded: results.length,
   });
 
+  // Scan-time oversized files were dropped before the queue was built, so they
+  // never appear in `results`. Note them on any failure hint and treat them as
+  // a failure in their own right below.
+  const scanOversize =
+    params.oversizedCount > 0
+      ? ` ${params.oversizedCount} file(s) were skipped for exceeding the maximum file size (${params.maxFileSize} bytes).`
+      : "";
+
   const failures = results.filter(
     (r) => r.state === "error" || r.state === "not_found"
   );
@@ -415,7 +429,14 @@ async function* doUpload(
       )
       .join("; ");
     return {
-      hint: `${failures.length === 1 ? "1 file" : `${failures.length} files`} had failures: ${details}`,
+      hint: `${failures.length === 1 ? "1 file" : `${failures.length} files`} had failures: ${details}.${scanOversize}`,
+    };
+  }
+
+  if (params.oversizedCount > 0) {
+    setExitCode(1);
+    return {
+      hint: `Uploaded ${results.length} debug file(s) to ${params.org}/${params.project}, but ${params.oversizedCount} file(s) were skipped for exceeding the maximum file size (${params.maxFileSize} bytes).`,
     };
   }
 
@@ -643,6 +664,8 @@ export const uploadCommand = buildCommand({
       maxWaitMs,
       missingRequestedIds: missingIds,
       requireAll,
+      oversizedCount,
+      maxFileSize,
     });
   },
 });
diff --git a/src/lib/api/debug-files.ts b/src/lib/api/debug-files.ts
index 0921f47d7..94a32273d 100644
--- a/src/lib/api/debug-files.ts
+++ b/src/lib/api/debug-files.ts
@@ -300,30 +300,61 @@ function buildResults(
 // ── API Functions ───────────────────────────────────────────────────
 
 /**
- * Drop files larger than the server-advertised per-file cap.
+ * Split files into those within the server-advertised per-file cap and those
+ * over it.
  *
  * @param difs - Files queued for upload.
  * @param maxFileSize - Server's per-file byte cap; `0` disables the gate.
- * @returns The subset within the cap. Oversized files are logged at `warn`.
+ * @returns `{ accepted, dropped }`. Oversized files are logged at `warn` and
+ *   returned in `dropped` so the caller can report them as failures rather
+ *   than silently discarding them.
  */
 function filterBySize(
   difs: DebugFileUpload[],
   maxFileSize: number
-): DebugFileUpload[] {
+): { accepted: DebugFileUpload[]; dropped: DebugFileUpload[] } {
   if (maxFileSize <= 0) {
-    return difs;
+    return { accepted: difs, dropped: [] };
   }
   const accepted: DebugFileUpload[] = [];
+  const dropped: DebugFileUpload[] = [];
   for (const dif of difs) {
     if (dif.content.length > maxFileSize) {
       log.warn(
         `Skipping ${dif.name}: size ${dif.content.length} exceeds server maximum file size ${maxFileSize}`
       );
+      dropped.push(dif);
       continue;
     }
     accepted.push(dif);
   }
-  return accepted;
+  return { accepted, dropped };
+}
+
+/**
+ * Build failed result entries for files dropped by the size gate.
+ *
+ * Oversized files are never chunked or assembled, so the server never returns a
+ * state for them. They are reported as `error` results — not silently skipped —
+ * so the command surfaces a non-zero exit and lists them. A partial size-drop
+ * (some files accepted, some over the cap) must not look like a clean success,
+ * mirroring the all-dropped case which throws.
+ *
+ * @param dropped - Files that exceeded `maxFileSize`.
+ * @param maxFileSize - The effective per-file cap, for the detail message.
+ * @returns One `error` result per dropped file (empty `checksum`, never hashed).
+ */
+function buildOversizeResults(
+  dropped: DebugFileUpload[],
+  maxFileSize: number
+): DebugFileUploadResult[] {
+  return dropped.map((dif) => ({
+    name: dif.name,
+    debugId: dif.debugId,
+    checksum: "",
+    state: "error" as const,
+    detail: `Exceeds server maximum file size of ${maxFileSize} bytes (was ${dif.content.length} bytes)`,
+  }));
 }
 
 /**
@@ -355,8 +386,10 @@ function clampMaxWait(requestedMs: number, serverMaxWaitSec: number): number {
  * The server re-parses each uploaded file and indexes every contained object
  * slice itself; `debugId` is advisory.
  *
- * Files larger than the server-advertised `maxFileSize` are dropped with a
- * warning, and the caller's `maxWaitMs` is clamped to the server's `maxWait`.
+ * Files larger than the server-advertised `maxFileSize` are not uploaded and
+ * are returned as `error` results (so a partial drop yields a non-zero exit
+ * rather than a misleading success); the caller's `maxWaitMs` is clamped to the
+ * server's `maxWait`.
  *
  * @param options - Upload configuration (org, project, files, wait mode).
  * @returns Per-file terminal/last-observed assembly state.
@@ -384,7 +417,7 @@ export async function uploadDebugFiles(
     serverOptions.maxFileSize && serverOptions.maxFileSize > 0
       ? serverOptions.maxFileSize
       : DEFAULT_MAX_DIF_SIZE;
-  const accepted = filterBySize(difs, effectiveMaxFileSize);
+  const { accepted, dropped } = filterBySize(difs, effectiveMaxFileSize);
 
   // `difs` is non-empty (checked above), so an empty `accepted` means every
   // file was dropped by the size gate. Fail loudly instead of silently
@@ -456,7 +489,12 @@ export async function uploadDebugFiles(
     });
   }
 
-  return buildResults(chunked, response);
+  // Append oversized files (dropped before assembly) as failed results so a
+  // partial size-drop produces a non-zero exit instead of a silent success.
+  return [
+    ...buildResults(chunked, response),
+    ...buildOversizeResults(dropped, effectiveMaxFileSize),
+  ];
 }
 
 /** Default maximum wait for server-side DIF processing (`--wait`). */
diff --git a/test/commands/debug-files/upload.test.ts b/test/commands/debug-files/upload.test.ts
index 02ac4ba40..3905ce5ce 100644
--- a/test/commands/debug-files/upload.test.ts
+++ b/test/commands/debug-files/upload.test.ts
@@ -346,6 +346,52 @@ describe("sentry debug-files upload", () => {
     expect(exitCode).not.toBe(0);
   });
 
+  test("a partial size-drop still uploads the rest but exits non-zero", async () => {
+    process.env.SENTRY_ORG = "test-org";
+    process.env.SENTRY_PROJECT = "test-project";
+    // One in-cap file and one valid-header file well over the cap.
+    await writeBreakpad("small.sym");
+    const bigBody = `${[
+      "MODULE Linux x86_64 1A23B5DA412AFBF7C8662048F3294F3D0 big",
+      "INFO CODE_ID DAA5130F2A41F7FBC8662048F3294F3D439CA7FF",
+    ].join("\n")}\n${Array.from(
+      { length: 500 },
+      (_, i) => `PUBLIC ${i.toString(16)} 0 sym_${i}`
+    ).join("\n")}`;
+    await writeFile(join(tempDir, "big.sym"), bigBody);
+
+    // Cap between the two sizes: the small file passes the scan gate, the big
+    // one is dropped (counted in oversizedCount) before it is ever read fully.
+    vi.spyOn(chunkUpload, "getChunkUploadOptions").mockResolvedValue({
+      url: "https://us.sentry.io/api/0/chunk-upload/",
+      chunkSize: 8192,
+      chunksPerRequest: 64,
+      maxRequestSize: 1_048_576,
+      maxFileSize: 1000,
+      hashAlgorithm: "sha1",
+      concurrency: 8,
+      compression: ["gzip"],
+    } as Awaited<ReturnType<typeof chunkUpload.getChunkUploadOptions>>);
+
+    const spy = vi.spyOn(debugFilesApi, "uploadDebugFiles").mockResolvedValue([
+      {
+        name: "small.sym",
+        debugId: KNOWN_DEBUG_ID,
+        checksum: "a".repeat(40),
+        state: "ok",
+        detail: null,
+      },
+    ]);
+
+    const { exitCode } = await runUpload([tempDir]);
+    // The in-cap file is still uploaded...
+    expect(spy).toHaveBeenCalledTimes(1);
+    expect(spy.mock.calls[0]?.[0]?.difs).toHaveLength(1);
+    // ...but the dropped oversized file makes the command fail loudly rather
+    // than reporting a clean success.
+    expect(exitCode).toBe(1);
+  });
+
   test("wait mode surfaces processing errors with a non-zero exit", async () => {
     process.env.SENTRY_ORG = "test-org";
     process.env.SENTRY_PROJECT = "test-project";
diff --git a/test/lib/api/debug-files.test.ts b/test/lib/api/debug-files.test.ts
index 279005344..c11509f34 100644
--- a/test/lib/api/debug-files.test.ts
+++ b/test/lib/api/debug-files.test.ts
@@ -373,7 +373,7 @@ describe("uploadDebugFiles", () => {
     expect(passedMissing.size).toBeGreaterThan(0);
   });
 
-  test("drops files larger than the server maxFileSize before assembling", async () => {
+  test("does not assemble oversized files but reports them as failures", async () => {
     setServerOptions({ maxFileSize: 5 });
     apiRequestToRegionMock.mockImplementationOnce(
       async (_url: string, _endpoint: string, init: { body: object }) => {
@@ -397,11 +397,19 @@ describe("uploadDebugFiles", () => {
       maxWaitMs: 1000,
     });
 
+    // Only the in-cap file is actually assembled.
     const body = lastAssembleBody();
     const names = Object.values(body).map((e) => e.name);
     expect(names).toEqual(["small.so"]);
-    expect(results).toHaveLength(1);
-    expect(results[0]?.name).toBe("small.so");
+
+    // But the oversized file is surfaced as an `error` result so a partial
+    // drop yields a non-zero exit instead of a silent success.
+    expect(results).toHaveLength(2);
+    const small = results.find((r) => r.name === "small.so");
+    const big = results.find((r) => r.name === "big.so");
+    expect(small?.state).toBe("ok");
+    expect(big?.state).toBe("error");
+    expect(big?.detail).toMatch(/maximum file size/);
   });
 
   test("throws when every file exceeds the server maxFileSize", async () => {

From 40f8047d7514f75421d61d5c1498862d266ee25d Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Fri, 26 Jun 2026 11:46:18 +0000
Subject: [PATCH 2/3] chore: update .lore.md

---
 .lore.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/.lore.md b/.lore.md
index f87035bfe..fa40dda7c 100644
--- a/.lore.md
+++ b/.lore.md
@@ -82,6 +82,9 @@
 <!-- lore:019eeff8-90ac-7755-865e-3d6ab104de96 -->
 * **Issue list sort values: SortValue type, VALID\_SORT\_VALUES, getComparator, and SDK IssueSort**: In \`src/commands/issue/list.ts\`: \`SortValue\` type (line 141, @internal) and \`VALID\_SORT\_VALUES\` array (line 143) are the CLI's local sort constraints. \`IssueSort\` in \`src/lib/api/issues.ts\` (lines 40–42) is derived from \`@sentry/api\` SDK via \`NonNullable\<NonNullable\<ListAnOrganizationSissuesData\['query']>\['sort']>\` — no API layer change needed when adding new sort values. SDK already includes \`'date' | 'freq' | 'inbox' | 'new' | 'recommended' | 'trends' | 'user'\`. To add a new sort value: (1) add to \`SortValue\` union and \`VALID\_SORT\_VALUES\`, (2) add case to \`getComparator()\` switch (default falls back to date), (3) update flag \`brief\` string, (4) change \`default\` if needed. \`appendIssueFlags()\` guards \`--sort\` omission only when \`flags.sort !== 'date'\` — update guard if default changes.
 
+<!-- lore:019f03b8-30f2-70bc-8c5e-065ffdba05e2 -->
+* **maxZipTotalSize: 2GiB memory-safety budget separate from server maxFileSize policy**: \`PrepareDifsOptions.maxZipTotalSize\` (default \`DEFAULT\_MAX\_ZIP\_TOTAL\_SIZE\` = 2GiB) is a cumulative uncompressed-extraction budget per \`.zip\` archive and a container size cap — it bounds peak decompression memory. It is distinct from \`maxFileSize\` (per-entry server upload policy). Commands do not need to pass \`maxZipTotalSize\` explicitly; \`prepareDifs\` applies the 2GiB default automatically. \`0\` disables the budget. Passed through \`prepareZipDifs\` → \`readZipDifEntries\` as \`maxTotalSize\`.
+
 <!-- lore:019e4fae-a4fc-7425-a545-e10c37ec810e -->
 * **Node SEA ink sidecar: node:sea.getAsset() replaces Bun /$bunfs/ virtual FS**: (architecture) Node SEA ink sidecar: \`node:sea.getAsset()\` replaces Bun \`/$bunfs/\` virtual FS. Ink UI sidecar embedded via \`fossilize --assets dist-build/ink-app.js\`; asset key = raw CLI arg. At runtime: \`sea.getRawAsset('dist-build/ink-app.js')\`. Main bundle never calls \`import('ink')\` — sidecar pre-bundled by text-import-plugin. Dual-mode: detect SEA via \`createRequire(import.meta.url)('node:sea')\` with try/catch fallback. \`useSnapshot: true\` BROKEN. \`useCodeCache: true\` ~15% startup improvement but platform-specific V8 blob. Suppress \`ExperimentalWarning: SQLite\`: \`process.on('warning', ...)\` at very top of \`src/bin.ts\` BEFORE any imports. fossilize asset manifest key = \`basename(manifestPath)\`; entry keys = \`entry.file\`. \`new Worker(new URL(...))\` HANGS in SEA — use Blob+URL.createObjectURL.
 
@@ -92,7 +95,7 @@
 * **Sentry CLI resolve-target cascade has 5 priority levels with env var support**: Resolve-target cascade: (1) CLI flags, (2) SENTRY\_ORG/SENTRY\_PROJECT env vars, (3) SQLite defaults, (4) DSN auto-detection, (5) directory name inference. SENTRY\_PROJECT supports \`org/project\` combo — SENTRY\_ORG ignored if set. Schema v13 merged \`defaults\` table into \`metadata\` KV with keys \`defaults.{org,project,telemetry,url}\`; getters/setters in \`src/lib/db/defaults.ts\`. Prefer dedicated SQLite tables + migrations over \`metadata\` KV for non-trivial caches. Hidden global \`--org\`/\`--project\` flags: \`mergeGlobalFlags()\` in command.ts injects hidden flag shapes, \`applyOrgProjectFlags()\` writes to \`SENTRY\_ORG\`/\`SENTRY\_PROJECT\` before auth guard. No short aliases (\`-p\` conflicts). \`@sentry/api\` SDK: wrap types at \`src/lib/api/\*.ts\` with \`as unknown as SentryX\` casts; never leak to commands. \`unwrapResult\`/\`unwrapPaginatedResult\` must stay CLI-owned. \`apiRequestToRegion\` auto-sets JSON Content-Type; \`rawApiRequest\` preserves strings.
 
 <!-- lore:019f00ac-ffa5-7edd-93fb-6ff73f6a5075 -->
-* **sentry-cli skill install paths: ~/.claude/ and ~/.agents/ only — OpenCode is never a target**: Skill source-of-truth: \`plugins/sentry-cli/skills/sentry-cli/SKILL.md\` (602 lines) + \`references/\` (28 per-command .md files). \`installAgentSkills()\` in \`src/lib/agent-skills.ts\` installs to \`~/.agents/skills/sentry-cli/\` and \`~/.claude/skills/sentry-cli/\` only — no OpenCode path. OpenCode IS detected via \`OPENCODE\_CLIENT\` env in \`src/lib/detect-agent.ts\` but only for telemetry, not skill installation. \`.opencode/\` and \`opencode.json\*\` are gitignored (lines 72–74). Cursor symlinks live at \`.cursor/skills/sentry-cli/\` pointing into \`plugins/\`. OpenCode scans \`~/.claude/skills/\*\*/SKILL.md\` and \`~/.agents/\*\*/SKILL.md\` — \`.cursor/\` is NOT scanned. \`installAgentSkills()\` never creates top-level agent roots — their presence is the detection signal that the user already has a compatible agent installed. Skills are updated on every version bump (write-if-changed optimization is unnecessary). Writes use atomic rename: temp file \`.\<name>.\<pid>.\<rand>.tmp\` in same dir → \`rename()\` into place, guaranteeing readers never observe a partial file.
+* **sentry-cli skill install paths: ~/.claude/ and ~/.agents/ only — OpenCode is never a target**: Skill source-of-truth: \`plugins/sentry-cli/skills/sentry-cli/SKILL.md\` (602 lines) + \`references/\` (28 per-command .md files). \`installAgentSkills()\` in \`src/lib/agent-skills.ts\` installs to \`~/.agents/skills/sentry-cli/\` and \`~/.claude/skills/sentry-cli/\` only — no OpenCode path. OpenCode IS detected via \`OPENCODE\_CLIENT\` env in \`src/lib/detect-agent.ts\` but only for telemetry, not skill installation. \`.opencode/\` and \`opencode.json\*\` are gitignored. Cursor symlinks live at \`.cursor/skills/sentry-cli/\` pointing into \`plugins/\`. \`installAgentSkills()\` never creates top-level agent roots — their presence is the detection signal. Skills are updated on every version bump. Writes use atomic rename: temp file \`.\<name>.\<pid>.\<rand>.tmp\` in same dir → \`rename()\` into place. Temp file dot-prefix ensures it never matches the \`SKILL.md\` glob agents scan. \`SKILL\_FILES\` is a \`ReadonlyMap\<string, string>\` with 28 entries; \`SKILL.md\` is written last (entry 28) — favorable ordering for discovery race. Windows \`rename()\` raises EPERM/EBUSY if dest is open — atomicity guarantee is POSIX-only.
 
 <!-- lore:019e51e9-4acc-7f5e-aceb-c647d96a1bcc -->
 * **src/cli.ts: middleware chain, completion optimization, sensitive argv redaction**: \`src/cli.ts\` exports \`startCli()\`, \`runCli()\`, \`runCompletion()\`. Middleware chain (innermost-first): \`\[seerTrialMiddleware, autoAuthMiddleware]\` — auth is outermost. \`autoAuthMiddleware\` uses \`isatty(0)\` not \`process.stdin.isTTY\` (Bun returns undefined). \`runCompletion()\` sets \`SENTRY\_CLI\_NO\_TELEMETRY=1\` to skip \`@sentry/node-core\` lazy-require (~280ms). \`redactArgv()\` handles \`--flag=value\` and \`--flag \<value>\` forms; \`SENSITIVE\_ARGV\_FLAGS\` includes \`token\` and \`auth-token\`. \`reportUnknownCommand()\` wrapped in try/catch — telemetry must never crash CLI. \`preloadProjectContext()\` calls \`captureEnvTokenHost()\` BEFORE any env mutation.
@@ -118,7 +121,7 @@
 * **Decided to use job ID instead**: decided to use job ID instead.
 
 <!-- lore:019e5ae3-dee1-78a6-96b5-30ea1d55e82a -->
-* **Migrated to node**: migrated from Bun to Node.
+* **Migrated to node**: Migrated to Node.js (from Bun). Migration complete as of fossilize 0.7.0 update. Stack: pnpm, vitest, tsx, Node SEA via fossilize. PRs #1017, #1018, #1019 on getsentry/cli. Benchmarks vs v0.34.0 (Bun): download 32MB→30MB, startup ~1s both, shell completions 180ms→150ms. All macOS binaries signed+notarized. fossilize handles SEA builds with V8 code cache on linux+macOS, strips debug symbols automatically. \`bun.lock\` deleted, \`vitest.config.ts\` added, all test files migrated to vitest. \`script/build.ts\` uses fossilize (\`--no-bundle\`) with esbuild for bundling — does NOT use \`Bun.build({ compile: true })\`.
 
 <!-- lore:019e56fb-b48b-7625-a591-85cdc2df10d3 -->
 * **Node.js slim build flags for SEA binary size reduction**: Node.js configure.py size-reduction flags for SEA builds: \`--with-intl=small-icu\` (English-only ICU, saves ~26-28 MiB — biggest win; CLI uses hardcoded en-US/sv-SE locales, safe); \`--with-intl=none\` (saves ~28-30 MiB but breaks \`Intl.NumberFormat\`/\`String.normalize()\` — NOT safe for this CLI); \`--without-inspector\` saves ~2-4 MiB; \`--without-amaro\` saves ~0.5 MiB; \`--v8-disable-maglev\` saves ~1-2 MiB; \`--enable-lto\` saves ~3-5 MiB. AVOID: \`--without-ssl\` (breaks HTTPS), \`--without-lief\` (BREAKS SEA), \`--without-sqlite\` (BREAKS CLI — uses node:sqlite), \`--disable-single-executable-application\` (BREAKS EVERYTHING), \`--v8-lite-mode\` (10x slower). Custom build deferred indefinitely — requires 5 native CI runners, ~3.5h cold build vs 5min fossilize. Cross-compilation from Linux to darwin NOT officially supported.
@@ -146,6 +149,9 @@
 <!-- lore:019ef48a-9ebc-78b8-926b-65582ac4d5ca -->
 * **acquireLock ENOENT: parent directory may not exist — must mkdir before writeFileSync**: Trap: \`acquireLock(lockPath)\` calls \`writeFileSync(lockPath, pid, { flag: 'wx' })\` — looks safe because the lock path is derived from the install path. But the parent directory (e.g. \`~/.sentry/bin\` or a stale \`/tmp/sentry-test-install/\`) may not exist, causing ENOENT (Sentry issue CLI-1E1). Fix: call \`mkdirSync(dirname(lockPath), { recursive: true, mode: 0o755 })\` BEFORE the try block in \`acquireLock\` (\`src/lib/binary.ts\`). CRITICAL: keep mkdir OUTSIDE the try block — if inside, mkdir's EEXIST error routes into \`handleExistingLock\`, obscuring root cause (comment claiming infinite recursion is inaccurate: actual path is EEXIST→handleExistingLock→readFileSync→ENOTDIR, single throw). The directory creation is idempotent — \`recursive: true\` is a no-op if it already exists. Merged in PR #1125 (squash \`ee768454c\`).
 
+<!-- lore:019f03bb-f9cf-7208-a183-d4f0074480f9 -->
+* **Atomic-write test tautology: 'no temp files' test passes even without atomic behavior**: Trap: a test that checks 'no \`.tmp\` files left behind after install' looks like it guards atomic write behavior. But if the non-atomic path (\`writeFile\` directly) never creates \`.tmp\` files in the first place, the \`leftovers\` filter is trivially empty and the test passes with the atomic code removed entirely — confirmed via mutation test on \`byk/fix/agent-skills-atomic-write\`. Fix: the test must exercise the overwrite/update path (where a concurrent reader could observe truncation) AND inject a fault (e.g., mock \`rename\` to throw) to verify cleanup. A test that only runs the happy-path fresh-install never touches the race condition the feature guards against. Per red-green directive: if the test passes with the guard deleted, it isn't testing the guard.
+
 <!-- lore:019e6b25-30c1-7fa5-9f5e-c6fd35b2e396 -->
 * **batch-queue.ts: 404 from upstream treated as transient — provider never disabled**: Trap: \`BatchProvider.submit()\` returns \`null\` for any non-401/403 HTTP error, including 404. \`submitBatch()\` treats \`null\` as transient and falls back — no disable happens. For providers that don't implement \`/v1/messages/batches\` (e.g. MiniMax), this causes a wasted HTTP round-trip every 30s forever. Fix: add \`"not-found"\` return value for 404 in both Anthropic and OpenAI submit methods. In \`submitBatch()\`, handle \`"not-found"\` with provider-level disable: add \`disabledBatchProviders: Set\<string>\` (keyed by provider name), persist to \`kv\_meta\` via \`setKV()\`, restore on startup. Add fast-path bypass in both \`flush()\` and \`prompt()\`. Provider-level (not per-session) because the URL is baked in at construction — one provider per process. \`groupKey()\` = \`authFingerprint(cred)|providerID\`; per-credential disable was removed in favor of per-session historically.
 
@@ -155,6 +161,12 @@
 <!-- lore:019ef4b1-50f9-7612-8918-c3e086a9ffbf -->
 * **Biome noParameterProperties: never use TypeScript constructor parameter properties in class definitions**: Trap: TypeScript parameter properties (\`constructor(private readonly handle: FileHandle)\`) look like idiomatic shorthand and compile fine. But Biome enforces \`noParameterProperties\` and will error. Fix: always declare explicit class fields and assign in constructor body. Applies to all new classes in \`src/lib/\*\*/\*.ts\`. Caught during \`FileOldReader\`/\`MemoryOldReader\` addition in \`bspatch.ts\` — 4 Biome errors at lines 281, 310, 311, 312.
 
+<!-- lore:019f03bb-fa01-76c2-9acf-42a059c4fe6a -->
+* **Biome stdin check reports 'contents aren't fixed' false positive — use file path instead**: Trap: running \`biome check --stdin-file-path=\<file>\` and piping content exits with code 1 and 'contents aren't fixed' even when \`--write\` produces no diff — looks like a real formatting violation. Fix: run Biome directly on the file path (\`biome check \<file>\`) rather than via stdin mode. The stdin mode false positive was confirmed during the \`agent-skills.ts\` review: exit code 1 from stdin, but \`--write\` produced zero changes. Always use file-path invocation for authoritative Biome results.
+
+<!-- lore:019f03b9-97b3-7520-be29-bd6ac87986fd -->
+* **build-binary job in ci.yml uses setup-node — not Bun-only as commonly assumed**: Trap: \`build-binary\` job in \`ci.yml\` is assumed to use only Bun (not \`setup-node\`) because the binary build pipeline uses fossilize/esbuild. But \`ci.yml\` lines 261–263 contain an \`actions/setup-node@v6\` step inside \`build-binary\` — PR #1145 changed it from \`node-version: "22"\` to \`${{ env.NODE\_VERSION\_22 }}\`. The PR description incorrectly stated \`build-binary\` is unaffected. Fix: always grep \`ci.yml\` for \`setup-node\` rather than assuming job intent from its name. The Node install in \`build-binary\` is likely for pnpm/tooling, not the compiled artifact.
+
 <!-- lore:019ef4c9-4cc4-78d0-9f45-fdd69b883ae5 -->
 * **bundle-sources.ts exit code 1 for no-sources relies on cli.ts never resetting exitCode — fragile invariant**: Trap: using \`this.process.exitCode = 1\` directly (at \`bundle-sources.ts:145\`) instead of throwing \`OutputError\`(=60) looks like a valid shortcut. It works today because \`cli.ts:622-649\` never resets \`exitCode\` on a clean return. But this is a fragile invariant — if \`cli.ts\` ever resets \`exitCode\`, the no-sources case silently exits 0. The \`OutputError\`(=60) idiom is the correct pattern. The \`exitCode=1\` approach was kept for consistency with \`check.ts\` pattern; a comment was added explaining the choice. Consider migrating to \`OutputError\` in a future cleanup.
 
@@ -176,6 +188,9 @@
 <!-- lore:019e6121-ffcc-74cd-acfa-49b4ef875755 -->
 * **DEVELOPMENT.md hand-written prose is not covered by any staleness check**: RESOLVED in PR #1024. \`DEVELOPMENT.md\` hand-written prose is now wrapped in \`GENERATED:START/END\` markers: \`dev-prereq\` (lines 4-7) and \`build-toolchain\` (lines 91-97). These sections are now auto-generated from \`package.json\` by \`generate-docs-sections.ts\` and validated by \`check:docs-sections --check\`. The only remaining non-generated prose in \`DEVELOPMENT.md\` is the OAuth app setup instructions and architecture description — these don't reference toolchain versions. \`generate-docs-sections.ts\` no longer contains any Bun references; \`extractPnpmVersion()\` and \`extractNodeVersion()\` throw on mismatch.
 
+<!-- lore:019f03b9-975b-7854-8a18-e1b79f1c2e2d -->
+* **eval-skill-fork.yml missing setup-node — fork PRs exposed to Node regression**: Trap: PR #1145 pinned Node versions in all 4 main workflow files and the PR description claimed completeness. But \`.github/workflows/eval-skill-fork.yml\` job \`eval\` (lines 27–57) has no \`actions/setup-node\` step — it runs on the runner's system Node. This is the exact workflow exhibiting \`ERR\_STREAM\_PREMATURE\_CLOSE\` (CVE-2026-48931 regression in Node 24.17.0/22.23.0). Fix: add \`actions/setup-node@v6\` with \`node-version: ${{ env.NODE\_VERSION\_24 }}\` before \`pnpm install\` in that job. When pinning Node versions across CI, always grep ALL workflow files for \`setup-node\` AND check for jobs that invoke Node without an explicit setup step.
+
 <!-- lore:019e84e0-a08b-7eab-8da9-2da09019883c -->
 * **existsSync+realpathSync TOCTOU: catch ENOENT instead**: Trap: \`if (!existsSync(p)) return resolve(p); return realpathSync(p)\` looks safe but has a TOCTOU race. Also: \`realpathSync\` inside async is inconsistent. Fix: call \`await realpath(p)\` (node:fs/promises) directly; catch \`ENOENT\` to fall back to \`resolve(p)\`; log non-ENOENT errors via \`logger.debug(msg, error)\` before falling back. When mocking in vitest, mock \`node:fs/promises\` not \`node:fs\`. RELATED: In cleanup/unlink catch blocks, only log non-ENOENT errors — \`ENOENT\` during cleanup is expected. Pattern: \`if ((error as NodeJS.ErrnoException).code !== 'ENOENT') logger.debug(msg, error)\`. Pre-existing silent \`catch { // Ignore }\` blocks must be fixed to log non-ENOENT errors. Confirmed fixed in PR #1046 (\`fix/install-binary-symlink-self-copy\`).
 
@@ -239,6 +254,12 @@
 <!-- lore:019f0072-5148-7a13-afdb-e5be810ae3d0 -->
 * **pnpm test triggers generate:docs + generate:sdk pre-steps — always times out in CI-like environments**: Trap: \`pnpm test\` looks like the standard way to run tests. But \`test:unit\` = \`pnpm run generate:docs && pnpm run generate:sdk && vitest run test/lib test/commands test/types --coverage\` — the doc/SDK generation pre-steps cause 120s+ timeouts. Fix: run vitest directly on specific test files: \`npx vitest run test/lib/dif test/commands/debug-files\` (or similar scoped paths). Use \`pnpm test\` only for final pre-commit validation. \`test:init-eval\` is the only script without the preamble (uses \`--testTimeout 600000\` instead).
 
+<!-- lore:019f03b8-30b0-7c00-82f5-c2ab2ff9c8e4 -->
+* **prepareZipDifs null return means 'not a zip' OR 'container skipped' — non-null (even empty) means fully handled**: Trap: \`null\` from \`prepareZipDifs\` previously meant only 'not a ZIP'. After the maxTotalSize refactor, \`null\` also means 'container skipped wholesale (too large / malformed)' — both cases should fall through to normal file handling. Fix: the contract is now 'non-null result (even empty array) means the path was a \`.zip\` and is fully handled; \`null\` means fall through.' The \`continue\` in \`prepareDifs\` fires on any non-null result, so an empty array correctly skips the file without treating it as a non-ZIP. Document this dual-null semantics in JSDoc on \`prepareZipDifs\`.
+
+<!-- lore:019f03b8-3069-7e71-87b6-97ddf1fb5363 -->
+* **prepareZipDifs oversizedCount must NOT feed exit-driving counter — format unknown pre-decompression**: Trap: zip entry oversized warnings look like they should increment \`oversizedCount\` in \`prepareDifs\`, just like on-disk file oversize does — both are 'too large' signals. But a compressed entry's format is unknown until inflated, so it can't be attributed to the requested \`--type\`. Counting it would turn an unrelated oversized asset inside a \`.zip\` into a false 'all matched files too large' failure and wrong exit code. Fix: \`prepareZipDifs\` returns \`PreparedDif\[]\` (not \`{ prepared, oversizedCount }\`); oversized zip entries warn per-entry inside \`readZipDifEntries\` only. The \`oversizedCount\` counter in \`prepareDifs\` is exclusively for format-accurate on-disk files.
+
 <!-- lore:019efa43-b28c-727b-a2d2-d41070cf7975 -->
 * **print-sources must never claim to preview bundle-sources while listing sources bundle-sources would skip**: Trap: \`print-sources\` iterates ALL objects in an archive (full inspection value), while \`bundle-sources\` only bundles the first object with debug info (\`selectBundledObject\`). Listing all objects then showing a \`bundle-sources\` hint looks like a preview of that command — but it isn't. Fix: add multi-object warning naming the exact slice that would be bundled (via \`selectBundledObject\`), add \`hasDebugInfo\` to JSON output so consumers can apply the same selection rule, and ensure footer hint distinguishes no-objects / enumeration-failure / no-sources. 🔴 Directive: never claim to preview \`bundle-sources\` while collecting sources \`bundle-sources\` would never collect.
 
@@ -293,6 +314,9 @@
 <!-- lore:019db0c9-9cc7-7352-b1f2-61b34b87b252 -->
 * **Whole-buffer matchAll slower than split+test when aggregated over many files**: Grep/scan traps in \`src/lib/scan/\`: (1) Whole-buffer \`regex.exec\` 12× faster per-file but ~1.6× SLOWER over 10k files — early-exit at \`maxResults\` via \`mapFilesConcurrent.onResult\` wins. (2) Literal prefilter is FILE-LEVEL gate (\`indexOf\`→skip); per-line verify breaks cross-newline patterns and Unicode length-changing \`toLowerCase\`. (3) Extractor \`hasTopLevelAlternation\`+\`skipGroup\` must call \`skipCharacterClass\`. (4) Wake-latch race: use latched \`pendingWake\` flag, not \`let notify=null; await new Promise(r=>notify=r)\`. (5) \`mapFilesConcurrent\` filters \`null\` but NOT \`\[]\` — return \`null\` for no-op files. (6) \`collectGlob\`/\`collectGrep\` must NOT forward \`maxResults\` to iterator; drain uncapped, set \`truncated=true\`. Worker pool: lazy singleton, size \`min(8, max(2, availableParallelism()))\`. Matches encoded as \`Uint32Array\` quads transferred via \`postMessage\` (~40% faster). \`new Worker(new URL(...))\` HANGS in SEA binaries — use Blob+URL.createObjectURL. FIFO \`pending\` queue per worker. \`ref()\`/\`unref()\` idempotent — only unref when \`inflight\` drops to 0. Disable via \`SENTRY\_SCAN\_DISABLE\_WORKERS=1\`.
 
+<!-- lore:019f03bb-fa39-7b96-8527-f11ac2b9e67b -->
+* **Windows rename() raises EPERM/EBUSY on open destination — atomic write is POSIX-only**: Trap: \`rename()\` for atomic file swap looks cross-platform because Node.js exposes it on all OSes. But on Windows, if the destination file is open by a concurrent reader, \`rename()\` raises EPERM or EBUSY — the swap fails rather than being atomic. The JSDoc claim 'eliminates the truncation race' is unqualified but only holds on POSIX. \`win32-x64\` is a shipped sentry-cli target. Impact is graceful (write returns null, captureException fires) but the atomicity guarantee must be documented as POSIX-only. Fix: qualify JSDoc with 'on POSIX systems'; on Windows the fallback is non-atomic \`writeFile\` (same as before the patch).
+
 <!-- lore:019ef403-c9d7-7122-ac42-71eaee7d2312 -->
 * **worktree node\_modules symlink causes esbuild host/binary version mismatch**: Trap: when running multiple agent worktrees in parallel, \`node\_modules\` may be a symlink to a sibling worktree's install — looks fine until esbuild runs. Error: 'Host version X does not match binary version Y' crashes all generate scripts and typecheck. Fix: remove the symlink and run \`pnpm install\` in the worktree root to get an independent \`node\_modules\`. The symlink is created by mutation-test workflows that intentionally share \`node\_modules\` for speed — but only safe when both worktrees are on the same branch/lockfile.
 
@@ -364,6 +388,9 @@
 <!-- lore:019e6abf-8e5c-7a3c-9333-55d15534fb6d -->
 * **IssueViewOutputSchema: extends SentryIssueSchema with enrichment fields for --fields discoverability**: \`IssueViewOutputSchema\` in \`src/types/sentry.ts\` extends \`SentryIssueSchema\` with 4 enrichment fields added by \`jsonTransformIssueView\`: \`event\` (z.unknown().nullable().optional() — too complex to enumerate; documenting all sub-fields would bloat the table without adding useful \`--fields\` discoverability), \`org\` (string | null), \`replayIds\` (array), \`trace\` (object with \`traceId\`+\`spans\`, nullable). Wired via \`schema: IssueViewOutputSchema\` on the \`output\` config in \`view.ts\`. Pattern mirrors \`ReplayViewOutputSchema\` in \`src/types/replay.ts:356-367\`. Exported from \`src/types/index.ts\`. Generates a JSON Fields table in \`references/issue.md\` and \`--help\` output. \`event\` uses \`z.unknown()\` because documenting all sub-fields (stacktraces, breadcrumbs, contexts) would bloat the table without adding useful \`--fields\` discoverability.
 
+<!-- lore:019f03b9-97f4-7bb6-aeba-27fa5aeca79b -->
+* **Node version pinning convention: workflow-level env vars NODE\_VERSION\_22 / NODE\_VERSION\_24**: As of PR #1145, all GitHub Actions workflows in sentry-cli (TypeScript) centralize Node version pins as workflow-level \`env\` vars: \`NODE\_VERSION\_22: "22.23.1"\` and \`NODE\_VERSION\_24: "24.18.0"\`. All \`actions/setup-node\` steps reference \`${{ env.NODE\_VERSION\_22 }}\` or \`${{ env.NODE\_VERSION\_24 }}\` — no bare \`"22"\`/\`"24"\` strings. Matrix jobs use ternary: \`${{ matrix.node == '24' && env.NODE\_VERSION\_24 || env.NODE\_VERSION\_22 }}\`. Motivation: Node 24.17.0/22.23.0 shipped \`ERR\_STREAM\_PREMATURE\_CLOSE\` regression (CVE-2026-48931 http.Agent fix); fixed in 24.18.0/22.23.1 (nodejs/node#64004). When bumping Node, update the \`env\` block in each workflow file.
+
 <!-- lore:019da137-2058-7cab-8b2d-f1b78f110db3 -->
 * **Preserve ApiError type so classifySilenced can silence 4xx errors**: Preserve ApiError type for classifySilenced: \`classifySilenced\` (src/lib/error-reporting.ts) only silences \`ApiError\` with status 401-499 — wrapping in generic \`CliError\` loses \`status\` and causes 403s to be captured. Re-throw via \`new ApiError(msg, error.status, error.detail, error.endpoint)\` with terse message (\`ApiError.format()\` appends detail/endpoint). \`ValidationError\` without \`field\` collapses unfielded errors into one fingerprint; always pass \`field\`. Fingerprint rule changes don't retroactively re-fingerprint — manually merge new groups into canonical old parents. \`ApiError\` rule keys by \`api\_status + command\`.
 
@@ -408,6 +435,9 @@
 <!-- lore:019efaa6-2b5c-74ac-8655-2b8f786d2370 -->
 * **Always check git status after tests pass before committing**: Always check git status after tests pass before committing: After a full test suite run completes successfully, check git status to review all changed files before staging. Run \`git status\` (and often \`git diff\`) after confirming a clean test run to enumerate modified/new files — including generated files, docs, and skill files — before committing. Also verify \`git log\` and \`git diff origin/\<branch>\` to confirm no unexpected commits from parallel agent sessions (parallel agents can commit stray \`.lore.md\` changes to wrong branches).
 
+<!-- lore:019f037e-e883-7b71-96c0-7e7b5f3f0e13 -->
+* **Always check VCS type (jj vs git) before performing version control operations**: Before pushing branches, creating PRs, or performing any VCS operations, the user expects the assistant to first determine whether the repository uses jj (check for .jj/ directory) or plain git. This check should happen automatically as part of the PR/commit workflow without needing to be asked. If jj is detected, use jj-specific commands and the jj-guide skill. If plain git, proceed with standard git/gh commands. This VCS detection step is a prerequisite to any branch or PR operation.
+
 <!-- lore:019e6147-fc4d-750f-9eeb-8dc4e1261f21 -->
 * **Always clarify that the repo uses plain git (not jj) when jj commands fail**: When a jj command fails with 'no jj repo in .', the user consistently clarifies that the repo is a plain git repo and that jj's 'never fails on conflict' behavior is being referenced conceptually — meaning conflicts should be recorded/resolved rather than aborting operations. The agent should: (1) fall back to git commands immediately without retrying jj, (2) handle merge conflicts by stashing, pulling, and resolving (e.g., \`git checkout --theirs\` for files like \`.lore.md\`), and (3) not attempt \`jj git init\` or any jj initialization. This pattern appears at the start of every build session.
 
@@ -423,9 +453,15 @@
 <!-- lore:019f010e-4d23-7188-921d-9d2956b1e504 -->
 * **Always design tests with explicit edge case rationale before implementation**: When planning tests, the user specifies not just what to test but \*why\* each test case exists — documenting the exact scenario, the specific input values chosen (e.g., 'two entries of 10,000 zero bytes — highly compressible'), and the expected behavior with reasoning (e.g., 'old behavior was buggy'). Tests are categorized by regression type (M1, M2, H1) and designed to target specific failure modes like zip-bomb bypasses, budget edge cases, and unsupported compression methods. Follow this pattern by proposing test cases with named categories, explicit input rationale, and documented expected outputs before writing any test code.
 
+<!-- lore:019f03bc-8d8a-70ca-bb1e-43e566a02e87 -->
+* **Always distinguish E2E skill-eval failures as Anthropic API flakes, not code regressions**: When E2E tests fail on \`skill-eval.test.ts\`, the user consistently expects the assistant to immediately check whether the failure is due to Anthropic API outages ('Premature close' errors on api.anthropic.com) rather than code regressions. The assistant should: (1) confirm all other checks (lint, unit, warden, build) are green, (2) identify the specific error pattern in logs, (3) classify the failure as infrastructure flake vs. regression, (4) avoid unnecessary re-runs during confirmed sustained outages, and (5) present the user with clear options (retry vs. admin override) rather than making the merge decision unilaterally. The user treats this as a known flaky test category requiring triage, not automatic re-runs.
+
 <!-- lore:019e6896-afff-7bbc-828a-dcec56e22c4f -->
 * **Always document invariants and non-obvious design decisions as inline code comments or JSDoc**: When implementing functions, types, or test utilities with non-obvious invariants, the user consistently adds explicit inline comments or JSDoc explaining \*why\* a design choice was made — not just what it does. Examples: why a function always returns non-null, why a branch is intentionally omitted, why a specific API is used over an alternative, and what would break if the pattern changed. These comments are written as assertions ('Always returned (never null)...', 'Always restore — never delete.') and reference the downstream consumer or failure mode. When generating or modifying code, always include this kind of explanatory commentary for invariants, idempotency guarantees, and intentional omissions.
 
+<!-- lore:019f03a1-2300-7925-bbf5-1554a9e11b56 -->
+* **Always establish repo/worktree context and toolchain constraints before starting code review**: At the start of every adversarial code review session, the user explicitly declares: the repo path and any worktree paths, the active branch, the Node/pnpm/volta binary locations, how to run tests (vitest) and linting (biome) including exact commands and which directory to run them from, where node\_modules exist (main checkout only, not worktree), which files are under review, and the merge base. This context is treated as immutable constraints for the session. The assistant should absorb all of this upfront metadata before doing any analysis, and never assume defaults — always use the explicitly stated paths, commands, and file locations.
+
 <!-- lore:019e69c4-387a-79c9-971c-94ea2c1053c9 -->
 * **Always explore codebase systematically before implementing changes**: When investigating a feature or bug, the user consistently requests reads of multiple related files in sequence before any implementation: the command file, the API layer, the type definitions, the formatters, and any generated/skill files. They want to understand the complete data flow end-to-end (API response → types → command → output/formatter) before making changes. Always read all relevant files across these layers when asked to investigate or analyze a feature, rather than jumping straight to implementation or reading only one file.
 
@@ -444,11 +480,20 @@
 <!-- lore:019f011e-25ed-78a8-9ac2-a89177197198 -->
 * **Always investigate unexpected file modifications before committing**: When reviewing git status or diffs before committing, the user expects the agent to flag and investigate any modified files that the agent did not explicitly change during the session. The agent should identify the provenance of unexpected changes (e.g., leftover from prior sessions, side effects of test runs, or generate steps) before staging and committing. The agent should never silently include unrecognized modifications in a commit without first explaining why those files are present in the diff.
 
+<!-- lore:019f03ad-d611-7ba1-b1c9-d79d45e1d244 -->
+* **Always isolate atomic fixes into separate branches/worktrees away from feature work**: When the user has an unrelated atomic fix discovered while working on a feature branch, they consistently isolate it into a separate branch and worktree rather than committing it to the feature branch. The pattern: stash only the relevant files, create a new worktree off origin/main, apply the stash there, and open a separate PR. This keeps the feature working tree undisturbed and produces a clean, reviewable atomic commit. The user expects the assistant to proactively suggest and execute this isolation strategy when a fix is unrelated to the current feature branch.
+
 <!-- lore:019edfc1-0d4d-7418-92b9-b925020bff8f -->
 * **Always keep work-in-progress repos at permanent paths under ~/Code/getsentry/**: When working on a repository across sessions, the user expects it to live at a permanent location under \`~/Code/getsentry/\<repo-name>\` (e.g., \`~/Code/getsentry/symbolic\`, \`~/Code/getsentry/cli\`), not in ephemeral directories like \`/tmp\`. If a clone exists in \`/tmp\`, move it to the permanent location and remove the temporary one. This applies to any \`getsentry\` org repo being actively developed on. Always use the permanent path when referencing or operating on the repo in subsequent steps.
 
 <!-- lore:019e517a-f897-7dbd-8c96-a96ee5b9abaa -->
-* **Always migrate Bun-specific APIs and tooling to Node.js equivalents**: 🔴 Directive (repeated 25+ sessions): ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required. NEVER proactively create documentation files (\*.md) or README files — only create documentation files if explicitly requested by the user.
+* **Always migrate Bun-specific APIs and tooling to Node.js equivalents**: Migrating from Bun to Node.js/pnpm. Replace Bun-specific APIs: \`Bun.spawn\`→\`node:child\_process\`, \`Bun.sleep\`→\`node:timers/promises\`, \`bun:sqlite\`→\`node:sqlite\`, \`bun run\`→\`pnpm run\`/\`tsx\`, \`bun.lock\`→pnpm lockfile. All packages in \`devDependencies\` (never \`dependencies\`). Exception: \`script/build.ts\` uses fossilize (not \`Bun.build\`) and stays on Bun for the build-binary CI job. \`script/bundle.ts\` (npm bundle) uses esbuild via tsx and is Node-native. \`packageManager\` field in package.json: \`pnpm@10.11.0\`. After each migration phase, ensure lint and tests pass before committing. Migration is complete as of main branch (bun.lock deleted, vitest.config.ts added, all test files migrated to vitest).
+
+<!-- lore:019f03b7-69b6-765e-9d5d-7b1fc9a0de4a -->
+* **Always mutation-test new tests to verify they fail without the guarded behavior**: When reviewing or writing tests, the user performs mutation testing to confirm a test is non-tautological: they revert or remove the specific implementation behavior the test is supposed to guard (e.g., replacing \`atomicWriteFile\` with plain \`writeFile\`, or moving \`mkdirSync\` inside a try block), then re-run the suite. If the test still passes after the mutation, the user flags it as a blocking defect ('vacuous' or 'tautological') and requires a fix before the PR can ship. This applies especially to tests that assert absence of side effects (e.g., no leftover temp files, no recursion) where the assertion may be trivially true even without the feature.
+
+<!-- lore:019f03a8-6f27-7ef6-a29e-388092f310fa -->
+* **Always perform adversarial/mutation testing to verify tests actually guard the feature**: The user consistently validates that tests are non-tautological by performing mutation testing — removing or replacing the feature under test (e.g., swapping atomic writes for plain writes) and confirming the tests fail. When tests pass despite the mutation, the user flags them as blocking issues ('tautological guard'). Apply this standard when writing or reviewing tests: a test that passes both with and without the implementation is considered a blocking defect, not a passing test. Always ensure tests would fail if the feature were removed.
 
 <!-- lore:019e6a25-bb28-7a14-9e33-476862ccbf31 -->
 * **Always perform thorough codebase exploration before designing or implementing fixes**: When investigating a bug or feature, the user consistently requests comprehensive upfront exploration across multiple files before any code changes. This includes: reading relevant command and API files completely, searching for all references to key terms/parameters, checking type definitions in SDK/node\_modules, and understanding the full data flow from flags to API calls. The user expects the assistant to map out the entire call chain, identify misleading comments, and surface all related code paths before proposing a solution. Do not jump to fixes — first read all relevant files thoroughly and report findings.
@@ -471,9 +516,15 @@
 <!-- lore:019e56e7-1757-7b5c-9d3b-059491b69f0e -->
 * **Always reference external tools and prior art when exploring build/size optimization approaches**: When investigating build pipeline improvements or binary size reduction, the user consistently references specific external tools, repos, and contacts (e.g., Vercel's build-binary.mjs, binpunch, fossilize, Melkey's work) as starting points for evaluation. They expect the assistant to analyze whether each referenced approach actually applies to their specific setup before recommending it. The user wants a clear breakdown of what's relevant vs. irrelevant given their actual architecture (e.g., 'we already use esbuild full bundling, so node\_modules stripping doesn't apply'), followed by concrete alternative opportunities ranked by impact.
 
+<!-- lore:019f03ba-2bcf-7bf6-9511-26658b3b9578 -->
+* **Always request critical objective code reviews before merging PRs**: Before merging a PR, the user requests a thorough, critical, and objective code review with a specific checklist of verification items. Reviews cover: completeness of changes (grep-based verification), correctness of logic (matrix conditions, guards, version accuracy), YAML/scoping correctness, absence of unintended changes, accuracy of PR description, and CI/linting findings. The user explicitly states 'Do NOT modify any files. Review only.' Reviews are structured around numbered verification items the user provides upfront. The assistant should read all changed files, run grep/shell commands to verify completeness, and report findings per the user's checklist without making any edits.
+
 <!-- lore:019e56f4-d4d9-7d15-b729-2f11a05dd23d -->
 * **Always research technical approaches thoroughly before implementation**: When facing a significant technical decision or migration, the user consistently requests deep research into multiple approaches before writing any code. This includes: fetching specific upstream documentation/source files (e.g., BUILDING.md, configure.py), identifying concrete flags/options, estimating build times, and evaluating cross-compilation feasibility. The user wants tradeoffs between paths laid out explicitly. Only after research is complete does implementation begin. When presenting research, include specific flags, URLs, estimated costs (time/size), and platform constraints.
 
+<!-- lore:019f03a8-6ae6-73df-a33d-8a5e56a98a35 -->
+* **Always respond to bot review comments by either fixing the issue or explicitly declining with a rationale**: When bot review comments (cursor\[bot], sentry\[bot], sentry-warden\[bot], etc.) appear on a PR, the user consistently acts on each one: either implementing a fix (often with a new test and commit) or explicitly replying to decline it as a false positive with a clear technical rationale. The user does not ignore bot findings. For false positives, the reply explains why the flagged code is actually safe (e.g., referencing the guard conditions that make the scenario impossible). For valid findings, the user fixes the root cause, adds/updates tests, verifies with lint+typecheck+tests, then commits and pushes.
+
 <!-- lore:019efa9a-90d2-7958-8585-87e721daa9a3 -->
 * **Always run Biome auto-fix first before manually addressing lint errors**: Lint/typecheck workflow: run Biome auto-fix first, then manual fixes, then tsc. Always: (1) \`biome check --write\` for auto-fixable issues, (2) \`--unsafe\` if needed, (3) manually fix remaining (hoisted regexes, parameter properties, cognitive complexity >15 via helper extraction, import ordering), (4) \`tsc\` exit 0. When complexity exceeds 15, extract named helper functions — do not restructure conditionals or suppress the rule. Pre-existing errors in unrelated files are noted but not required to fix.
 
@@ -510,6 +561,9 @@
 <!-- lore:019e6107-5329-700f-b5b8-e7fd5804d036 -->
 * **Always verify code claims against actual file contents before accepting them as true**: When evaluating PRs, documentation, or assertions about code behavior, the user systematically cross-checks every claim against the actual source files at specific line numbers. They expect the assistant to read the real files, confirm exact line locations, quote the relevant code/comments, and flag discrepancies between what is claimed and what the code actually does. The user marks confirmed findings with 🟡 (verified) and actionable directives with 🔴 (user assertion/directive). Never accept a PR description or assertion at face value — always ground-truth it against the codebase with precise line references.
 
+<!-- lore:019f03ba-f014-7215-ab46-a33c30c37eb4 -->
+* **Always verify facts with tool results before accepting claims as correct**: The user consistently cross-checks assertions — including their own prior statements and PR descriptions — against actual tool results (changelogs, diffs, grep output, CI logs). When a claim is found to be inaccurate (e.g., 'build-binary uses Bun not setup-node', or which Node version first contained a CVE fix), the user expects it to be flagged explicitly with the correct finding. The user treats unverified claims as hypotheses, not facts, and expects the assistant to surface discrepancies rather than accept stated motivations or descriptions at face value. Apply this skepticism to PR descriptions, commit messages, and version numbers alike.
+
 <!-- lore:019e6105-099f-7869-907c-891c78ee3c38 -->
 * **Always verify PR claims against actual codebase before accepting changes**: When reviewing a PR, the user consistently directs the assistant to check each stated claim against the real source files on the main branch rather than trusting the PR description or commit messages. This applies especially to documentation PRs: the user wants specific file paths, line numbers, and code excerpts cited as evidence. The user also cross-checks automated tooling (scripts, CI configs) against what they actually produce. When a PR introduces fixes, the user wants confirmation that the underlying problem genuinely existed and that the fix is correct — not just that the PR author says so. Always run the relevant check scripts and grep the codebase directly rather than reasoning from PR metadata alone.
 
@@ -520,7 +574,7 @@
 * **Always work from a structured plan file before executing multi-step tasks**: When tackling multi-step or multi-file changes, the user consistently creates a formal plan file (e.g., \`.opencode/plans/\<id>-\<name>.md\`) during a planning phase before any edits are made. The plan enumerates discrete numbered tasks with priorities and target files. Execution only begins after the user explicitly approves the plan. During execution, tasks are marked in\_progress and completed sequentially. The user expects this plan-then-execute workflow to be followed strictly — no file edits during planning, and tasks tracked against the approved plan.
 
 <!-- lore:019e5258-8b4f-7c31-a445-8f11513f58c7 -->
-* **Always write tests after implementing new modules or features**: Always write tests after implementing new modules or features. Tests cover validation paths, flag combinations, error states, happy paths, and env var management. Follow established patterns from similar existing test files, use \`vi.spyOn\` for API mocking, and use the project's standard runner patterns (\`run(app, ...)\` or direct loader invocation). Test writing is a required step, not optional. Also: always read existing sibling command implementations before writing new ones to extract reusable patterns (auth settings, positional/flag definitions, error handling, shared utilities, exit code patterns).
+* **Always write tests after implementing new modules or features**: After implementing a new module or integrating a feature, the user consistently adds corresponding tests — both a dedicated test file for the new module (e.g., \`semantic-display.test.ts\`) and additional tests in existing test files for integration points (e.g., new describe blocks or test cases in \`local.test.ts\`). The user also reads existing test files first to understand patterns before writing new tests. Tests are added as a required step in the todo list, not as an afterthought, and are followed by typecheck/test runs to verify correctness.
 
 <!-- lore:019ef5ed-63c5-7b44-844c-ec00d724a6ee -->
 * **Create dedicated branch for dependabot/security fixes tracking origin/main**: When fixing Dependabot alerts or security advisories, create a dedicated branch named \`chore/fix-\<description>-\<date>\` tracking origin/main (not a feature branch). Include all fix changes in a single focused commit. Push, create PR, and address CI failures immediately. This ensures security fixes are isolated from feature work and can be merged independently. Branch name example: \`chore/fix-dependabot-alerts-2026-06-23\`.
@@ -535,7 +589,10 @@
 * **Never uses form-data at runtime — only dev transitive dependency via @types/node-fetch**: User stated never uses form-data at runtime — only dev transitive dependency via @types/node-fetch.
 
 <!-- lore:019e5026-1e77-7046-a0cc-9251e57550de -->
-* **Prefers Bun-native APIs; use buildCommand from lib/command.js (never @stricli/core directly); use buildRouteMap from lib/route-map.js; silent catch blocks prohibited; every new src/lib/\*\*/\*.ts must start with module-level JSDoc; test isolation via useTestConfigDir(); prefer property-based and model-based tests over unit tests; DEFAULT\_NUM\_RUNS = 50; architecture tree documented; error exit code ranges: 1x=auth**: Project conventions (AGENTS.md): use \`pnpm run\`/\`pnpm install\`/\`pnpm add -D\` (NOT bun for package management); use buildCommand from lib/command.js (never @stricli/core directly); use buildRouteMap from lib/route-map.js; silent catch blocks prohibited — every catch must re-throw, log with log.debug()/log.warn(), or return fallback WITH a log.debug() call; every new src/lib/\*\*/\*.ts must start with module-level JSDoc; test isolation via useTestConfigDir(); prefer property-based and model-based tests over unit tests; DEFAULT\_NUM\_RUNS=50; error exit code ranges: 1x=auth, 2x=input/config, 3x=API/network, 4x=feature/billing, 5x=operations, 6x=command-specific. Testing: vitest + fast-check (NEVER bun:test). All packages in devDependencies (CI enforces via \`pnpm run check:deps\`). NEVER merge a PR if CI is failing unless explicitly told to ignore. Always use \`pnpm add -D \<package>\` — never add to \`dependencies\`.
+* **Prefers Bun-native APIs; use buildCommand from lib/command.js (never @stricli/core directly); use buildRouteMap from lib/route-map.js; silent catch blocks prohibited; every new src/lib/\*\*/\*.ts must start with module-level JSDoc; test isolation via useTestConfigDir(); prefer property-based and model-based tests over unit tests; DEFAULT\_NUM\_RUNS = 50; architecture tree documented; error exit code ranges: 1x=auth**: prefer Bun-native APIs over Node.
 
 <!-- lore:019f00a1-a3e0-79c1-9bf7-b3e8257823f4 -->
 * **Prefers freshly-created location over existing**: \`installAgentSkills()\` in \`src/lib/agent-skills.ts\` returns \`results.find(location => location.created) ?? results\[0] ?? null\` — prefers freshly-created location over existing. Installs to \`~/.agents/skills/sentry-cli/\` and \`~/.claude/skills/sentry-cli/\` only. Detection: \`detectClaudeCode()\` checks \`existsSync(join(homeDir, '.claude'))\`; installer never creates top-level agent roots — presence of root dir is the detection signal. OpenCode is detected via \`OPENCODE\_CLIENT\` env var in \`detect-agent.ts\` for telemetry only — NOT for skill installation. No OpenCode skill install path exists anywhere in the repo.
+
+<!-- lore:019f03bd-10e7-779d-942f-c85cc596a3e8 -->
+* **Review code before committing**: Behavioral pattern detected across 4 sessions (action: requested-review). The user consistently demonstrates this behavior.

From d876264fa71db22a997ed76c07972aad898647e8 Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Fri, 26 Jun 2026 14:07:19 +0000
Subject: [PATCH 3/3] chore: update .lore.md

---
 .lore.md | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/.lore.md b/.lore.md
index fa40dda7c..81029e971 100644
--- a/.lore.md
+++ b/.lore.md
@@ -182,12 +182,18 @@
 <!-- lore:019e610e-c114-7b76-8172-dc6535a408ff -->
 * **dashboard revisions/restore and issue events subcommands are undocumented in fragment files**: RESOLVED in PR #1024. \`docs/src/fragments/commands/dashboard.md\` now documents \`revisions\` and \`restore\`. \`docs/src/fragments/commands/issue.md\` now documents \`events\` and \`@latest\`/\`@most\_frequent\` selectors. \`docs/src/fragments/commands/cli.md\` now documents \`defaults\` and \`import\`. \`check:fragments\` (Check 5) now validates subcommand coverage within fragment files — not just file existence. When adding new subcommands, always update the corresponding fragment in \`docs/src/fragments/commands/\` AND run \`pnpm run check:fragments\` to verify coverage.
 
+<!-- lore:019f03e5-1eeb-7cbd-a643-f77f8badbbb5 -->
+* **debug-files upload: partial size-drop silently exits 0 — doUpload must receive oversizedCount**: Trap: \`doNothingToUpload\` (all-dropped path) correctly calls \`setExitCode(1)\` when \`oversizedCount > 0\`, establishing the contract that oversized files → non-zero exit. But \`doUpload\` (partial-drop path) did not receive \`oversizedCount\`/\`maxFileSize\` — so uploading 3 files where 1 was oversized exited 0 silently. Fix (PR #1146): pass \`oversizedCount\` and \`maxFileSize\` to \`doUpload\`; append scan-oversize message to failures hint; call \`setExitCode(1)\` when \`oversizedCount > 0\` even with no upload failures. Warden finding 9LL-87A on PR #1140.
+
 <!-- lore:019ef3dd-a41f-75f7-82a4-7314053b22cd -->
 * **delta-upgrade intermediate files: always alternate .patching.a/.b — never write to source path**: Trap: writing patch output to the same path as input looks like a simple in-place update. Fix: \`applyPatchesSequentially()\` alternates between \`${destPath}.patching.a\` and \`${destPath}.patching.b\` because the old binary is mmap'd for reading — writing to the source path truncates it and corrupts output. Single-patch chains apply old→dest directly (safe, different paths). \`cleanupIntermediates()\` called in \`finally\` block removes both intermediates via \`unlinkSync\` (ignoring errors) — always runs even on failure. This is a standing directive: 'Always clean up intermediate files, even on failure' and 'never target the same path.'
 
 <!-- lore:019e6121-ffcc-74cd-acfa-49b4ef875755 -->
 * **DEVELOPMENT.md hand-written prose is not covered by any staleness check**: RESOLVED in PR #1024. \`DEVELOPMENT.md\` hand-written prose is now wrapped in \`GENERATED:START/END\` markers: \`dev-prereq\` (lines 4-7) and \`build-toolchain\` (lines 91-97). These sections are now auto-generated from \`package.json\` by \`generate-docs-sections.ts\` and validated by \`check:docs-sections --check\`. The only remaining non-generated prose in \`DEVELOPMENT.md\` is the OAuth app setup instructions and architecture description — these don't reference toolchain versions. \`generate-docs-sections.ts\` no longer contains any Bun references; \`extractPnpmVersion()\` and \`extractNodeVersion()\` throw on mismatch.
 
+<!-- lore:019f03e5-1ed3-7e84-b0ea-736572a8caa9 -->
+* **eval-skill-fork.yml has no setup-node step — floating Node version bypasses pins**: Trap: \`.github/workflows/eval-skill-fork.yml\` job \`eval\` has no \`actions/setup-node\` step at all — it runs on the runner image's system Node (e.g. 24.17.0/22.23.0), the exact versions the Node-pin PR (#1145) aimed to avoid. The PR's claim 'every setup-node step is pinned' is technically true but the intent is unmet because this job never calls setup-node. Fix: add a pinned \`actions/setup-node@v6\` step with \`node-version: ${{ env.NODE\_VERSION\_24 }}\` before \`pnpm install\` in the \`eval\` job. Discovered via subagent adversarial review of PR #1145.
+
 <!-- lore:019f03b9-975b-7854-8a18-e1b79f1c2e2d -->
 * **eval-skill-fork.yml missing setup-node — fork PRs exposed to Node regression**: Trap: PR #1145 pinned Node versions in all 4 main workflow files and the PR description claimed completeness. But \`.github/workflows/eval-skill-fork.yml\` job \`eval\` (lines 27–57) has no \`actions/setup-node\` step — it runs on the runner's system Node. This is the exact workflow exhibiting \`ERR\_STREAM\_PREMATURE\_CLOSE\` (CVE-2026-48931 regression in Node 24.17.0/22.23.0). Fix: add \`actions/setup-node@v6\` with \`node-version: ${{ env.NODE\_VERSION\_24 }}\` before \`pnpm install\` in that job. When pinning Node versions across CI, always grep ALL workflow files for \`setup-node\` AND check for jobs that invoke Node without an explicit setup step.
 
@@ -261,7 +267,7 @@
 * **prepareZipDifs oversizedCount must NOT feed exit-driving counter — format unknown pre-decompression**: Trap: zip entry oversized warnings look like they should increment \`oversizedCount\` in \`prepareDifs\`, just like on-disk file oversize does — both are 'too large' signals. But a compressed entry's format is unknown until inflated, so it can't be attributed to the requested \`--type\`. Counting it would turn an unrelated oversized asset inside a \`.zip\` into a false 'all matched files too large' failure and wrong exit code. Fix: \`prepareZipDifs\` returns \`PreparedDif\[]\` (not \`{ prepared, oversizedCount }\`); oversized zip entries warn per-entry inside \`readZipDifEntries\` only. The \`oversizedCount\` counter in \`prepareDifs\` is exclusively for format-accurate on-disk files.
 
 <!-- lore:019efa43-b28c-727b-a2d2-d41070cf7975 -->
-* **print-sources must never claim to preview bundle-sources while listing sources bundle-sources would skip**: Trap: \`print-sources\` iterates ALL objects in an archive (full inspection value), while \`bundle-sources\` only bundles the first object with debug info (\`selectBundledObject\`). Listing all objects then showing a \`bundle-sources\` hint looks like a preview of that command — but it isn't. Fix: add multi-object warning naming the exact slice that would be bundled (via \`selectBundledObject\`), add \`hasDebugInfo\` to JSON output so consumers can apply the same selection rule, and ensure footer hint distinguishes no-objects / enumeration-failure / no-sources. 🔴 Directive: never claim to preview \`bundle-sources\` while collecting sources \`bundle-sources\` would never collect.
+* **print-sources must never claim to preview bundle-sources while listing sources bundle-sources would skip**: Trap: \`print-sources\` iterates ALL objects in an archive (full inspection value), while \`bundle-sources\` only bundles the first object with debug info (\`selectBundledObject\`). Listing all objects then showing a \`bundle-sources\` hint looks like a preview of that command — but it isn't. Fix: add multi-object warning naming the exact slice that would be bundled (via \`selectBundledObject\`), add \`hasDebugInfo\` to JSON output so consumers can apply the same selection rule, and ensure footer hint distinguishes no-objects / enumeration-failure / no-sources. 🔴 Directive: never claim to preview \`bundle-sources\` while collecting sources \`bundle-sources\` would never collect. Warden finding ANP-RKY (size gate after full file read) was fixed in PR #1141 via \`prepareDifs\` refactor — \`peeked.size\` from \`fd.stat()\` checked before \`readFile\`.
 
 <!-- lore:019d9b86-868d-7ff4-b2a6-74fdd1c9d56e -->
 * **process.stdin.isTTY unreliable in Bun — use isatty(0) and backfill for clack**: \`process.stdin.isTTY\` unreliable — use \`isatty(0)\` from \`node:tty\`. Bun's single-file binary can leave \`process.stdin.isTTY === undefined\` on TTY fds. \`@clack/core\` gates \`setRawMode(true)\` on \`input.isTTY\`, silently disabling raw mode. Fix: backfill \`process.stdin.isTTY = true\` when \`isatty(0)\` confirms. Debugging: \`src/lib/init/tty-diagnostics.ts\` \`dumpTtyDiagnostics(label)\` — no-op unless \`SENTRY\_INIT\_DIAGNOSTICS=1\`.
@@ -343,6 +349,9 @@
 <!-- lore:019ef4b4-f87c-7124-a543-da6f428b1291 -->
 * **bundle-sources exit-code convention: manual exitCode=1 vs OutputError for no-sources path**: In \`src/commands/debug-files/bundle-sources.ts\`, the no-sources-found path uses \`this.process.exitCode = 1\` (not \`OutputError\`). This is a known Medium deviation from the framework-blessed pattern: \`OutputError\` (exit 60) is robust against framework finalization resetting exitCode; manual assignment is fragile. The \`-o\` flag also does NOT create parent directories (unlike \`bundle-jvm\`), so \`writeFile\` throws raw ENOENT if the parent doesn't exist. Both are accepted as-is in PR #1126 — do not 'fix' them without a follow-up PR discussion. Exit code map: ValidationError→21, success→0, no-sources→1.
 
+<!-- lore:019f03e5-1f07-7e75-b223-c383631581f5 -->
+* **CI Node version pinning: centralized env block per workflow file, ternary for matrix jobs**: Node CVE-2026-48931 fix: \`NODE\_VERSION\_22="22.23.1"\`, \`NODE\_VERSION\_24="24.18.0"\` (22.23.0 had the vulnerability; fix landed in 22.23.1 via nodejs/node#64004). Pattern: add top-level \`env:\` block to each workflow file (ci.yml, release.yml, sentry-release.yml, docs-preview.yml) with both constants + rationale comment. Reference via \`${{ env.NODE\_VERSION\_22 }}\`. Matrix jobs (build-npm) use ternary: \`${{ matrix.node == '24' && env.NODE\_VERSION\_24 || env.NODE\_VERSION\_22 }}\` — matrix labels stay as bare majors (\`\["22","24"]\`) for job naming. Gotcha: \`eval-skill-fork.yml\` has no \`setup-node\` step at all — must add one explicitly \[\[019f03bb-f9cf-7208-a183-d4f0074480f9]].
+
 <!-- lore:019e6874-7d69-79e3-9df3-e1779c205b0b -->
 * **clack-utils.ts filename preserved intentionally — rename deferred to next cleanup PR**: \`src/lib/init/clack-utils.ts\` filename kept (not renamed to \`wizard-utils.ts\`) to keep PR 4 diff focused on clack removal. No clack references remain in the file. \`WizardCancelledError\` lives here. \`abortIfCancelled\<T>()\` return type uses \`Exclude\<T, symbol>\` to narrow union types. \`FEATURE\_DISPLAY\_ORDER\` and \`CANONICAL\_STEP\_ORDER\` (12 steps) also defined here. Rename is intentionally deferred.
 
@@ -415,9 +424,6 @@
 <!-- lore:019cb162-d3ad-7b05-ab4f-f87892d517a6 -->
 * **Shared pagination infrastructure: buildPaginationContextKey and parseCursorFlag**: Pagination infrastructure + org flag injection: Bidirectional pagination via cursor stack in \`src/lib/db/pagination.ts\`. \`resolveCursor(flag, key, contextKey)\` maps keywords (next/prev/first/last) to \`{cursor, direction}\`. \`advancePaginationState\` manages stack — back-then-forward truncates stale entries. Critical: \`resolveCursor()\` must be called INSIDE \`org-all\` override closures, not before \`dispatchOrgScopedList\`. \`issue list --limit\` is global total: \`fetchWithBudget\` Phase 1 divides evenly, Phase 2 redistributes surplus. \`trimWithProjectGuarantee\` ensures ≥1 issue per project. Compound cursor (pipe-separated) enables \`-c last\` for multi-target pagination. JSON output wraps in \`{ data, hasMore }\` with optional \`errors\` array. \`sort\` flag is resolved once in \`func()\` before dispatch — never re-derived by infra. \`handleOrgAllIssues\` returns server order (no client-side sort). \`isMultiProject\` guard gates client-side sort at list.ts:1144-1148.
 
-<!-- lore:019ef3d9-da6a-7e7b-b320-940c89fc607f -->
-* **Stricli optional flags with runtime-resolved defaults: omit static default, resolve in func()**: When a flag's default depends on runtime state (e.g., host type), Stricli static defaults cannot be used. Pattern: mark flag \`optional: true\` with no \`default\` in the flag definition; in \`func()\`, immediately resolve: \`const flags = { ...rawFlags, sort: rawFlags.sort ?? defaultIssueSort() }\`. All downstream code receives the concrete resolved type. \`ListFlagsInput\` uses \`Omit\<ListFlags, 'sort'> & { readonly sort?: SortValue }\` as the \`func()\` parameter type; \`ListFlags\` (with required sort) is used everywhere else. This avoids Stricli's static-default limitation while keeping type safety.
-
 <!-- lore:019cbd5f-ec35-7e2d-8386-6d3a67adf0cf -->
 * **Telemetry instrumentation pattern: withTracingSpan + captureException for handled errors**: For graceful-fallback operations, use \`withTracingSpan\` from \`src/lib/telemetry.ts\` for child spans and \`captureException\` from \`@sentry/bun\` (named import — Biome forbids namespace imports) with \`level: 'warning'\` for non-fatal errors. \`withTracingSpan\` uses \`onlyIfParent: true\` — no-op without active transaction. User-visible fallbacks use \`log.warn()\` not \`log.debug()\`. Several commands bypass telemetry by importing \`buildCommand\` from \`@stricli/core\` directly instead of \`../../lib/command.js\` (trace/list, trace/view, log/view, api.ts, help.ts).
 
@@ -489,6 +495,9 @@
 <!-- lore:019e517a-f897-7dbd-8c96-a96ee5b9abaa -->
 * **Always migrate Bun-specific APIs and tooling to Node.js equivalents**: Migrating from Bun to Node.js/pnpm. Replace Bun-specific APIs: \`Bun.spawn\`→\`node:child\_process\`, \`Bun.sleep\`→\`node:timers/promises\`, \`bun:sqlite\`→\`node:sqlite\`, \`bun run\`→\`pnpm run\`/\`tsx\`, \`bun.lock\`→pnpm lockfile. All packages in \`devDependencies\` (never \`dependencies\`). Exception: \`script/build.ts\` uses fossilize (not \`Bun.build\`) and stays on Bun for the build-binary CI job. \`script/bundle.ts\` (npm bundle) uses esbuild via tsx and is Node-native. \`packageManager\` field in package.json: \`pnpm@10.11.0\`. After each migration phase, ensure lint and tests pass before committing. Migration is complete as of main branch (bun.lock deleted, vitest.config.ts added, all test files migrated to vitest).
 
+<!-- lore:019f03c9-2c86-7b6d-8cba-635c8c4f5f26 -->
+* **Always monitor CI status after pushing or posting PR replies**: After completing an action on a PR (pushing commits, posting review replies, responding to bot findings), the user consistently expects the assistant to immediately check and track CI status. The assistant should proactively monitor pending checks, report which have passed/failed/skipped, identify what remains outstanding, and continue following up until all critical checks resolve. Don't wait to be asked — after any PR action, shift focus to CI status tracking and flag any remaining pending checks explicitly.
+
 <!-- lore:019f03b7-69b6-765e-9d5d-7b1fc9a0de4a -->
 * **Always mutation-test new tests to verify they fail without the guarded behavior**: When reviewing or writing tests, the user performs mutation testing to confirm a test is non-tautological: they revert or remove the specific implementation behavior the test is supposed to guard (e.g., replacing \`atomicWriteFile\` with plain \`writeFile\`, or moving \`mkdirSync\` inside a try block), then re-run the suite. If the test still passes after the mutation, the user flags it as a blocking defect ('vacuous' or 'tautological') and requires a fix before the PR can ship. This applies especially to tests that assert absence of side effects (e.g., no leftover temp files, no recursion) where the assertion may be trivially true even without the feature.
 
@@ -543,6 +552,9 @@
 <!-- lore:019e68b0-e91f-76c1-8472-bf529a163c2a -->
 * **Always switch from plan mode to build mode before executing changes**: The user consistently uses a two-phase workflow: first planning (read-only exploration, writing a plan file), then explicitly approving a switch to build/agent mode before any changes are executed. When the user approves the mode switch, the assistant should immediately begin executing the existing plan file — typically by re-reading the key files to be modified. Never execute changes while still in plan mode, even if the plan is complete and approved. Wait for the explicit mode-switch approval before acting.
 
+<!-- lore:019f03e0-72ec-7c8b-a0c0-353937e054ec -->
+* **Always track and verify CI check results to full green before considering a PR complete**: The user consistently monitors CI pipelines until all checks reach a final state (pass/skip/neutral) with zero failures before treating a PR as done. This includes: checking rollup summaries, investigating any bot findings (Warden, Bugbot, Seer), confirming findings are either fixed or explicitly declined with rationale, and verifying follow-up PRs also reach green. The user expects the assistant to proactively report CI status, distinguish pending vs. final states, and flag any outstanding checks or unresolved findings. A PR is only considered complete when all checks are resolved and any bot review comments have been addressed.
+
 <!-- lore:019e56f7-21d3-70f8-a5cb-f7d6464899ff -->
 * **Always track migration progress with explicit completion criteria and remaining blockers**: The Bun→Node migration is complete only when \`Bun.build({ compile: true })\` is replaced by fossilize in \`script/build.ts\`. As of the current session, \`script/build.ts\` already uses fossilize (\`--no-bundle\`, \`--out-dir dist-bin\`, \`--node-version lts\`) with esbuild for bundling — the migration is complete. NODE\_VERSION='lts' in build.ts. The user expects the assistant to track this state across sessions and confirm the migration is done. When resuming sessions, verify \`script/build.ts\` does not contain \`Bun.build({ compile: true })\` before declaring migration complete.
 
@@ -567,6 +579,9 @@
 <!-- lore:019e6105-099f-7869-907c-891c78ee3c38 -->
 * **Always verify PR claims against actual codebase before accepting changes**: When reviewing a PR, the user consistently directs the assistant to check each stated claim against the real source files on the main branch rather than trusting the PR description or commit messages. This applies especially to documentation PRs: the user wants specific file paths, line numbers, and code excerpts cited as evidence. The user also cross-checks automated tooling (scripts, CI configs) against what they actually produce. When a PR introduces fixes, the user wants confirmation that the underlying problem genuinely existed and that the fix is correct — not just that the PR author says so. Always run the relevant check scripts and grep the codebase directly rather than reasoning from PR metadata alone.
 
+<!-- lore:019f03d5-0ada-76c6-bf84-b0eaedd3a0d3 -->
+* **Always wait for Unit Tests to complete before proceeding with PR actions**: When monitoring CI status on PRs, the user consistently treats Unit Tests as the most critical pending check. Even when most other checks (CodeQL, Lint & Typecheck, semgrep, Secret Scan, etc.) have passed, the user waits for Unit Tests to finish before taking any further action (merging, reviewing, etc.). Seer Code Review and warden are also monitored but Unit Tests is the explicit blocker. The assistant should identify Unit Tests as the primary pending check to watch, report its status prominently, and not suggest proceeding until it completes.
+
 <!-- lore:019e56f4-5e72-723b-b993-ed8394c94bd5 -->
 * **Always work around the worktree conflict error when merging to main**: When merging PRs locally, the user consistently encounters \`fatal: 'main' is already used by worktree at ...\` and expects a workaround to be applied automatically rather than treating it as a blocking error. The merge is always completed successfully despite this error (e.g., using \`gh pr merge\` via CLI or other workaround). Never stop or report failure when this specific worktree conflict appears — proceed with the merge using an alternative method and confirm the PR was merged successfully.