From 71a0b36fde2543d58d27350c72c60147fca6a28d Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Wed, 24 Jun 2026 17:36:03 +0000 Subject: [PATCH] feat(debug-files): add upload command (core) Port the legacy `sentry-cli debug-files upload` to the TypeScript CLI, scoped to the Tier A core path that covers the ~90% real-world case. - New `upload` command: recursively scans files/directories, parses each candidate via the bundled `@sentry/symbolic` WASM module, filters, and uploads matching files via the DIF chunk-upload + assemble protocol. - Filters: `--type`/`--id` (repeatable), `--require-all`, and `--no-debug`/`--no-unwind`/`--no-sources` (mirrors legacy feature filtering). - `--include-sources` attaches an in-memory source bundle per file. - `--no-upload` dry-run (no credentials); `--wait`/`--wait-for` block on server-side processing and exit non-zero on failures. - `src/lib/api/debug-files.ts`: `uploadDebugFiles` batches all files into one assemble request keyed by SHA-1 checksum, with no-wait (chunks held) and wait (terminal ok/error) completion modes. - `src/lib/dif/scan.ts`: pure, property-tested filter predicates plus the filesystem scan/parse pipeline. Deferred to follow-ups (noted in --help): ZIP scanning, `--symbol-maps`, `--il2cpp-mapping` line mappings, and `--derived-data`. --- .lore.md | 185 +++--- docs/src/content/docs/contributing.md | 2 +- docs/src/fragments/commands/debug-files.md | 21 + plugins/sentry-cli/skills/sentry-cli/SKILL.md | 1 + .../sentry-cli/references/debug-files.md | 27 + src/commands/debug-files/index.ts | 2 + src/commands/debug-files/upload.ts | 534 ++++++++++++++++++ src/lib/api/debug-files.ts | 377 +++++++++++++ src/lib/dif/scan.ts | 424 ++++++++++++++ test/commands/debug-files/upload.test.ts | 372 ++++++++++++ test/lib/api/debug-files.test.ts | 352 ++++++++++++ test/lib/dif/scan.property.test.ts | 247 ++++++++ 12 files changed, 2455 insertions(+), 89 deletions(-) create mode 100644 src/commands/debug-files/upload.ts create mode 100644 src/lib/api/debug-files.ts create mode 100644 src/lib/dif/scan.ts create mode 100644 test/commands/debug-files/upload.test.ts create mode 100644 test/lib/api/debug-files.test.ts create mode 100644 test/lib/dif/scan.property.test.ts diff --git a/.lore.md b/.lore.md index 8079ec021..c18a34c1d 100644 --- a/.lore.md +++ b/.lore.md @@ -7,6 +7,9 @@ * **@sentry/symbolic 13.4.0 API surface: SourceBundleWriter for bundle-sources command**: \`@sentry/symbolic@13.4.0\` exports 4 classes: \`Archive\`, \`FileEntry\`, \`ObjectFile\`, \`SourceBundleWriter\`, plus \`SourceFileDescriptor\`. Key for CLI source-tier commands: \`SourceBundleWriter.writeObject(object: ObjectFile, object\_name: string, filter: Function, provider: Function): Uint8Array | undefined\` — callback-based; provider reads source content by path, filter selects files. \`bundle-sources\` is directly implementable (provider reads from disk). \`print-sources\` is BLOCKED — \`ObjectFile\` has no \`sourceFiles()\` enumeration method in 13.4.0 (only props: arch, codeId, debugId, fileFormat, hasDebugInfo, hasSources, hasSymbols, hasUnwindInfo, kind). \`SourceFileDescriptor\` has get/set props: contents, debugId, path, sourceMappingUrl, url, type. Confirmed by Dav1dde (Sebastian Zivota's colleague) on Jun 23 2026. + +* **@sentry/symbolic wasm-debug-session API: DebugSession via SelfCell + AsSelf**: \`ObjectFile.debugSession()\` → \`DebugSession\` (PR #997, merged Jun 24 07:51Z). \`DebugSession\` wraps \`SelfCell\, ObjectDebugSession<'static>>\` using the \`derived\_from\_cell!\` macro. Required adding \`impl<'slf, 'data: 'slf> AsSelf<'slf> for ObjectDebugSession<'data>\` in \`symbolic-debuginfo/src/object.rs\` — \`ObjectDebugSession\` was the only major type missing this impl. \`files()\` returns eager \`FileEntry\[]\` (not iterators — deferred per dav1d). \`sourceByPath(path)\` returns \`SourceFileDescriptor | undefined\`. dav1d flagged potential perf issue: Rust String ↔ JS String encoding mismatch may cause excessive wasm↔JS data copying when reading \`descriptor.contents\`. + * **403/401 enrichment pipeline in infrastructure.ts — centralized, no interactive auto-fix**: \`src/lib/api/infrastructure.ts\` centralizes HTTP error enrichment. \`enrichDetail()\` dispatches: 403→\`enrich403Detail\`, 401→\`enrich401Detail\`, others pass raw. \`enrich403Detail()\` three branches: (1) \`rawDetail.includes('disabled this feature')\` → org-policy message; (2) \`isEnvTokenActive()\` → \`extractRequiredScopes(rawDetail)\`; scopes found → definite missing-scope message; no scopes → hedged message; (3) OAuth → re-auth suggestion. \`throwApiError()\` and \`throwRawApiError()\` both set \`ApiError.enriched403=true\`. No interactive auto-fix. \`buildPermissionError()\` in \`project/delete.ts\` NEVER suggests \`sentry auth login\` — re-auth via OAuth won't change permissions. OAuth \`auth login\` always grants required scopes, so scope hints only apply to env-var tokens. 401 errors: fix is always re-authenticate — scope hints do NOT apply. @@ -40,6 +43,9 @@ * **Custom CA loading: priority, caching, TLS error detection, and SaaS warning**: Custom CA in \`src/lib/custom-ca.ts\`: Priority: (1) \`sentry cli defaults ca-cert\` (SQLite), (2) \`NODE\_EXTRA\_CA\_CERTS\`. Cached per-process via module-level vars (\`hasResolved\` flag). \`resolve()\` concatenates custom PEM with \`rootCertificates\` (additive — Bun replaces Mozilla bundle otherwise). \`tryReadPem()\` NEVER throws — missing CA file logs warn and returns \`undefined\`. \`injectIntoNodeTls()\` uses \`tls.setDefaultCACertificates()\` (Node 24+ only; no-op on Node 22). \`TLS\_ERROR\_PATTERNS\`: 5 patterns (local issuer, verify first cert, UNABLE\_TO\_VERIFY\_LEAF\_SIGNATURE, DEPTH\_ZERO\_SELF\_SIGNED\_CERT, SELF\_SIGNED\_CERT\_IN\_CHAIN) — explicitly excludes \`CERT\_HAS\_EXPIRED\` and \`ERR\_TLS\_CERT\_ALTNAME\_INVALID\`. \`getTlsCertErrorMessage()\` walks \`error.cause\` chain with cycle detection. SaaS target + env-sourced CA → one-time warning; stored default silences it. \`\_\_resetForTests()\` resets all cached state. + +* **debug-files upload: per-file upload design and assemble body shape**: The \`sentry debug-files upload\` command uses per-file upload (not per-slice). Assemble body shape: \`{ \[overallSha1]: { name, debug\_id?, chunks: string\[] } }\`. Two modes: no-wait (stop once server holds chunks) and \`--wait\` (poll for \`ok\`/\`error\` up to \`ASSEMBLE\_MAX\_WAIT\_MS\`). Filter rules mirror legacy \`filter\_features\`. Auth deferred to \`resolveOrgAndProject()\` (standard cascade). Source bundles via \`createSourceBundle\` when \`--include-sources\`. Deduplication uses \`debugId:sha1(content)\` composite key. Early peek via \`peekFormat()\` in \`prepareDifs\` rejects non-DIF files before full read. Location: \`src/lib/api/debug-files.ts\`, \`src/lib/dif/scan.ts\`, \`src/commands/debug-files/upload.ts\`. + * **delta-upgrade.ts: patch chain resolution and application architecture**: Two channels: stable (GitHub Releases) and nightly (GHCR \`patch-\\` tags). Patch format: TRDIFF10 (zig-bsdiff + zstd). Constants: \`MAX\_STABLE\_CHAIN\_DEPTH=10\`, \`MAX\_NIGHTLY\_CHAIN\_DEPTH=30\`, \`SIZE\_THRESHOLD\_RATIO=0.6\`. Stable: single API call fetches releases with asset metadata, parallel \`Promise.all\` download. Nightly: list tags → filter semver range → fetch manifests → parallel blob download. \`applyPatchesSequentially()\` alternates between two intermediate files (\`${destPath}.patching.a\`/\`.b\`) — never read/write same path (mmap corruption). SHA-256 verified ONCE after all patches applied, not per-intermediate. Cache-first: \`tryLoadCachedChain()\` with key \`patch-chain:{from}-{to}\`. \`canAttemptDelta()\` blocks on dev version, cross-channel, or downgrade. @@ -126,11 +132,14 @@ * **@sentry/symbolic wasm-pack test never exercises the shipped artifact — use npm pack smoke test instead**: Trap: \`wasm-pack test\` looks like the right way to test \`@sentry/symbolic\` — it builds and runs tests. But it builds its own glue code and never loads the \`--target web\` \`symbolic.js\` + \`initSync\` + package \`exports\`/\`files\` that consumers actually import. Fix: use a packaged-artifact smoke test: \`npm pack\` → install tarball into tmp dir → \`import '@sentry/symbolic'\` → assert via \`node:test\`. This catches exports-map/resolution breakage and runtime errors (e.g., the \`Object\` naming bug) that \`wasm-pack test\` misses entirely. Test files live in the package dir but are excluded from \`files\[]\`; run via \`cd symbolic-wasm/npm && npm test\`. -* **@sentry/symbolic: naming wasm export class \`Object\` shadows JS global — breaks Object.create and Object.getPrototypeOf**: Trap: wasm-bindgen generates a JS class named after the Rust struct — naming it \`Object\` looks fine in Rust. But it shadows the JS global \`Object\` for the entire module: \`Object.create\` (used in \`objects()\`) and \`Object.getPrototypeOf\` (used in \`initSync\`) break with \`TypeError: Object.getPrototypeOf is not a function\`. Fix (PR #993): rename the export to \`ObjectFile\` via \`#\[wasm\_bindgen(js\_name = "ObjectFile")]\` — any name that isn't a JS global works. Discovered via packaged-artifact smoke test, not \`wasm-pack test\`. +* **@sentry/symbolic: naming wasm export class \`Object\` shadows JS global — breaks Object.create and Object.getPrototypeOf**: Trap: wasm-bindgen generates a JS class named after the Rust struct — naming it \`Object\` looks fine in Rust. But it shadows the JS global \`Object\` for the entire module: \`Object.create\` (used in \`objects()\`) and \`Object.getPrototypeOf\` (used in \`initSync\`) break with \`TypeError: Object.getPrototypeOf is not a function\`. Fix (PR #993): rename the export to \`ObjectFile\` via \`#\[wasm\_bindgen(js\_name = "ObjectFile")]\` and \`#\[wasm\_bindgen(js\_class = "ObjectFile")]\` — Rust struct name \`Object\` unchanged. Discovered via packaged-artifact smoke test, not \`wasm-pack test\`. Similarly, \`ObjectDebugSession\` needed \`AsSelf\` impl (PR #997) — check all new wasm-exposed types for missing \`AsSelf\` impls before using \`derived\_from\_cell!\`. * **@stricli/core -H alias patch: pin version to 1.2.7, never remove aliases**: Trap: When Stricli throws on \`-H\` aliases used for \`--header\` or \`--host\`, removing the aliases from command files looks like the simple fix. But the project intentionally uses \`-H\` for curl-style API usage. Fix: the in-repo patch for @stricli/core (targeting 1.2.7) removes \`-H\` from the reserved list. Pin version to \`1.2.7\` (not \`^1.2.8\`) so the patch applies. Never remove \`-H\` alias usages from command files. Added in commit \`78c9b04a5\`. Cursor Bugbot and Seer both flag \`-H\` removal as blocking. + +* **\`--require-all\` false negatives for fat binaries — scan all matched objects**: Trap: \`missingRequestedIds\` computed \`foundIds\` from only the primary object's \`debugId\` per file. Fat Mach-O with multiple slices causes non-primary slice IDs to be reported missing and exits 1 even though they were found. Fix: compute \`foundIds\` from all matched objects (\`.objects\` array), not just \`selectBundledObject()?.debugId\`. Applies to \`src/commands/debug-files/upload.ts\`. + * **acquireLock ENOENT: parent directory may not exist — must mkdir before writeFileSync**: Trap: \`acquireLock(lockPath)\` calls \`writeFileSync(lockPath, pid, { flag: 'wx' })\` — looks safe because the lock path is derived from the install path. But the parent directory (e.g. \`~/.sentry/bin\` or a stale \`/tmp/sentry-test-install/\`) may not exist, causing ENOENT (Sentry issue CLI-1E1). Fix: call \`mkdirSync(dirname(lockPath), { recursive: true, mode: 0o755 })\` BEFORE the try block in \`acquireLock\` (\`src/lib/binary.ts\`). CRITICAL: keep mkdir OUTSIDE the try block — if inside, mkdir's EEXIST error routes into \`handleExistingLock\`, obscuring root cause (comment claiming infinite recursion is inaccurate: actual path is EEXIST→handleExistingLock→readFileSync→ENOTDIR, single throw). The directory creation is idempotent — \`recursive: true\` is a no-op if it already exists. Merged in PR #1125 (squash \`ee768454c\`). @@ -149,6 +158,9 @@ * **check.ts hasId() uses !== null but ObjectFile.codeId is string|undefined — null guard mismatch**: Trap: \`check.ts\` \`hasId()\` guards \`o.codeId !== null && o.codeId.length > 0\`. Looks correct because old \`DifObjectInfo.codeId\` was \`string | null\`. But \`ObjectFile.codeId\` in \`@sentry/symbolic\` 13.4.0 is \`string | undefined\` — the getter returns \`undefined\` (not \`null\`) when Rust returns \`None\`. If \`DifObjectInfo.codeId\` is mapped as \`string | null\` (via \`?? null\`), the \`!== null\` guard works. If mapped as \`string | undefined\` (via \`?? undefined\`), the guard silently passes \`undefined\` through. Fix: ensure \`parseDebugFile\` maps \`codeId\` to \`string | null\` (using \`obj.codeId ?? null\`) so \`check.ts\`'s existing \`!== null\` guard remains correct without touching \`check.ts\`. + +* **chunk-upload gzip wire format: never emit Content-Encoding: gzip with file\_gzip field**: Trap: gzip-compressed chunks look like they should use \`Content-Encoding: gzip\` alongside the \`file\_gzip\` multipart field — standard HTTP compression convention. But zstd-aware Sentry servers reject that combination with 400 to avoid ambiguity. Fix: gzip path uses ONLY the \`file\_gzip\` multipart field with NO \`Content-Encoding\` header (legacy protocol). Zstd path uses \`Content-Encoding: zstd\` + \`file\` field (requires server opt-in via getsentry/sentry#113760+). This is a standing directive from code comments in \`src/lib/api/chunk-upload.ts\`. + * **dashboard revisions/restore and issue events subcommands are undocumented in fragment files**: RESOLVED in PR #1024. \`docs/src/fragments/commands/dashboard.md\` now documents \`revisions\` and \`restore\`. \`docs/src/fragments/commands/issue.md\` now documents \`events\` and \`@latest\`/\`@most\_frequent\` selectors. \`docs/src/fragments/commands/cli.md\` now documents \`defaults\` and \`import\`. \`check:fragments\` (Check 5) now validates subcommand coverage within fragment files — not just file existence. When adding new subcommands, always update the corresponding fragment in \`docs/src/fragments/commands/\` AND run \`pnpm run check:fragments\` to verify coverage. @@ -182,6 +194,9 @@ * **issue list sort default test coverage gap: tests always pass sort explicitly**: Trap: \`list.test.ts\` passes \`sort\` explicitly in every \`func.call(...)\` invocation (\`sort: 'date'\` or \`sort: 'recommended'\`). No end-to-end test omits \`sort\` and asserts the API received the correct default (\`recommended\` on SaaS, \`date\` on self-hosted). This means \`defaultIssueSort()\` logic is not covered by integration tests — only unit-tested via \`\_\_testing\` exports. When adding host-dependent flag defaults, always add a test that omits the flag and verifies the resolved default reaches the API call. + +* **listSources: never copy descriptor.contents across wasm boundary per-file — use type field instead**: Trap: \`descriptor.contents\` looks like the right way to check if a source is embedded — it's the actual content. But reading it copies the full Rust String across the wasm↔JS boundary for every file, triggering the encoding mismatch dav1d flagged (Rust String vs JS String encoding). Fix: use \`descriptor.type\` (a cheap string tag: \`'url'\`, \`'resolved'\`, etc.) to classify the source without copying contents. \`DifSourceFile\` shape uses \`type\` field, not \`embeddedBytes\`. Only fetch \`contents\` if the caller explicitly needs the source text. + * **loadOldBinary: new Uint8Array(readFile()) double-allocates — return Buffer directly**: Trap: \`new Uint8Array(await readFile(tempCopy))\` looks like a safe type conversion. But \`readFile()\` already returns a \`Buffer\` which IS a \`Uint8Array\` — wrapping it in \`new Uint8Array()\` copies the entire ~100MB binary, causing a transient double-allocation peak. Fix: return the \`Buffer\` directly from \`loadOldBinary\` with no wrapper. Zero tradeoff — Buffer is already a Uint8Array subclass and works everywhere Uint8Array is expected. @@ -194,6 +209,9 @@ * **Multi-region fan-out: distinguish all-403 from empty orgs with hasSuccessfulRegion flag**: In \`listOrganizationsUncached\` (\`src/lib/api/organizations.ts\`), \`Promise.allSettled\` collects multi-region results. Don't use \`flatResults.length === 0\` to detect all-regions-failed — a region returning 200 OK with zero orgs pushes nothing into \`flatResults\`. Track a \`hasSuccessfulRegion\` boolean on any \`"fulfilled"\` settlement. Only re-throw 403 \`ApiError\` when \`!hasSuccessfulRegion && lastScopeError\`. + +* **OOM on large non-DIF files — header peek before full read**: Trap: \`prepareDifs\` reads entire file into memory before calling \`parseDebugFile\`. Large files that aren't DIFs (e.g. videos, logs) cause OOM. Fix: call \`peekFormat\` on a small header chunk first to reject non-object files early, then only read the full file if it passes. Pattern documented in \`src/lib/dif/scan.ts\` with \`peek(16)\` + \`peekFormat(header)\`. + * **Parallel agent sessions can commit stray .lore.md changes to feature branches**: Trap: a parallel agent session running concurrently can check out a different branch, commit a \`.lore.md\` update to the wrong branch (e.g., \`byk/feat/issue-list-recommended-sort\`), then switch away — leaving a stray commit on the feature branch. Confirmed: commit \`84b9f7cfb chore: update .lore.md\` appeared on \`byk/feat/issue-list-recommended-sort\` from a parallel agent that had checked out \`byk/feat/debug-files-new-symbolic-api\`. Before pushing or opening a PR, always verify \`git log\` and \`git diff origin/\\` to confirm no unexpected commits from parallel sessions. @@ -206,12 +224,18 @@ * **pnpm test runs generate:docs + generate:sdk before vitest — too slow for direct invocation**: Trap: \`pnpm test\` (or \`pnpm run test:unit\`) runs \`generate:docs && generate:sdk\` before vitest, making the full suite take minutes even for small changes — looks like a normal test command. Fix: invoke vitest directly to skip doc/SDK generation: \`vitest run test/lib test/commands test/types\`. This drops runtime from minutes to ~2-10s. Use \`pnpm run test:unit\` only when you need the generated artifacts to be fresh (e.g., testing doc generation itself). + +* **print-sources must never claim to preview bundle-sources while listing sources bundle-sources would skip**: Trap: \`print-sources\` iterates ALL objects in an archive (full inspection value), while \`bundle-sources\` only bundles the first object with debug info (\`selectBundledObject\`). Listing all objects then showing a \`bundle-sources\` hint looks like a preview of that command — but it isn't. Fix: add multi-object warning naming the exact slice that would be bundled (via \`selectBundledObject\`), add \`hasDebugInfo\` to JSON output so consumers can apply the same selection rule, and ensure footer hint distinguishes no-objects / enumeration-failure / no-sources. 🔴 Directive: never claim to preview \`bundle-sources\` while collecting sources \`bundle-sources\` would never collect. + * **process.stdin.isTTY unreliable in Bun — use isatty(0) and backfill for clack**: \`process.stdin.isTTY\` unreliable — use \`isatty(0)\` from \`node:tty\`. Bun's single-file binary can leave \`process.stdin.isTTY === undefined\` on TTY fds. \`@clack/core\` gates \`setRawMode(true)\` on \`input.isTTY\`, silently disabling raw mode. Fix: backfill \`process.stdin.isTTY = true\` when \`isatty(0)\` confirms. Debugging: \`src/lib/init/tty-diagnostics.ts\` \`dumpTtyDiagnostics(label)\` — no-op unless \`SENTRY\_INIT\_DIAGNOSTICS=1\`. * **ruzstd partial decompression: must validate output size explicitly**: Trap: \`ruzstd::StreamingDecoder\` (unlike \`zstd::bulk::decompress\`) silently returns a partial result when passed a too-small \`size\` — it does NOT error. Fix: read \`size + 1\` bytes into the output buffer, then assert \`decompressed.len() == size\`; return \`None\` on mismatch. This matches \`zstd::bulk::decompress\` error-on-mismatch semantics. Confirmed via test: exact(560)→Some(560)✓, toosmall(550)→None✓, toolarge(570)→None✓. + +* **Silent nonexistent path in scan — throw ValidationError instead of skip**: Trap: \`scanPaths\` silently skipped nonexistent paths (ENOENT from \`stat\` → \`log.debug\` + continue). Users got empty results with no indication the path was missing. Fix: for explicitly provided paths (not directory children), throw \`ValidationError\` with the path name. Directory children that don't exist are still silently skipped. + * **SQLite transaction() ROLLBACK can throw, discarding original error**: (gotcha) SQLite transaction ROLLBACK error-swallowing trap: In \`src/lib/db/sqlite.ts\`, \`transaction()\` catches errors and runs \`this.db.exec('ROLLBACK')\`. If ROLLBACK itself throws, the original error is lost. Fix: \`const origErr = e; try { this.db.exec('ROLLBACK'); } catch (rbErr) { log.debug(...); } throw origErr;\` @@ -224,6 +248,12 @@ * **symbolic-wasm: JS callback errors silently swallowed via .ok()? — must propagate as JsError**: Trap: using \`.ok()?\` on \`call1\` (JS function invocation) and \`dyn\_into::\()\` looks like idiomatic Rust error-to-Option conversion. Fix: both failures must propagate as \`JsError\` to the caller — thrown JS exceptions yield partial bundles with no feedback; non-\`Uint8Array\` returns (ArrayBuffer, plain arrays) silently skip files. Pattern: capture callback error in \`Option\\` outside closure, set it on failure, check after closure returns. Make \`with\_object\` generic over \`E: From\\` so both error paths unify. Flagged by Cursor Bugbot (Medium) and Sentry Seer (Medium) on PR #991. + +* **Symlink cycle hang in recursive file collection — use lstat + visited-realpath set**: Trap: \`collectFiles\` uses \`stat\` (follows symlinks) with no cycle detection. Directory symlinks pointing to ancestors cause unbounded recursion — never returns. macOS \`.framework\`/dSYM trees routinely contain cyclic symlinks. Fix: use \`lstat\`, skip symlinked directories, and track visited realpaths in a \`Set\` to break cycles. File symlinks are safe to follow. + + +* **Upload assembly \`not\_found\` after deadline is a real failure — must set exit code 1**: Trap: upload assembly only treated \`"error"\` state as failure; \`"not\_found"\` was treated as incomplete (exit 0 with debug log). But after deadline, \`not\_found\` means chunks were never delivered — a genuine failure. Fix: treat \`not\_found\` as failure with \`log.warn\` + exit code 1. Also upgrade deadline-break log from \`debug\` to \`warn\`. Discovered during self-review of \`debug-files upload\` PR. + * **UPX destroys ELF notes — incompatible with Node SEA binaries**: Trap: UPX compresses Node binaries from 99 MiB to 25 MiB and the compressed binary still runs — looks like a huge win. But UPX rewrites the entire ELF structure: original binary has 2 ELF notes (NT\_GNU\_BUILD\_ID + NT\_GNU\_ABI\_TAG), UPX'd binary has 0 notes and 0 sections. NODE\_SEA\_BLOB is stored as an ELF note — UPX destroys it. Fix: use \`strip --strip-unneeded\` instead, BUT only on the plain Node binary BEFORE fossilize SEA injection. After injection, \`strip\` fails with 'section .text can't be allocated in segment 2' — the SEA blob corrupts the ELF section-to-segment mapping. Strip the \`.node-cache/\` binaries before calling fossilize. Saves ~17 MB raw / ~4 MB compressed. Strip is idempotent — already-stripped binaries are unchanged. Recommended order: strip cached Node → fossilize (inject) → binpunch → gzip. @@ -248,7 +278,10 @@ ### Pattern -* **@sentry/symbolic 13.4.0 API: Archive/ObjectFile class contract and WASM memory management**: \`@sentry/symbolic\` 13.4.0 exports \`Archive\`, \`ObjectFile\`, \`FileEntry\`, \`SourceBundleWriter\`, \`SourceFileDescriptor\` + \`initSync\`/default \`\_\_wbg\_init\`. Key contracts: \`Archive.peek(data)\` returns \`string | undefined\` (never \`null\`, never \`"unknown"\`) — consuming code must use \`?? "unknown"\` fallback. \`ObjectFile.codeId\` is \`string | undefined\` (not \`null\`). All 5 classes use \`FinalizationRegistry\` for automatic WASM GC — explicit \`.free()\` / \`\[Symbol.dispose]\()\` still recommended for deterministic cleanup. \`initSync\` guards \`if (wasm !== undefined) return wasm\` — safe to call multiple times. \`ObjectFile\` was renamed from \`Object\` in 13.4.0 (PR #993) — old name shadowed JS global \`Object\` and broke \`initSync\` in \`--target web\` glue. \`@sentry/symbolic/symbolic\_bg.wasm\` subpath is marked \`external\` in both \`script/build.ts\` and \`script/bundle.ts\` esbuild configs — bundler never inlines the WASM blob. +* **@sentry/symbolic 13.4.0 API: Archive/ObjectFile class contract and WASM memory management**: Exported classes: \`Archive\` (\`objects()\`, \`peek()\`, \`fileFormat\`, \`objectCount\`), \`ObjectFile\` (\`debugId\`, \`codeId\`, \`arch\`, \`fileFormat\`, \`hasDebugInfo\`, \`hasSources\`, \`hasSymbols\`, \`hasUnwindInfo\`, \`kind\`, \`debugSession()\`), \`DebugSession\` (\`files()\`, \`sourceByPath()\`), \`SourceBundleWriter\` (\`writeObject()\`, \`collectIl2cppSources\` setter, \`isEmpty\`), \`SourceFileDescriptor\` (\`type\`, \`contents\`, \`url\`, \`path\`, \`sourceMappingUrl\`, \`debugId\`). NOT exposed: \`ObjectLineMapping::from\_object\` (il2cpp line-mapping DIF), BCSymbolMap parsing, UuidMapping plist parsing, embedded Portable PDB extraction. These gaps make BCSymbolMap/il2cpp DIF creation impossible without native tooling. + + +* **@sentry/symbolic release flow: build from master → local tarball → npm publish via craft**: Steps: 1. Checkout \`origin/master\` in \`~/Code/getsentry/symbolic\` — ensures all merged PRs included. 2. Run \`cd symbolic-wasm && bash build-npm.sh\` — compiles wasm32, runs wasm-opt, packs tarball, runs smoke test (3 assertions). 3. Pin CLI \`package.json\` to \`file:\` tarball for local validation — run tests, typecheck, lint. 4. Revert \`file:\` pin before committing — wait for npm publish (craft flow via getsentry/publish). 5. Once new version (e.g. 13.5.0) appears on npm, update \`package.json\` to semver pin. Gotchas: - Merge ≠ publish: PR #997 merged Jun 24 but npm still showed 13.4.0 — always verify \`npm dist-tags\` before committing semver pin. - \`file:\` tarball pin must never be committed to the CLI repo. Verify: - \[ ] \`npm view @sentry/symbolic dist-tags.latest\` shows expected version. - \[ ] Smoke test 3/3 pass including 'debug session enumerates referenced source files'. * **@sentry/symbolic WASM build: always have latest build + initSync pattern**: When working with \`@sentry/symbolic\` WASM in the CLI: (1) always have the latest build before testing — recompile the WASM file if changed; (2) WASM module is loaded via \`initSync({ module })\` passing bytes directly; (3) the module statically imports the generated JS glue (\`symbolic.js\`). The 3-path fallback loader: (1) SEA: \`node:sea.getRawAsset(DIF\_WASM\_ASSET\_KEY)\`, (2) npm bundle: sibling \`dist/vendor/symbolic\_bg.wasm\` via \`import.meta.url\` + \`existsSync\`, (3) dev: \`require.resolve\` from installed package. \`@sentry/symbolic@13.4.0\` introduced \`Archive\` class with \`static peek(data: Uint8Array): string | undefined\` and \`ObjectFile\` class replacing old \`parse\_debug\_file\`/\`peek\_format\` free functions. @@ -271,12 +304,18 @@ * **debug-files shared readDebugFile extracted to read-file.ts — not inlined per command**: \`src/commands/debug-files/read-file.ts\` is the shared async helper for reading DIF files. Throws \`ValidationError\` on ENOENT (\`File '${path}' does not exist.\`), EISDIR (\`Path '${path}' is a directory...\`), and generic read failures. Both \`check.ts\` and \`bundle-sources.ts\` import from \`./read-file.js\`. Pattern: extract shared file-reading logic here rather than duplicating in each debug-files subcommand. + +* **debug-files upload: DIF assemble wire format and chunk-upload pipeline**: Native DIF assemble body: \`{ \[overallSha1]: { name, debug\_id?, chunks: string\[] } }\` — identical shape to \`proguard.ts\`/\`dart-symbols.ts\`. \`debug\_id\` is advisory (server re-parses). Per-file upload: each file chunked as raw bytes via \`hashBuffer\`; primary object selected via \`selectBundledObject\` (first with debug info, fallback to first). Assemble endpoint: \`projects/${org}/${project}/files/difs/assemble/\`. Constants: \`DEFAULT\_MAX\_DIF\_SIZE=2GB\`, \`DEFAULT\_MAX\_WAIT=300s\`. \`--wait\` flag controls whether to poll until assembly completes. Deferred: ZIP scanning, BCSymbolMap/dsymutil, Xcode derived-data, il2cpp mapping (require native tools not available in WASM). + * **debug-files/bundle-sources: SourceBundleWriter is one-shot/consuming — create fresh per call**: \`SourceBundleWriter.writeObject\` in \`@sentry/symbolic\` consumes the writer via \`\_\_destroy\_into\_raw()\` — a single instance cannot be reused. \`createSourceBundle(data: Uint8Array)\` in \`src/lib/dif/index.ts\` creates a fresh writer per call. Returns \`SourceBundleResult | null\` (null = empty bundle, i.e. \`writeObject\` returned \`undefined\`). Wasm boundary masks original JS error messages thrown inside provider callbacks — errors do propagate but message is not preserved; tests should use \`.toThrow()\` without message assertion for provider-error cases. * **Dedupe resolved entity IDs in batch operations before API call**: Batch issue merge (\`src/commands/issue/merge.ts\`): (1) Dedupe by resolved numeric ID after \`Promise.all(args.map(resolveIssue))\` — users may pass same entity as \`CLI-K9\`, \`my-org/CLI-K9\`, or \`123\`. Throw \`ValidationError\` if \`new Set(ids).size < 2\`. (2) Reject \`undefined\` orgs in cross-org check — bare numeric IDs without DSN/config resolve with \`org: undefined\`. (3) Pass \`--into\` through \`resolveIssue()\`; compare by numeric \`id\`, not \`shortId\`. (4) Sentry bulk merge API picks canonical parent by event count — \`--into\` is preference only; warn when API's \`parent\` differs. + +* **DifObjectSources: enumerationError + all-or-nothing files contract**: \`DifObjectSources\` in \`src/lib/dif/index.ts\` has \`enumerationError: string | null\`. When \`debugSession()\` throws, the catch block sets \`enumerationError\` to the error message AND resets \`files\` to empty — never a partial list paired with an error. This all-or-nothing contract is a standing invariant: callers can trust that \`files.length > 0\` implies \`enumerationError === null\`. Footer hint in \`print-sources\` uses \`failed.length > 0\` (any failure) not \`failed.length === objects.length\` (all failed) to trigger the read-error path. + * **getsentry/symbolic: follow-up PR pattern for merged PR review comments**: getsentry/symbolic PR #988 (feat/source-bundle-provider) follow-up to Dav1dde review: \*\*Implemented:\*\* (1) \`write\_object\_with\_filter\_and\_provider\` private inner method takes both filter \`F\` and provider \`P: Fn(\&str) -> Option\\`; public methods delegate to it. (2) \`SharedCursor\` (\`Rc\>>>\`) removed — replaced with plain \`Cursor::new(Vec::new())\` + \`into\_inner()\`. (3) Provider changed from destructive \`contents.remove(path)\` to non-destructive \`contents.get(path).map(|v| v.as\_slice())\`. (4) Tests use \`unwrap()\` not \`-> Result\`. (5) \`smoke-test.mjs\` + \`build-npm.sh\` wiring + \`ci.yml\` \`wasm-smoke\` job added — closes gap where wasm bindings were only compile-checked on PRs. Byte-identity verified: sha256 \`4d29224558f27174fef90e81d5c7e80fd388249f25cb48cfc4c5eb316e3b067b\` identical BASE vs HEAD. @@ -304,6 +343,9 @@ * **Security dep bump pattern: pnpm.overrides for transitive vulns + direct version bumps**: Security dep bump pattern for getsentry/cli: (1) Bump direct deps in \`package.json\`. (2) Add \`pnpm.overrides\` for transitive-only vulns (e.g. \`shell-quote\`, \`qs\`). (3) Add \`pnpm.overrides\` to \`docs/package.json\` for docs-workspace transitive vulns. (4) Run \`pnpm install\` in root, then in \`docs/\`. (5) After lockfile regen, verify no vulnerable versions remain and no previously-patched versions were downgraded — run \`pnpm update \\` to fix regressions. (6) Run \`check:deps\` to confirm no runtime deps added. Pre-existing WARNs about \`@sentry/core\`/\`@sentry/node-core\` patches on \`@sentry/node\`'s nested copies are expected. Trap: \`pnpm install\` can downgrade already-patched transitive deps (e.g. \`hono\` downgraded reintroducing GHSA vuln) — always verify versions post-regen. + +* **selectBundledObject: shared generic helper for first-debug-info-else-first selection**: \`selectBundledObject\(items: T\[], hasDI: (t: T) => boolean): T | undefined\` in \`src/lib/dif/index.ts\` is the single source of truth for 'first object with debug info, fallback to first object' selection. Used by both \`createSourceBundle\` (WASM \`Object\[]\`) and \`print-sources\` (multi-object warning). Chosen over duplicating the heuristic in each consumer — divergence between bundler and inspector is structurally impossible. Generic predicate parameter lets it work with both WASM \`ObjectFile\` and \`DifObjectSources\` arrays. + * **sensitive argv flags must never reach telemetry — redactArgv() in cli.ts**: \`SENSITIVE\_ARGV\_FLAGS = new Set(\['token', 'auth-token'])\` in \`src/cli.ts\`. \`redactArgv()\` replaces values of these flags with \`\[REDACTED]\` before any telemetry call. This is an absolute invariant — never pass raw \`process.argv\` to telemetry without running through \`redactArgv()\` first. @@ -339,8 +381,14 @@ * **Always address bot review comments (Cursor Bugbot, Sentry bot) by applying fixes before merging**: When automated bots (Cursor Bugbot, Sentry/Warden) post review comments on a PR, the user consistently reads and acts on each finding — either applying a fix or explicitly acknowledging it as stale/already-addressed. The user does not dismiss bot findings as noise; even medium/low severity issues get investigated and fixed if valid. After fixing, the user typically replies to the bot comment referencing the fix commit. This applies to all bot sources: Cursor Bugbot, Sentry bot, and Warden. When multiple bots flag the same location, the user treats it as higher-priority confirmation of a real bug. - -* **Always branch off main for focused fixes, never commit unrelated changes to feature branches**: When starting a focused fix or chore (security patch, bug fix, etc.), the user consistently creates a fresh branch off origin/main rather than committing to an existing feature branch. They explicitly scope the branch to only the relevant changes, excluding unrelated modifications (e.g., .lore.md updates, auto-regenerated files, OTel work). When the working tree has uncommitted changes on a feature branch, they stash, back up, or otherwise isolate those changes before branching from main. The new branch name reflects the specific fix (e.g., \`chore/security-deps-bump\`, \`byk/fix/upgrade-missing-install-dir\`). Follow this pattern by always proposing a clean branch from main for any fix that is independent of ongoing feature work. + +* **Always assert specific correctness claims about code behavior as red-line facts**: When reviewing code or discussing findings, the user makes explicit, declarative assertions (marked 🔴) about precise runtime behavior — e.g., 'never returns,' 'never reaches X,' 'reports success with exit 0,' 'that is a failure.' These are not questions or suggestions; they are authoritative statements the user expects to be treated as ground truth. When the user makes such assertions, accept them as correct without pushback, incorporate them directly into analysis or fix rationale, and ensure any proposed solution fully addresses the stated behavior. Do not hedge or re-examine these claims unless explicitly invited to. + + +* **Always check Beeper/Slack DMs to verify collaborator status before proceeding with dependent work**: When work depends on a collaborator's action (PR merge, API publish, review), the user checks Beeper chat with that collaborator to confirm the current status before starting or continuing dependent tasks. This applies especially to: verifying PR merges, confirming npm publish status, and checking whether a teammate has claimed or completed a task. The user treats Beeper DM history as the authoritative source of truth for collaboration state, not just GitHub PR status alone. Always check the relevant Beeper chat thread when the user asks about a collaborator's progress or when a blocker depends on another person's action. + + +* **Always check git status after tests pass before committing**: After a full test suite run completes successfully (all tests passing), the user consistently checks git status to review all changed files before proceeding to commit. This applies at the end of a work session when tests, typecheck, and lint have all passed. The assistant should proactively run \`git status\` (and often \`git diff\`) after confirming a clean test run, to enumerate modified/new files — including generated files, docs, and skill files — before staging and committing. * **Always clarify that the repo uses plain git (not jj) when jj commands fail**: When a jj command fails with 'no jj repo in .', the user consistently clarifies that the repo is a plain git repo and that jj's 'never fails on conflict' behavior is being referenced conceptually — meaning conflicts should be recorded/resolved rather than aborting operations. The agent should: (1) fall back to git commands immediately without retrying jj, (2) handle merge conflicts by stashing, pulling, and resolving (e.g., \`git checkout --theirs\` for files like \`.lore.md\`), and (3) not attempt \`jj git init\` or any jj initialization. This pattern appears at the start of every build session. @@ -351,18 +399,12 @@ * **Always compare PR branch against main before reviewing changes**: When reviewing a PR, the user consistently wants to understand exactly what changed in the PR branch versus main before diving into the content. This means fetching the remote branch if not available locally, running \`git log main..origin/\\` to see commits, and \`git diff\` (with stat) to understand the scope of changes. The user explicitly frames this as needing to know 'what changes were made vs what actually exists on main.' Always establish this baseline diff context first before analyzing or discussing PR content. - -* **Always conduct adversarial read-only code review before approving PRs**: When reviewing a PR, the user runs a structured adversarial review with explicit numbered probes (P1, P2, P3...) and severity-tagged findings (SHOULD-FIX, NIT). The review is strictly read-only — no file modifications, no branch switches. Each probe targets a specific correctness concern (e.g., flag propagation, guard completeness, test regression validity, timing/ordering). The review produces a final verdict (APPROVE/BLOCK) with itemized findings. When acting as reviewer, never modify files, never skip probes even if early ones pass, and always produce a non-empty report with explicit pass/fail status per probe. - - -* **Always conduct adversarial read-only PR reviews as a distinct, structured task**: When the user issues a PR review task, they frame it explicitly as adversarial and read-only: no file modifications, no branch switches, no working tree changes. Reviews must always produce a non-empty report. The user expects the reviewer to deeply understand the PR's intent, enumerate specific code locations and logic paths, identify edge cases and failure modes (e.g., infinite recursion, stale paths, error misrouting), and reference related context (Sentry issues, crash paths, bot comments). Reviews cover correctness, error handling, test coverage, and CI status. Never skip or defer findings — even partial findings must be reported. + +* **Always conduct thorough feasibility analysis before implementing features with native/WASM dependencies**: When approaching features that require native tooling, binary parsing, or WASM compilation (e.g., debug-files commands), the user consistently performs deep upfront feasibility research before writing any implementation code. This includes: enumerating all technical blockers (e.g., memmap2, zstd incompatibility with wasm32), classifying subcommands by complexity tier, evaluating multiple architectural approaches side-by-side, and identifying specific constraints (WASM size, native tool availability, Node SEA environment limits). The user expects the assistant to proactively surface blockers and unknowns rather than jumping to implementation. Provide structured analysis with clear go/no-go signals per feature before proposing code. * **Always conduct thorough PR reviews with severity-classified findings**: PR review standards: (1) Compare branch vs main first (\`git log main..origin/\\`, \`git diff --stat\`). (2) Verify every PR description claim against actual source files at specific line numbers — never trust PR metadata. (3) Classify findings as BLOCKING vs NON-BLOCKING with file paths and line numbers. (4) Flag LLM-generated planning artifacts (e.g., DOCS-AUDIT.md) as blocking violations of repo conventions. (5) Investigate root causes — check bundle output, trace esbuild variable renaming, identify silent regressions. (6) Run relevant check scripts and grep codebase directly rather than reasoning from PR metadata. - -* **Always confirm and send notifications to David (Dav1dde) when PR milestones are reached**: When a significant PR milestone is reached (merge, readiness for handoff, or status update), the user explicitly confirms sending a notification to David (Dav1dde/dav1d) via Beeper chat 557079. The user gives a short confirmation ('yup', explicit instruction) and expects the assistant to open the Beeper chat and send the message. This applies to: merged PRs that unblock David's work, status summaries of completed tracks, and follow-up replies to David's messages. The assistant should draft the message, confirm with the user, then send it via Beeper /open/557079. - * **Always coordinate task ownership with collaborators before starting implementation**: When working on a shared codebase with other contributors (e.g., dav1d), the user explicitly checks who is implementing what before starting work, to avoid parallel duplicate effort. They confirm ownership verbally (via chat/Beeper), claim the task, and only then begin implementation. This applies especially when a collaborator has signaled they might draft something themselves. The user also communicates task boundaries proactively — e.g., 'I'll take the debug-session wasm API so you don't need to draft it.' When coordinating, the user opens the relevant chat tool and sends a message to claim the work before branching or writing code. @@ -372,33 +414,12 @@ * **Always create a dedicated branch when updating fossilize versions**: When a new version of fossilize is released, always create a branch named \`chore/fossilize-{version}\` tracking origin/main, update the dependency, remove any functionality now handled natively by fossilize (e.g., \`stripCachedNodeBinaries()\` removed in 0.7.0), verify the build succeeds, then commit with \`chore: update fossilize to X.Y.Z\`. Follow this exact pattern: branch → update dep → remove superseded code → build verify → commit → PR. - -* **Always create a new branch from main before starting feature work**: When starting a new feature or fix, the user consistently switches to a new branch (e.g., \`byk/feat/...\`) from \`main\` before making any changes. The branch naming convention follows \`\/\/\\`. After completing the work, the user commits all relevant files together, pushes to \`origin\`, and creates a PR. The user also monitors CI checks and reviews automated bot feedback (e.g., Cursor Bugbot, Seer) on the PR, treating high-severity findings as legitimate bugs requiring immediate fixes before the PR is merged. - - -* **Always create follow-up PRs to address post-merge review comments**: When a PR has been merged but reviewers left unresolved comments (correctness bugs, breaking changes, style issues), the user creates a dedicated follow-up PR to address those comments rather than ignoring them or treating them as optional. The user tracks which comments were already self-resolved (e.g., fixed in a prior commit) versus which require new work, and scopes the follow-up PR specifically to the outstanding reviewer concerns. This applies even when the original PR is already merged and CI is green. - * **Always document invariants and non-obvious design decisions as inline code comments or JSDoc**: When implementing functions, types, or test utilities with non-obvious invariants, the user consistently adds explicit inline comments or JSDoc explaining \*why\* a design choice was made — not just what it does. Examples: why a function always returns non-null, why a branch is intentionally omitted, why a specific API is used over an alternative, and what would break if the pattern changed. These comments are written as assertions ('Always returned (never null)...', 'Always restore — never delete.') and reference the downstream consumer or failure mode. When generating or modifying code, always include this kind of explanatory commentary for invariants, idempotency guarantees, and intentional omissions. - -* **Always embed design rationale as inline comments on critical invariants**: When code enforces a non-obvious invariant or safety constraint, the user adds a verbatim explanatory comment directly at the enforcement site explaining \*why\* the rule exists, not just what it does. Examples: 'Always clean up intermediate files, even on failure' at the finally block; 'never target the same path — writing to the source would truncate it and corrupt the output' at the alternating-file logic; 'Always check for Homebrew first — the stored install info may be stale'; 'never emit drain' with the race condition explanation. When writing or modifying code that enforces such invariants, always include a concise inline comment explaining the underlying reason (corruption risk, stale data, hang condition, etc.). - - -* **Always encapsulate cleanup logic in finally blocks and keep it close to the resource that created it**: The user consistently enforces that cleanup of temporary resources (intermediate files, temp ZIPs, temp binaries) happens in \`finally\` blocks — guaranteeing cleanup even on failure. The cleanup code should live in the same function or module that created the resource, not delegated to callers. Examples: \`cleanupIntermediates()\` in \`applyPatchesSequentially()\`'s \`finally\`, temp ZIP deleted in \`finally\` in \`uploadSourcemaps\`, reader/writer cancellation in \`transformPatch\`'s \`finally\`. When refactoring, if a resource's lifecycle moves (e.g., intermediates eliminated by in-memory approach), the cleanup obligation moves with it or is removed entirely — never left as dead code. - - -* **Always enforce cleanup of intermediate files in finally blocks**: The user consistently emphasizes that intermediate/temporary files must always be cleaned up, even on failure. This is enforced via \`finally\` blocks rather than only in success paths. Specifically in patch chain application, intermediate files (e.g., \`${destPath}.patching.a\` and \`${destPath}.patching.b\`) must be deleted in a \`finally\` block so cleanup happens regardless of whether patching succeeds or throws. The user treats this as a non-negotiable design principle, often calling it out explicitly in code comments. When implementing any multi-step file operation that creates temporary files, always place cleanup logic in a \`finally\` block. - - -* **Always enforce read-only adversarial review mode with strict branch isolation**: When starting a code review session, the user consistently issues explicit system-level directives: (1) act as adversarial READ-ONLY reviewer — never modify files, never switch branches, never touch the working tree; (2) never return an empty report; (3) specify the exact repo, working dir, current checkout branch (which must not be switched), and PR branch to review. The user reviews PRs while on a \*different\* parallel branch and accesses PR content exclusively via \`gh pr diff\`, \`gh pr view\`, and \`git show origin/\:\\`. Follow these constraints strictly without prompting for confirmation. - * **Always enforce strict read-only constraints during adversarial PR code reviews**: When the user issues an adversarial code review task, they consistently impose hard read-only constraints: never modify, stage, or commit any tracked files in the working repo; never touch \`.lore.md\`; and place any throwaway test files only inside existing vitest include directories (never \`/tmp\`). They also require mutation testing directives to validate test coverage (e.g., introduce specific bugs and confirm tests fail). The review must never return an empty report. The assistant should treat these constraints as non-negotiable and apply them automatically whenever a PR review is framed as 'adversarial' or 'read-only'. - -* **Always explicitly confirm and narrow scope before implementation begins**: When presented with a problem or plan that contains multiple potential fix paths or related issues, the user explicitly confirms which items are in scope and which are deferred/out of scope before work proceeds. The user treats scope confirmation as a required gate before implementation. When the user marks something as out of scope, the assistant should update the plan to reflect this clearly (e.g., label deferred items explicitly) and not revisit them unless the user re-opens them. Do not assume adjacent issues are in scope just because they are related. - * **Always explore codebase systematically before implementing changes**: When investigating a feature or bug, the user consistently requests reads of multiple related files in sequence before any implementation: the command file, the API layer, the type definitions, the formatters, and any generated/skill files. They want to understand the complete data flow end-to-end (API response → types → command → output/formatter) before making changes. Always read all relevant files across these layers when asked to investigate or analyze a feature, rather than jumping straight to implementation or reading only one file. @@ -408,60 +429,66 @@ * **Always fetch security advisories and Dependabot alerts before creating a fix plan**: The user consistently directs you to run two \`gh api\` commands: one for security advisories and one for Dependabot alerts, using headers \`Accept: application/vnd.github+json\` and \`X-GitHub-Api-Version: 2022-11-28\`. After retrieving the data, you must analyze the vulnerabilities and create a detailed plan for fixing them. This sequence recurs across multiple sessions, demonstrating a preferred workflow for data-driven security remediation planning. + +* **Always fix Biome lint errors after implementation before considering work complete**: After implementing or modifying code, the user consistently runs Biome lint checks and expects all errors to be resolved before the task is done. This includes: running \`biome check --write\` for safe auto-fixes first, then \`--unsafe\` if needed, then manually fixing remaining issues (hoisting regexes to module scope, converting parameter properties to explicit fields, reducing cognitive complexity by extracting helpers, removing non-null assertions, fixing import ordering). The user expects a final clean Biome run (0 errors) followed by a clean TypeScript check (tsc exit 0) before marking work complete. Pre-existing errors in unrelated files are noted but not required to fix. + * **Always flag import framework mismatches as blocking CI issues in PR reviews**: When reviewing PRs, the user consistently identifies test files using the wrong import framework (e.g., \`bun:test\` instead of \`vitest\`) as a BLOCKING issue, not a non-blocking suggestion. This applies when the project has migrated frameworks and all other test files use the new one. The user expects the reviewer/assistant to explicitly label it as blocking (B1, B2, etc.) and distinguish it from non-blocking issues (N1, N2, etc.), using a clear severity classification system in PR review feedback. - -* **Always follow maintainer/reviewer feedback precisely when updating PRs**: When a PR receives review comments from maintainers, the user expects all feedback to be addressed faithfully and completely before proceeding. This includes: refactoring delegation chains as suggested, changing return types per reviewer preference, removing unnecessary abstractions (e.g., SharedCursor), updating tests to match reviewer style (unwrap() instead of Result returns), and deferring out-of-scope items to future PRs. The user treats reviewer direction as authoritative and confirms the direction explicitly before starting implementation. Do not skip or partially address review comments — implement each one fully. - - -* **Always follow red-green-refactor TDD cycle before implementing fixes**: When fixing bugs, the user consistently writes failing tests first (red), confirms they fail with the exact expected error, then applies the fix and verifies tests pass (green), before running typecheck and lint. This applies to all bug fixes, not just new features. The red test must reproduce the actual crash/bug symptom. After green, always run the broader test suite to catch regressions, then typecheck (exit 0) and lint (no fixes applied) before committing. Commit using conventional commit format (\`fix(scope): description\`). - * **Always honor Retry-After header when present in LLM adapter**: (architecture) LLM adapter backoff in \`packages/gateway/src/llm-adapter.ts\`: Always honor Retry-After — \`backoffMs()\` returns \`Math.min(retryAfterMs, cap)\` where cap is \`RETRY\_AFTER\_CAP\_URGENT\_MS=8\_000\` or \`RETRY\_AFTER\_CAP\_BACKGROUND\_MS=120\_000\`. TRANSIENT\_CODES={429,500,502,503,529}; MAX\_RETRIES: rate-limit=3, server=3, urgent=2. Backoff (no Retry-After): 429 background=60s/120s/180s; urgent=min(1000×2^n,4000); 5xx background=min(1000×2^n,8000). Bearer tokens inject \`billingBlock\` as first system block; \`signBody()\` replaces \`cch=00000\` with xxHash64. System prompt caching uses \`cache\_control:{type:'ephemeral',ttl:'1h'}\`. \`opts.thinking\` NOT forwarded to bare API calls. Circuit breaker tripped on non-urgent 429s via \`tripCircuitBreaker()\`. Gateway auth (\`packages/gateway/src/auth.ts\`): \`AuthCredential\` (api-key|bearer). Two-level lookup: \`sessionAuth\` Map → \`lastSeenAuth\` global fallback via \`resolveAuth(sessionID?)\`. \`authFingerprint()\` = SHA-256 truncated to 16 hex chars. * **Always investigate root cause by tracing through multiple specific code layers before accepting a fix**: When facing a runtime bug (especially undefined values from framework internals), the user consistently demands thorough investigation across multiple layers — framework source code (node\_modules), wrapper utilities, bundler config, and call sites — before accepting any fix. The user explicitly rejects surface-level explanations and pushes for tracing the exact code path that produces the unexpected value. Only after exhausting the investigation does the user accept a defensive fix strategy. When directing investigation, the user specifies concrete areas to search (e.g., 4 specific code locations). Always read and analyze the relevant framework internals, not just application code. - -* **Always keep David (dav1d) informed of progress via Beeper DM after completing milestones**: After completing a significant milestone (PR ready, CI green, feature wired in, bug found/fixed), the user consistently shares an update with David via Beeper DM. Messages are casual, concise, and written in BYK's voice — using lowercase, emoji, backticks for code, and a friendly sign-off. The user also checks David's replies before composing follow-ups. When the assistant drafts a message, the user expects it to be sent (or pre-filled for review) before moving on to the next task. This pattern applies specifically to the symbolic-wasm / CLI migration work shared with David. - * **Always keep work-in-progress repos at permanent paths under ~/Code/getsentry/**: When working on a repository across sessions, the user expects it to live at a permanent location under \`~/Code/getsentry/\\` (e.g., \`~/Code/getsentry/symbolic\`, \`~/Code/getsentry/cli\`), not in ephemeral directories like \`/tmp\`. If a clone exists in \`/tmp\`, move it to the permanent location and remove the temporary one. This applies to any \`getsentry\` org repo being actively developed on. Always use the permanent path when referencing or operating on the repo in subsequent steps. + +* **Always maintain a structured plan file before starting implementation**: Before beginning any implementation work, the user creates or references a dedicated plan file (in \`.opencode/plans/\`) that documents scope, design decisions, task breakdown, and deferred items. The plan is read and confirmed before coding starts, and may be updated (e.g., appended with roadmap tiers) during the planning phase. Only after the plan is finalized and explicitly declared complete does the user transition to implementation. Always read the plan file early in the session, confirm its contents, and treat it as the authoritative source of truth for task ordering and design decisions. + * **Always merge PRs despite transient CI failures when root cause is confirmed external**: When all CI failures are confirmed to be caused by external/transient issues (e.g., Anthropic outage, provider unavailability) rather than code problems, the user proceeds to merge the PR without waiting for those checks to go green. The user investigates the failure root cause first, confirms it's unrelated to the code changes, then merges — even if the merge state shows UNSTABLE due to failing checks. This applies specifically when: (1) the failing checks have no code findings, (2) the failures are clearly transient/infrastructure issues, and (3) all code-related checks pass. * **Always migrate Bun-specific APIs and tooling to Node.js equivalents**: Migrating from Bun to Node.js/pnpm. Replace Bun-specific APIs: \`Bun.spawn\`→\`node:child\_process\`, \`Bun.sleep\`→\`node:timers/promises\`, \`bun:sqlite\`→\`node:sqlite\`, \`bun run\`→\`pnpm run\`/\`tsx\`, \`bun.lock\`→pnpm lockfile. All packages in \`devDependencies\` (never \`dependencies\`). Exception: \`script/build.ts\` uses fossilize (not \`Bun.build\`) and stays on Bun for the build-binary CI job. \`script/bundle.ts\` (npm bundle) uses esbuild via tsx and is Node-native. \`packageManager\` field in package.json: \`pnpm@10.11.0\`. After each migration phase, ensure lint and tests pass before committing. Migration is complete as of main branch (bun.lock deleted, vitest.config.ts added, all test files migrated to vitest). + +* **Always monitor CI status until all checks pass or fail before proceeding**: The user consistently polls and tracks CI check statuses after pushing commits or opening PRs, waiting for all checks to reach a terminal state (pass/fail/skip) before moving on. They expect the assistant to actively monitor pending checks (including external gates like \`warden\` and E2E tests), report intermediate states, and surface any bot review comments (Cursor Bugbot, Seer, Sentry bot) that appear during the CI run. Only after all checks are green (or explicitly acknowledged as skipped) does the user consider the PR ready for the next step. + * **Always perform thorough codebase exploration before designing or implementing fixes**: When investigating a bug or feature, the user consistently requests comprehensive upfront exploration across multiple files before any code changes. This includes: reading relevant command and API files completely, searching for all references to key terms/parameters, checking type definitions in SDK/node\_modules, and understanding the full data flow from flags to API calls. The user expects the assistant to map out the entire call chain, identify misleading comments, and surface all related code paths before proposing a solution. Do not jump to fixes — first read all relevant files thoroughly and report findings. * **Always perform thorough quality reviews of PRs distinguishing blocking vs non-blocking issues**: When reviewing PRs, the user expects a structured quality review that: (1) categorizes issues as BLOCKING vs non-blocking/low-priority, (2) verifies each claimed change against the actual codebase, (3) flags LLM-generated planning artifacts (e.g., DOCS-AUDIT.md) that violate repo conventions as blocking issues, (4) checks for missed/inconsistent changes across all affected files, and (5) confirms correct changes are working as intended. The user wants specific file paths and line numbers cited for each issue. Non-blocking issues should still be reported but clearly distinguished from blockers. + +* **Always pin @sentry/symbolic to exact published version before pushing/opening PRs**: When working with @sentry/symbolic in the CLI repo, the user consistently ensures the package.json pins the exact published npm version (e.g., '13.5.0') — not a local file: tarball — before committing, force-pushing, or opening a PR. Local tarball pins (file: specifiers) are used only during development/validation and must be replaced with the exact npm version before the final commit. If a commit was made with a local/stale pin, the user amends it to fold the version bump into the same commit rather than adding a separate bump commit. + * **Always plan systemic fixes with structured multi-problem breakdowns before implementation**: When the user identifies documentation or tooling issues, they consistently organize them as numbered problems with precise file locations, line numbers, and root causes before any code is written. They expect the assistant to engage at the planning level first — proposing detection strategies, fix approaches, and tradeoffs — and to consolidate related problems (e.g., merging overlapping tasks) rather than treating each in isolation. Plans are written to files and iterated on. Implementation only follows after the plan is agreed upon. The user prefers systemic/automated fixes (e.g., derive patterns from package.json) over one-off patches. * **Always prefer systemic/automated solutions over one-off fixes**: When the user identifies errors, gaps, or problems, they explicitly direct the assistant to create or fix systems that prevent the entire class of errors in the future, rather than applying isolated one-off fixes. This applies especially when evaluating code quality, reviewing PRs, or addressing bugs. The user wants automated checks (e.g., CI steps, lint rules, scripts) and general solutions that scale, not patches that only address the immediate symptom. When planning or executing fixes, always ask: 'Can this be automated or systematized?' and prefer that approach. - -* **Always proceed with implementation immediately when stating 'I need to move forward'**: When the user says 'I need to move forward' (or equivalent), they are signaling to stop discussing alternatives and immediately execute the next concrete action. This phrase appears when the user has heard enough analysis/options and wants the assistant to proceed with implementation without further deliberation. Do not offer more options, ask clarifying questions, or summarize tradeoffs — just start doing the work. The user has already made their decision and wants forward momentum on the current priority task. + +* **Always provide exact code context and error details when issues arise**: When the assistant encounters a tool failure, formatting problem, or code error, the user consistently supplies exact file content, error messages, and specific directives to enable precise fixes. This includes pasting relevant code snippets, exact error output, and detailed instructions for replacement or refactoring. The user expects the assistant to use this information to accurately identify and correct issues without guessing, preferring concrete context over assumptions. * **Always read and document full file details before proceeding with analysis or implementation**: When exploring a codebase, the user consistently reads files in full and records comprehensive structured details: exact line counts, all imports, every exported type/interface with their fields, all constants, all function signatures with their logic, and any notable comments or assertions. This applies to both source files and build/tooling scripts. The user expects the assistant to capture and reference these details precisely rather than summarizing loosely. When examining related files (e.g., a module and its consumers), the user reads each completely before drawing conclusions. This pattern applies during architecture exploration, feature planning, and documentation generation tasks. + +* **Always read existing similar command implementations before writing new ones**: Before implementing a new CLI command, the user systematically reads multiple existing command files in the same command group (e.g., \`check.ts\`, \`bundle-jvm.ts\`, \`bundle-sources.ts\`) plus related utilities (\`read-file.ts\`, \`resolve-target.ts\`) to extract reusable patterns: auth settings, positional/flag definitions, error handling conventions, shared utilities, exit code patterns, and output types. Follow this pattern by surveying existing sibling commands and shared libs before generating new command code, and mirror their conventions exactly (e.g., \`auth: false\`, \`ValidationError\` usage, \`CommandOutput\` yield pattern, hint strings). + + +* **Always read existing test files before writing new tests to match established patterns**: Before writing tests for a new feature, the user reads the existing test file for the same command or a closely related command to understand the exact testing conventions in use: temp dir setup (mkdtemp/rm vs useTestConfigDir), fixture naming, how the app runner is invoked, how stdout is captured, and what assertion style is used. Apply the same patterns (same helpers, same fixture format, same run() call shape) when writing new tests rather than inventing a new structure. + * **Always read source files thoroughly before asking questions or making changes**: The user consistently reads full source files (often 400-900+ lines) and traces complete data flow pipelines across multiple modules before taking action. They examine types, constants, function signatures, and cross-module dependencies in depth. They do not ask clarifying questions upfront — instead they investigate the codebase themselves to build a complete mental model. When helping this user, proactively read all relevant files in full, trace imports and data flows end-to-end, and present comprehensive findings rather than asking what they want to know. Assume they want the full picture, not a summary. * **Always record detailed post-merge state in session observations including squash commit hash, adjacent commits, and completed todo items**: After a PR is merged, the user consistently logs: (1) the squash commit hash and full commit message, (2) the names of adjacent commits on main at merge time, (3) confirmation that all todo/task items are completed with their descriptions, and (4) a technical summary of what the fix actually does. This pattern appears in session observations marked with 🟡 timestamps. When assisting with PR workflows, ensure post-merge summaries capture the squash SHA, neighboring commits on main, task completion status, and a concise fix description — not just a simple 'merged' confirmation. - -* **Always record detailed session observations with timestamped bullet points**: The user consistently documents work sessions as structured observation logs with emoji status indicators (🟡 in-progress, 🔴 decision/blocker), timestamps in (HH:MM) format, and granular bullet points covering: design decisions made and alternatives rejected, exact file paths and line numbers changed, compile/test results with timing, and explicit rationale for choices. These logs appear to be maintained as a persistent record across sessions. When working, always produce or update this style of log — capturing not just what was done but why alternatives were rejected, exact technical details (commit hashes, line numbers, timing), and outcome verification (test counts, build success). - * **Always record precise pre-edit state before making code changes**: Before any code modification, the user expects a thorough read and documentation of the exact pre-edit state: function signatures, line numbers, types, logic flow, imports, and any bugs or design decisions. This applies especially when refactoring — the user wants the assistant to capture what exists (e.g., 'Pre-edit \`loadOldBinary\` bug: ...', 'Pre-edit \`OldFileHandle\` type: ...') before proposing or applying changes. The pattern also includes noting which exports/tests reference the code being changed. This ensures the assistant has full context and can reason about regressions, API compatibility, and test impacts before touching anything. @@ -469,17 +496,14 @@ * **Always reference external tools and prior art when exploring build/size optimization approaches**: When investigating build pipeline improvements or binary size reduction, the user consistently references specific external tools, repos, and contacts (e.g., Vercel's build-binary.mjs, binpunch, fossilize, Melkey's work) as starting points for evaluation. They expect the assistant to analyze whether each referenced approach actually applies to their specific setup before recommending it. The user wants a clear breakdown of what's relevant vs. irrelevant given their actual architecture (e.g., 'we already use esbuild full bundling, so node\_modules stripping doesn't apply'), followed by concrete alternative opportunities ranked by impact. -* **Always reply to automated review bot comments after fixing the reported issue**: When an automated review tool (e.g., Cursor Bugbot, Seer Code Review) leaves an inline comment identifying a bug or issue, the user expects a reply to that specific comment after the fix is committed. The reply should reference the fix commit hash and briefly explain what was corrected and why the original code was problematic. This applies to inline PR review comments from bots, not just general PR-level feedback. Always post the reply using the comment's ID before considering the fix workflow complete. - - -* **Always request adversarial, read-only code reviews with explicit scope boundaries**: When asking for code reviews, the user consistently specifies: (1) READ-ONLY mode — never modify files, switch branches, or alter the working tree; (2) a specific PR number and repo; (3) explicit focus areas (e.g., behavior parity, error handling, type correctness, test quality, edge cases); (4) the review must never be empty even if findings are partial. Always use \`gh pr diff\`, \`gh pr view\`, and \`git show\` to read PR content without checking out. Deliver a critical, objective report covering all specified focus areas. - - -* **Always request critical self-review of PRs before considering them ready to merge**: PR review workflow: (1) After completing a PR, always do a critical self-review (subagent for objectivity) before merge. Structure: severity levels (Critical/High/Medium/Low), actionable findings, cover correctness/safety/CI wiring/docs/changelog. (2) Address all bot/automated review comments (Cursor Bugbot, Sentry bot) as actionable work items — enumerate, investigate, fix, verify no regressions before merge. (3) Verify PR merge status (MERGED state, all checks passed, remote branch deleted) before starting follow-up work. (4) When reviewing PRs, verify factual claims (e.g. 'no existing code changed') with concrete tool-based evidence (git diff, file reads) — never accept at face value. (5) After requesting a merge, confirm merge commit OID, mergedAt, mergedBy, and final state. When blocked, explain why (ruleset vs. branch protection, self-approval restrictions). Post-merge: run verification tests, post results as PR comment, then merge. \`gh pr view --json merged\` field unavailable — use \`mergeCommit\`, \`mergedAt\`, \`mergedBy\`, \`state\`. +* **Always reply to automated review bot comments after fixing the reported issue**: After fixing a Bugbot-reported issue and pushing the fix commit, the user replies directly to the Bugbot inline comment thread with a message that references the fix commit hash and briefly explains what was done. This applies to each individual Bugbot finding that was addressed. The reply should include the commit SHA, confirm the fix, and optionally summarize the resolution. Do not just push the fix silently — always close the loop by posting a reply to the specific Bugbot comment thread. * **Always request READ-ONLY critical code reviews with explicit focus areas**: When asking for a code review, the user consistently specifies: (1) it is READ-ONLY — no file modifications; (2) it is 'critical' or 'objective'; (3) the exact repo path, branch, and base branch; (4) the specific files under review; (5) explicit focus areas that CI cannot catch (e.g., correctness, memory management, format compatibility, error handling, convention compliance). Always treat these reviews as advisory-only, never modify files, and structure feedback around the user-specified focus areas rather than general style or formatting concerns. + +* **Always request research/review phases before implementation or modification**: The user consistently separates research/review from implementation. Before writing or modifying code, they explicitly request a study or review phase (e.g., 'research/review ONLY — no file modifications') to understand existing patterns, analogs, and infrastructure. They provide relevant context files and comparable implementations as reference material. Only after this research phase do they proceed to implementation. When reviewing, the assistant should read and analyze without making changes, then summarize findings, design decisions, and potential issues before any code is written. + * **Always require a critical self-review before merging any PR**: Before merging any PR, the user consistently requires a thorough critical self-review of both the code and the PR description, explicitly preferring a subagent for objectivity/adversarial perspective. The subagent must be read-only (no file modifications, no branch switching). The review should cover: correctness of logic, accuracy of PR description claims, edge cases, regression test adequacy, and any stale/aspirational statements. The review must always produce a report (never empty). Issues found should be fixed before merging. This is a non-negotiable pre-merge gate, not optional. @@ -489,11 +513,11 @@ * **Always respond to adversarial/bot review findings by applying concrete fixes and re-running mutation/regression tests to verify**: When a code review (adversarial subagent, Cursor Bugbot, Seer, or similar) identifies a vacuous test, inaccurate comment, or swallowed error, the user expects: (1) the specific finding to be fixed immediately (strengthen the test assertion, correct the comment, add a tradeoff note), (2) mutation testing or targeted re-runs to confirm the fix now catches the previously-uncaught mutation, and (3) a full test+lint+typecheck pass before committing. The user also expects replies posted to bot review comments acknowledging the fix with the commit SHA. Do not defer or mark findings as 'low priority' — apply all SHOULD-FIX items before committing. - -* **Always review drafted messages before sending, then explicitly approve sending**: When the assistant drafts a Slack/Beeper message on the user's behalf, the user expects it to be pre-filled or staged for review — not auto-sent. The user then explicitly approves sending (e.g., selecting an option, confirming intent). The assistant should never auto-send communications without a clear send instruction from the user. Once the user gives explicit approval (e.g., 'send it', selecting a send option), the assistant should proceed to send immediately. + +* **Always run Biome auto-fix first before manually addressing lint errors**: When lint errors are found, the user expects the assistant to first attempt Biome's auto-fix (\`biome check --write\` or equivalent) to resolve formatting and safe-fixable errors automatically, then assess what manual fixes remain. The user does not want manual edits applied to formatting issues that Biome can handle automatically. After auto-fix, remaining errors (non-formatting issues like \`noParameterProperties\`, \`noEvolvingTypes\`, complexity violations) are addressed manually. The workflow is always: run auto-fix → verify results → fix remaining errors manually → confirm clean lint exit code 0. -* **Always run lint and typecheck after code changes, then fix all errors before proceeding**: After implementing or modifying code, the user consistently runs the full lint (Biome) and typecheck (tsc) pipeline and expects all errors to be resolved before moving on. When lint errors are found (e.g., \`noParameterProperties\`, complexity violations), the user applies fixes immediately and re-runs checks to confirm a clean exit. The pattern is: implement → lint → fix lint errors → re-lint to confirm clean → typecheck → confirm clean → run tests. The user does not consider a task complete until lint, tsc, and tests all pass with zero errors. +* **Always run lint and typecheck after code changes, then fix all errors before proceeding**: After each code modification, the user consistently executes the project's typechecker (tsc), linter (Biome), and test suite (vitest) and shares the complete output with the assistant. This iterative edit-verify-fix cycle allows the assistant to catch and correct any issues—such as type errors, lint violations, or failing tests—before proceeding. The user expects the assistant to diagnose the root cause of all reported failures and apply targeted fixes, then re-run verification until all checks pass cleanly. This pattern maintains high code quality and prevents regressions throughout development. * **Always sequence npm publication before CLI integration (no vendored blobs)**: Always sequence npm publication before CLI integration (no vendored blobs): Publish upstream packages to npm first through the proper release pipeline before integrating into the CLI. Correct sequence: (1) create and merge full upstream PR including CI, build, and craft/npm targets, (2) wait for npm publish, (3) open/merge CLI PR consuming it as a proper npm dependency. CLI PR #1109 (MERGED): \`@sentry/symbolic@13.3.0\` consumed as devDependency (pinned exactly — no \`^\` or \`~\` for Sentry-scoped packages). 3-path wasm loader: (1) SEA: \`node:sea.getRawAsset(DIF\_WASM\_ASSET\_KEY)\`, (2) npm bundle: sibling \`dist/vendor/symbolic\_bg.wasm\` via \`existsSync(new URL('./vendor/symbolic\_bg.wasm', import.meta.url))\`, (3) dev: \`readFileSync(\_require.resolve(SYMBOLIC\_WASM\_SUBPATH))\`. \`@sentry/symbolic/symbolic\_bg.wasm\` marked \`external\` in esbuild (both \`build.ts\` and \`bundle.ts\`). Vendored crate \`crates/dif-wasm/\` and \`src/lib/dif/vendor/\` fully deleted. @@ -501,9 +525,6 @@ * **Always stage all modified files before committing, not just already-staged ones**: When preparing to commit, the user reviews git status and expects ALL modified files to be staged together — not just files already in the index. If unstaged modified files exist alongside staged ones, the user treats this as an incomplete commit state that needs to be resolved before proceeding. The user reviews the full list of changed files (staged + unstaged) as a checklist against completed tasks, and expects the commit to encompass all related changes from the session as a single coherent unit. - -* **Always stage and commit \`.lore.md\` alongside code changes**: When committing code changes, the user consistently stages and includes \`.lore.md\` updates in the same commit. This is treated as a repo rule, not optional. When preparing commits, always ensure \`.lore.md\` is updated with relevant session observations and staged alongside the code files. The commit message follows conventional commit format (e.g., \`feat(scope): description\`). Do not create commits that omit \`.lore.md\` when it has been updated during the session. - * **Always start with thorough codebase exploration before planning or implementing changes**: When the user wants to add a feature or make a change, they consistently begin by identifying all relevant code locations first — type definitions, default values, API layer, command files, schemas, and tests — before writing any code. They explicitly enumerate investigation targets upfront. This applies to both small changes (adding a sort value) and larger features (new commands). The assistant should treat the initial request as a research/mapping task, perform parallel searches across all relevant files, and present a comprehensive picture of the existing implementation before proposing any changes. Confirmed across many sessions including issue sort addition (June 22 2026). @@ -513,39 +534,24 @@ * **Always switch from plan mode to build mode before executing changes**: The user consistently uses a two-phase workflow: first planning (read-only exploration, writing a plan file), then explicitly approving a switch to build/agent mode before any changes are executed. When the user approves the mode switch, the assistant should immediately begin executing the existing plan file — typically by re-reading the key files to be modified. Never execute changes while still in plan mode, even if the plan is complete and approved. Wait for the explicit mode-switch approval before acting. - -* **Always track implementation progress in \`.lore.md\` as a living project log**: The user maintains a \`.lore.md\` file as a persistent, append-only session log that records what was built, why decisions were made, and what remains. After completing meaningful work (new commands, bug fixes, refactors, CI fixes), the user stages and commits \`.lore.md\` alongside the code changes. The file captures: file-level diffs with line counts, key implementation details (types, constants, logic), tool/library choices, and task queue state. When pushing PRs, \`.lore.md\` is included in the changeset. The assistant should treat \`.lore.md\` updates as a required step after any significant implementation work, not optional documentation. - * **Always track migration progress with explicit completion criteria and remaining blockers**: The Bun→Node migration is complete only when \`Bun.build({ compile: true })\` is replaced by fossilize in \`script/build.ts\`. As of the current session, \`script/build.ts\` already uses fossilize (\`--no-bundle\`, \`--out-dir dist-bin\`, \`--node-version lts\`) with esbuild for bundling — the migration is complete. NODE\_VERSION='lts' in build.ts. The user expects the assistant to track this state across sessions and confirm the migration is done. When resuming sessions, verify \`script/build.ts\` does not contain \`Bun.build({ compile: true })\` before declaring migration complete. * **Always track pre-existing failures separately from introduced regressions**: When running tests, the user consistently distinguishes between failures that existed before their changes and failures caused by their changes. They verify pre-existing failures by checking out main/stashing changes and confirming the same failures reproduce. Only new failures introduced by the current branch are treated as actionable. When reporting test results, always clarify which failures are pre-existing (with evidence) versus newly introduced, and never treat pre-existing failures as blockers for the current fix. - -* **Always track project state through structured session observations before acting**: The user consistently works across multiple sessions on long-running, multi-PR projects (e.g., symbolic-wasm, debug-files CLI). Before taking action, they review prior session context and orient around the current state of PRs, branches, and design decisions. They expect the assistant to fetch and summarize PR/code details proactively, record decisions and findings as structured observations, and maintain continuity across sessions. When resuming work, always re-establish context (branch state, open PRs, pending items) before proceeding with implementation or review tasks. - * **Always use Beeper deep-link /open/557079 to contact David**: When the user needs to send a message to David (dav1d/Dav1dde), they consistently use the Beeper deep-link command \`/open/557079\` to open the DM chat. This applies whenever coordinating on API availability, confirming implementation details, sharing PR status, or asking blocking questions. The assistant should use this exact deep-link path without prompting the user for contact details or confirmation of how to reach David. * **Always use specific file paths when staging git commits, never \`git add -A\`**: When staging files for a commit, the user explicitly lists individual file paths rather than using \`git add -A\` or \`git add .\`. This avoids accidentally including untracked files (e.g., stray markdown files, \`.lore.md\` conflicts) or unintended changes. When a rebase or merge goes wrong due to bulk staging, the user aborts and re-applies changes manually with targeted \`git add \\` calls. Always recommend staging by explicit file path, and warn against broad staging commands that could sweep in unrelated files. - -* **Always validate changes with local builds and tests before committing**: Before committing or finalizing a branch, the user consistently runs local validation: native tests (cargo test), target-specific builds (e.g., wasm32-unknown-unknown), fmt checks, and clippy. This applies especially after non-trivial changes like dependency swaps, cfg-gating, or CI workflow additions. The assistant should proactively run these checks (or prompt the user to) before declaring work done, and report pass/fail counts and timing. Only after all checks pass should files be committed. - * **Always validate published package artifacts via pack-install smoke tests, not just unit/wasm-pack tests**: When working on WASM/npm packages, the user consistently insists on validating the actual shipped artifact (via \`npm pack\` → install tarball → import → \`node:test\`) rather than relying on \`wasm-pack test\` or similar build-time tests. The user understands that \`wasm-pack test\` builds its own glue and never exercises the \`--target web\` \`symbolic.js\` + \`initSync\` + exports map that consumers actually load. When bugs are found (e.g., class name shadowing JS globals), the user attributes the miss to this gap and adds a packaged-artifact smoke test as a permanent fixture. Always recommend or implement pack-install smoke tests for npm/WASM packages, not just wasm-pack or unit tests. * **Always verify all tasks are complete before committing, then commit with descriptive conventional commit messages**: Before committing, the user reviews a task checklist to confirm all items are completed or in-progress. They stage all relevant files, then commit with a conventional commit message (e.g., 'docs: fix stale Bun references and add systemic doc checks') that summarizes the scope of changes. The commit message reflects the primary theme of the work session. The user expects the assistant to help verify task completion status, check git status, and confirm the commit succeeds with a summary of files changed and insertions/deletions. - -* **Always verify branch state and confirm key artifacts before pushing or sharing work**: Before pushing a branch or sharing it (e.g., opening a PR, force-pushing, tagging for review), the user consistently confirms: the branch base/HEAD commit hash, the diff summary (files changed, insertions/deletions), that tests/smoke tests pass with explicit counts, and that key generated artifacts (e.g., tarball name, wasm size, renamed symbols) are present at expected locations. This applies after rebases, after fixes, and before handing off to collaborators. Always surface these confirmation details proactively: branch name, HEAD commit, diff stats, test results, and artifact names/sizes. - - -* **Always verify byte-identity of critical code paths before merging refactors**: When a refactor touches existing code paths (especially serialization/output functions like \`write\_object\`), the user requires empirical byte-identity verification: build both BASE and HEAD, run the same operation, and compare sha256 hashes of the binary outputs. This applies even when source diffs show no deletions. The user expects the assistant to: (1) identify the correct test harness/insertion point, (2) run the actual builds, (3) compare hashes explicitly, and (4) report results with sha256 + byte count. Results are typically posted as PR comments before merging. This pattern applies to WASM builds, Rust crate outputs, and any refactor claimed to be 'purely additive' or 'behavior-identical'. - * **Always verify CI passes after pushing commits to PRs**: After pushing commits to a PR, the user consistently checks CI status and waits for all checks to go green before considering the work done. This includes checking each individual CI job (linting, tests across platforms, doc comments, etc.), investigating any failures locally to reproduce them, and pushing fixes if needed. The user treats a clean CI run as a required completion criterion for each PR push, not an optional step. When CI reveals issues (e.g., broken intra-doc links, version mismatches, merge conflicts blocking workflow triggers), the user immediately diagnoses and fixes them. CRITICAL: if CI jobs are entirely absent (not failing — just missing), check \`mergeable\`/\`mergeStateStatus\` first — merge conflicts silently suppress \`pull\_request\` workflow triggers. @@ -561,26 +567,29 @@ * **Always verify compilation after applying code changes**: After making code changes (edits, refactors, new impls, or fixes), the user consistently runs a compile/check command (e.g., \`cargo check\`, \`cargo check --target wasm32-unknown-unknown\`, \`cargo check -p \ --all-features\`) to confirm the changes compile cleanly before proceeding. If compilation fails, the user investigates the root cause, applies a fix, and re-runs the check until it passes. This applies to both library crates and wasm targets. The assistant should proactively suggest or run the appropriate compile check after any code modification, and report the result (pass/fail, duration, any warnings or errors). + +* **Always verify fix targets by reading full file content before applying changes**: Before making any code edit, the user retrieves and reads the full file content to confirm the exact line numbers, surrounding context, and precise condition to change. This applies especially when acting on bugbot/linter findings — the user does not trust the reported line number alone but validates the actual code structure first. When proposing or applying fixes, always confirm the specific line and expression to change by referencing the retrieved file content, not just the issue description. + * **Always verify lint and typecheck pass after code changes before committing**: Always verify lint and typecheck pass after code changes before committing: Run Biome lint and typecheck after implementing or modifying code. When lint errors appear, verify whether pre-existing (use git stash) vs. newly introduced — fix only newly introduced issues. Expect clean lint output (0 errors) and passing tests before committing or pushing. Use \`npmx.dev\` (not npmjs.com or npmx.com) when searching for existing npm packages. + +* **Always verify npm publish status before assuming merged code is available**: When working with packages (especially \`@sentry/symbolic\`), the user consistently checks whether a merged PR has actually been published to npm before proceeding. They inspect dist-tags, recent version lists, installed package file contents, and exported class/method surfaces to confirm the installed version matches expectations. If the needed API (e.g., \`debugSession()\`) is only on \`master\` but not yet published, they treat it as unavailable and work around it. Always check \`npm dist-tags\`, installed \`.d.ts\` exports, and actual \`node\_modules\` file contents rather than assuming merged == published. + * **Always verify PR claims against actual codebase before accepting changes**: When reviewing a PR, the user consistently directs the assistant to check each stated claim against the real source files on the main branch rather than trusting the PR description or commit messages. This applies especially to documentation PRs: the user wants specific file paths, line numbers, and code excerpts cited as evidence. The user also cross-checks automated tooling (scripts, CI configs) against what they actually produce. When a PR introduces fixes, the user wants confirmation that the underlying problem genuinely existed and that the fix is correct — not just that the PR author says so. Always run the relevant check scripts and grep the codebase directly rather than reasoning from PR metadata alone. - -* **Always verify PRs are purely additive (zero deletions) before pushing**: Before force-pushing or finalizing a PR, the user explicitly verifies that no existing code lines were deleted — only new lines added. This involves running \`git diff base..HEAD | grep -E '^-\[^-]'\` or checking diff stats for zero deletions, and confirming byte-identity of unchanged files by removing new additions and diffing against base. The user treats any unintended deletion as a blocker requiring a fix before the PR is considered ready. This pattern applies especially when refactoring is involved and the stated goal is a purely additive change. - - -* **Always verify tests catch real bugs via TDD regression check (revert-to-fail)**: After implementing a feature and writing tests, the user consistently performs a TDD regression verification: temporarily reverts or breaks the implementation to confirm the new tests actually fail as expected, then restores the fix. This validates that tests are genuinely testing the behavior rather than passing vacuously. The assistant should proactively perform this revert-to-fail check after writing tests, report which tests failed and with what values, then restore the correct implementation before declaring the work done. - * **Always work around the worktree conflict error when merging to main**: When merging PRs locally, the user consistently encounters \`fatal: 'main' is already used by worktree at ...\` and expects a workaround to be applied automatically rather than treating it as a blocking error. The merge is always completed successfully despite this error (e.g., using \`gh pr merge\` via CLI or other workaround). Never stop or report failure when this specific worktree conflict appears — proceed with the merge using an alternative method and confirm the PR was merged successfully. * **Always work from a structured plan file before executing multi-step tasks**: When tackling multi-step or multi-file changes, the user consistently creates a formal plan file (e.g., \`.opencode/plans/\-\.md\`) during a planning phase before any edits are made. The plan enumerates discrete numbered tasks with priorities and target files. Execution only begins after the user explicitly approves the plan. During execution, tasks are marked in\_progress and completed sequentially. The user expects this plan-then-execute workflow to be followed strictly — no file edits during planning, and tasks tracked against the approved plan. + +* **Always work through a numbered task list in a single session when tackling multi-step changes**: When the user has a multi-step change (e.g., build artifact, re-pin dependency, refactor code, update types/tests, verify, lint), they establish an explicit numbered task list with priorities and statuses (in\_progress/pending) at the start of the session and work through all items sequentially in that same session. Follow this pattern by tracking task state explicitly, completing items in order, and not splitting related work across sessions unless blocked. When reporting progress, reference the task number and update its status. + -* **Always write tests after implementing new modules or features**: After implementing a new module or integrating a feature, the user consistently adds corresponding tests — both a dedicated test file for the new module (e.g., \`semantic-display.test.ts\`) and additional tests in existing test files for integration points (e.g., new describe blocks or test cases in \`local.test.ts\`). The user also reads existing test files first to understand patterns before writing new tests. Tests are added as a required step in the todo list, not as an afterthought, and are followed by typecheck/test runs to verify correctness. +* **Always write tests after implementing new modules or features**: After implementing a new CLI command file, the user consistently requests a corresponding test file. Tests cover validation paths, flag combinations, error states, happy paths, and env var management. The user expects tests to follow established patterns from similar existing test files (e.g., proguard upload tests, check tests), use \`vi.spyOn\` for API mocking, and use the project's standard test runner patterns (\`run(app, ...)\` or direct loader invocation). The user treats test writing as a required step in the implementation workflow, not optional. * **Bot review triage: distinguish real bugs from SDK-mirroring false positives**: When Sentry Seer or Cursor Bugbot flags 'unusual' code that intentionally mirrors upstream SDK behavior (e.g., \`http\_proxy\` as last-resort fallback for HTTPS URLs — deliberate in \`@sentry/node-core\` \`applyNoProxyOption\`), decline with a written rationale referencing the SDK source rather than silently changing behavior. Verify against \`node\_modules/@sentry/node-core/build/esm/transports/http.js\`, post a reply explaining the precedent, and resolve the thread. Real bugs (uppercase env var support, whitespace trimming, wildcard handling) get fixed; SDK-mirroring 'bugs' get explained and dismissed. Removing the mirror creates a divergence where users get different proxy semantics from our transport vs. the SDK default. @@ -591,14 +600,14 @@ * **Follow the established git workflow (branch, PR, review)**: Behavioral pattern detected across 6 sessions (action: enforced-workflow). The user consistently demonstrates this behavior. + +* **Never push or create PRs without explicit user confirmation**: The user expects the assistant to pause before pushing branches or opening PRs, even when those are logical next steps in a workflow. The assistant should complete local work (commits, fixes, tests) and then explicitly wait for the user to say 'push' or 'open a PR' before doing so. This applies even when AGENTS.md or task lists include PR creation as a step — treat it as blocked until the user confirms. Always surface the pause point clearly so the user knows action is pending their approval. + + +* **Never treat incomplete operations as successful — always surface silent failures**: When reviewing code, the user explicitly asserts that incomplete operations must never report success. Examples from \`debug-files upload\`: \`not\_found\` after deadline is a failure (exit 1), not success (exit 0). Symlink cycles must not hang silently. Missing requested IDs must be surfaced. Any failure path that silently exits 0 or produces no diagnostic is a blocker. + * **Never uses form-data at runtime — only dev transitive dependency via @types/node-fetch**: User stated never uses form-data at runtime — only dev transitive dependency via @types/node-fetch. * **Prefers Bun-native APIs; use buildCommand from lib/command.js (never @stricli/core directly); use buildRouteMap from lib/route-map.js; silent catch blocks prohibited; every new src/lib/\*\*/\*.ts must start with module-level JSDoc; test isolation via useTestConfigDir(); prefer property-based and model-based tests over unit tests; DEFAULT\_NUM\_RUNS = 50; architecture tree documented; error exit code ranges: 1x=auth**: Project conventions (AGENTS.md): use \`pnpm run\`/\`pnpm install\`/\`pnpm add -D\` (NOT bun for package management); use buildCommand from lib/command.js (never @stricli/core directly); use buildRouteMap from lib/route-map.js; silent catch blocks prohibited — every catch must re-throw, log with log.debug()/log.warn(), or return fallback WITH a log.debug() call; every new src/lib/\*\*/\*.ts must start with module-level JSDoc; test isolation via useTestConfigDir(); prefer property-based and model-based tests over unit tests; DEFAULT\_NUM\_RUNS=50; error exit code ranges: 1x=auth, 2x=input/config, 3x=API/network, 4x=feature/billing, 5x=operations, 6x=command-specific. Testing: vitest + fast-check (NEVER bun:test). All packages in devDependencies (CI enforces via \`pnpm run check:deps\`). NEVER merge a PR if CI is failing unless explicitly told to ignore. Always use \`pnpm add -D \\` — never add to \`dependencies\`. - - -* **Respect explicitly rejected approaches**: Behavioral pattern detected across 3 sessions (action: rejected-approach). The user consistently demonstrates this behavior. - - -* **Review code before committing**: Behavioral pattern detected across 3 sessions (action: requested-review). The user consistently demonstrates this behavior. diff --git a/docs/src/content/docs/contributing.md b/docs/src/content/docs/contributing.md index 8a07ef96f..5e55c7b07 100644 --- a/docs/src/content/docs/contributing.md +++ b/docs/src/content/docs/contributing.md @@ -58,7 +58,7 @@ cli/ │ │ ├── code-mappings/# upload │ │ ├── dart-symbol-map/# upload │ │ ├── dashboard/ # add, create, delete, edit, list, restore, revisions, view -│ │ ├── debug-files/ # bundle-jvm, bundle-sources, check, print-sources +│ │ ├── debug-files/ # bundle-jvm, bundle-sources, check, print-sources, upload │ │ ├── event/ # list, send, view │ │ ├── issue/ # archive, events, explain, list, merge, plan, resolve, unresolve, view │ │ ├── local/ # run, serve diff --git a/docs/src/fragments/commands/debug-files.md b/docs/src/fragments/commands/debug-files.md index 1056862ff..90986a2a4 100644 --- a/docs/src/fragments/commands/debug-files.md +++ b/docs/src/fragments/commands/debug-files.md @@ -25,6 +25,17 @@ sentry debug-files bundle-jvm --output ./out --debug-id --exclude generat # Output as JSON sentry debug-files bundle-jvm --output ./out --debug-id --json ./src + +# Upload debug information files (scans directories recursively) +sentry debug-files upload ./build +sentry debug-files upload ./libexample.so --include-sources + +# Restrict by type or debug id, and wait for server-side processing +sentry debug-files upload ./dsyms --type dsym --wait +sentry debug-files upload ./build --id --require-all + +# Preview what would be uploaded without uploading (no credentials needed) +sentry debug-files upload ./build --no-upload ``` ## Important Notes @@ -44,6 +55,16 @@ sentry debug-files bundle-jvm --output ./out --debug-id --json ./src files that are not present locally are skipped; it exits non-zero (writing nothing) when none are found. The bundle defaults to `.src.zip` and is uploaded via `sentry debug-files upload`. +- `upload` scans each path (files or directories, walked recursively) for + native debug information files, parses them in-process, and uploads matching + files via the chunk-upload protocol. Use `--type`/`--id` to restrict which + files are sent, `--no-debug`/`--no-unwind`/`--no-sources` to drop files whose + only useful feature is the named one, and `--include-sources` to attach a + source bundle per file. `--no-upload` previews the selection without + credentials; `--wait`/`--wait-for` block on server-side processing and exit + non-zero if any file fails. `--require-all` fails if a requested `--id` was not + found. Scanning inside ZIP archives, `--symbol-maps`, `--il2cpp-mapping` line + mappings, and `--derived-data` are not yet supported. - Upload a JVM bundle separately via `sentry debug-files upload --type jvm`. - Supported JVM source file extensions: `.java`, `.kt`, `.scala`, `.sc`, `.groovy`, `.gvy`, `.gy`, `.gsh`, `.clj`, `.cljc` diff --git a/plugins/sentry-cli/skills/sentry-cli/SKILL.md b/plugins/sentry-cli/skills/sentry-cli/SKILL.md index dc3bfc948..e02c87670 100644 --- a/plugins/sentry-cli/skills/sentry-cli/SKILL.md +++ b/plugins/sentry-cli/skills/sentry-cli/SKILL.md @@ -411,6 +411,7 @@ Work with Dart/Flutter symbol maps Work with debug information files - `sentry debug-files check ` — Inspect a debug information file +- `sentry debug-files upload ` — Upload debug information files to Sentry - `sentry debug-files print-sources ` — List the source files a debug file references - `sentry debug-files bundle-sources ` — Bundle a debug file's source files for source context - `sentry debug-files bundle-jvm ` — Create a JVM source bundle for source context diff --git a/plugins/sentry-cli/skills/sentry-cli/references/debug-files.md b/plugins/sentry-cli/skills/sentry-cli/references/debug-files.md index d442fea7d..210a0f19d 100644 --- a/plugins/sentry-cli/skills/sentry-cli/references/debug-files.md +++ b/plugins/sentry-cli/skills/sentry-cli/references/debug-files.md @@ -15,6 +15,22 @@ Work with debug information files Inspect a debug information file +### `sentry debug-files upload ` + +Upload debug information files to Sentry + +**Flags:** +- `-t, --type ... - Only upload files of this type (repeatable): dsym, elf, pe, pdb, portablepdb, wasm, breakpad, sourcebundle, jvm` +- `--id ... - Only upload the object with this debug id (repeatable)` +- `--require-all - Fail if any --id value was not found among scanned files` +- `--no-debug - Do not upload files whose only feature is debug/symbol info` +- `--no-unwind - Do not upload files whose only feature is unwind info` +- `--no-sources - Do not upload files whose only feature is source info` +- `--include-sources - Build and upload a source bundle for each file with debug info` +- `--no-upload - Scan and print what would be uploaded without uploading` +- `--wait - Wait for server-side processing and report any errors` +- `--wait-for - Wait up to this many seconds for server-side processing` + ### `sentry debug-files print-sources ` List the source files a debug file references @@ -59,6 +75,17 @@ sentry debug-files bundle-jvm --output ./out --debug-id --exclude generat # Output as JSON sentry debug-files bundle-jvm --output ./out --debug-id --json ./src + +# Upload debug information files (scans directories recursively) +sentry debug-files upload ./build +sentry debug-files upload ./libexample.so --include-sources + +# Restrict by type or debug id, and wait for server-side processing +sentry debug-files upload ./dsyms --type dsym --wait +sentry debug-files upload ./build --id --require-all + +# Preview what would be uploaded without uploading (no credentials needed) +sentry debug-files upload ./build --no-upload ``` All commands also support `--json`, `--fields`, `--help`, `--log-level`, and `--verbose` flags. diff --git a/src/commands/debug-files/index.ts b/src/commands/debug-files/index.ts index be40a4aa8..d270ef6d2 100644 --- a/src/commands/debug-files/index.ts +++ b/src/commands/debug-files/index.ts @@ -9,10 +9,12 @@ import { bundleJvmCommand } from "./bundle-jvm.js"; import { bundleSourcesCommand } from "./bundle-sources.js"; import { checkCommand } from "./check.js"; import { printSourcesCommand } from "./print-sources.js"; +import { uploadCommand } from "./upload.js"; export const debugFilesRoute = buildRouteMap({ routes: { check: checkCommand, + upload: uploadCommand, "print-sources": printSourcesCommand, "bundle-sources": bundleSourcesCommand, "bundle-jvm": bundleJvmCommand, diff --git a/src/commands/debug-files/upload.ts b/src/commands/debug-files/upload.ts new file mode 100644 index 000000000..2c372b365 --- /dev/null +++ b/src/commands/debug-files/upload.ts @@ -0,0 +1,534 @@ +/** + * sentry debug-files upload ... + * + * Scan files and directories for native debug information files (Mach-O/dSYM, + * ELF, PE/PDB, Portable PDB, WASM, Breakpad, source bundles), filter them, and + * upload them to Sentry via the DIF chunk-upload protocol. + * + * Org/project are resolved via the standard cascade (DSN auto-detection, env + * vars, config defaults), so `--no-upload` (dry-run) needs no credentials. + * + * This is the first stage of `debug-files upload` parity. ZIP scanning, + * `--symbol-maps`, `--il2cpp-mapping` line mappings, and `--derived-data` are + * deferred to follow-up PRs (see the command's full description). + */ + +import { createHash } from "node:crypto"; +import { readFileSync } from "node:fs"; +import { basename } from "node:path"; +import type { SentryContext } from "../../context.js"; +import { + DEBUG_FILES_MAX_WAIT_MS, + type DebugFileUpload, + type DebugFileUploadResult, + uploadDebugFiles, +} from "../../lib/api/debug-files.js"; +import { buildCommand } from "../../lib/command.js"; +import { createSourceBundle } from "../../lib/dif/index.js"; +import { + buildDifFilters, + debugIdMatches, + type PreparedDif, + prepareDifs, + scanPaths, +} from "../../lib/dif/scan.js"; +import { ContextError, ValidationError } from "../../lib/errors.js"; +import { + colorTag, + mdKvTable, + renderMarkdown, +} from "../../lib/formatters/markdown.js"; +import { CommandOutput } from "../../lib/formatters/output.js"; +import { logger } from "../../lib/logger.js"; +import { resolveOrgAndProject } from "../../lib/resolve-target.js"; + +const log = logger.withTag("debug-files.upload"); + +const USAGE_HINT = "sentry debug-files upload ..."; + +// ── Types ─────────────────────────────────────────────────────────── + +/** Per-file entry in the command result. */ +type UploadedFileSummary = { + /** Name stamped on the DIF. */ + name: string; + /** Advisory debug id, if known. */ + debugId?: string; + /** Assembly state after upload (omitted for `--no-upload`). */ + state?: DebugFileUploadResult["state"]; + /** Server detail/error, if any. */ + detail?: string | null; +}; + +/** Structured result for the debug-files upload command. */ +type DebugFilesUploadResult = { + /** Organization slug. Omitted when `--no-upload` short-circuits. */ + org?: string; + /** Project slug. Omitted when `--no-upload` short-circuits. */ + project?: string; + /** Whether files were actually uploaded (false for `--no-upload`). */ + uploaded: boolean; + /** Per-file results. */ + files: UploadedFileSummary[]; + /** Number of files uploaded (0 for `--no-upload`). */ + filesUploaded: number; +}; + +/** Flags accepted by the upload command. */ +type UploadFlags = { + type?: string[]; + id?: string[]; + "require-all"?: boolean; + "no-debug"?: boolean; + "no-unwind"?: boolean; + "no-sources"?: boolean; + "include-sources"?: boolean; + "no-upload"?: boolean; + wait?: boolean; + "wait-for"?: number; +}; + +// ── Formatter ─────────────────────────────────────────────────────── + +/** Human-readable label for an empty-state cell. */ +function noneCell(): string { + return colorTag("muted", "none"); +} + +/** Format human-readable output for the upload result. */ +function formatUploadResult(data: DebugFilesUploadResult): string { + const rows: [string, string][] = []; + if (data.org) { + rows.push(["Organization", data.org]); + } + if (data.project) { + rows.push(["Project", data.project]); + } + rows.push([ + data.uploaded ? "Files uploaded" : "Files found", + String(data.files.length), + ]); + for (const file of data.files) { + const id = file.debugId ?? noneCell(); + const suffix = file.state ? ` [${file.state}]` : ""; + rows.push(["DIF", `${id} ${file.name}${suffix}`]); + } + return renderMarkdown(mdKvTable(rows)); +} + +// ── Helpers ───────────────────────────────────────────────────────── + +/** Build a stable dedupe key from a file's debug id and content hash. */ +function difKey(dif: DebugFileUpload): string { + const hash = createHash("sha1").update(dif.content).digest("hex"); + return `${dif.debugId ?? ""}:${hash}`; +} + +/** + * Convert prepared files into the DIF upload list, optionally appending a + * source bundle per file when `--include-sources` is set. + * + * Source files are read synchronously from the paths recorded in each object's + * debug info; files not present locally are skipped. A bundle is only added + * when it contains at least one source file. + */ +function buildDifList( + prepared: PreparedDif[], + includeSources: boolean +): DebugFileUpload[] { + const difs: DebugFileUpload[] = []; + for (const file of prepared) { + difs.push({ + name: basename(file.path), + debugId: file.debugId, + content: file.content, + }); + + if (!includeSources) { + continue; + } + + let result: ReturnType; + try { + result = createSourceBundle( + new Uint8Array(file.content), + basename(file.path), + (sourcePath) => { + try { + return readFileSync(sourcePath); + } catch (err) { + log.debug( + `Source file not available, skipping: ${sourcePath}`, + err + ); + return null; + } + } + ); + } catch (err) { + log.debug(`Could not build source bundle for ${file.path}`, err); + continue; + } + + if (result.bundle && result.fileCount > 0) { + // Stamp the bundle with the filter-passing primary object's debug id so + // it matches the main DIF's advisory id, even when filters dropped the + // slice `createSourceBundle` would otherwise have picked. + const bundleDebugId = file.debugId ?? result.debugId ?? undefined; + difs.push({ + name: `${bundleDebugId ?? basename(file.path)}.src.zip`, + debugId: bundleDebugId, + content: Buffer.from(result.bundle), + }); + } + } + return difs; +} + +/** Deduplicate DIFs by debug id + content hash, keeping the first occurrence. */ +function dedupeDifs(difs: DebugFileUpload[]): DebugFileUpload[] { + const seen = new Set(); + const unique: DebugFileUpload[] = []; + for (const dif of difs) { + const key = difKey(dif); + if (seen.has(key)) { + continue; + } + seen.add(key); + unique.push(dif); + } + return unique; +} + +/** + * Determine which explicitly requested `--id` values were not found among the + * uploaded files. Returns an empty array when `--id` was not used. + */ +function missingRequestedIds( + requestedIds: string[] | undefined, + prepared: PreparedDif[] +): string[] { + if (!requestedIds || requestedIds.length === 0) { + return []; + } + const foundIds = prepared.flatMap((p) => p.objects.map((o) => o.debugId)); + return requestedIds.filter( + (requested) => !foundIds.some((found) => debugIdMatches(requested, found)) + ); +} + +/** + * Resolve the wait mode and deadline from `--wait` / `--wait-for`. + * + * @throws {ValidationError} If both flags are set, or `--wait-for` is invalid. + */ +function resolveWaitMode(flags: UploadFlags): { + wait: boolean; + maxWaitMs: number; +} { + const waitFor = flags["wait-for"]; + if (flags.wait && waitFor !== undefined) { + throw new ValidationError( + "--wait and --wait-for cannot be combined", + "wait" + ); + } + if (waitFor !== undefined) { + if (!Number.isFinite(waitFor) || waitFor <= 0) { + throw new ValidationError( + "--wait-for must be a positive number of seconds", + "wait-for" + ); + } + return { wait: true, maxWaitMs: Math.round(waitFor * 1000) }; + } + return { wait: Boolean(flags.wait), maxWaitMs: DEBUG_FILES_MAX_WAIT_MS }; +} + +// ── Command ───────────────────────────────────────────────────────── + +/** + * Yield a no-upload (dry-run) result and return the appropriate hint. + * Handles --require-all exit-code logic. + */ +function* doDryRun( + setExitCode: (code: number) => void, + difs: DebugFileUpload[], + missingIds: string[], + requireAll: boolean +) { + yield new CommandOutput({ + uploaded: false, + files: difs.map((d) => ({ name: d.name, debugId: d.debugId })), + filesUploaded: 0, + }); + if (missingIds.length > 0 && requireAll) { + setExitCode(1); + return { hint: `Missing requested debug id(s): ${missingIds.join(", ")}` }; + } + return { + hint: + difs.length === 0 + ? `No debug information files found. Try: ${USAGE_HINT}` + : `Would upload ${difs.length} debug file(s). Remove --no-upload to upload.`, + }; +} + +/** + * Report that nothing was found to upload (no auth needed). + * Honors `--require-all`: exit 1 only when requested ids are missing. + */ +function* doNothingToUpload( + setExitCode: (code: number) => void, + missingIds: string[], + requireAll: boolean +) { + log.warn("No debug information files found."); + yield new CommandOutput({ + uploaded: false, + files: [], + filesUploaded: 0, + }); + if (missingIds.length > 0 && requireAll) { + setExitCode(1); + return { hint: `Missing requested debug id(s): ${missingIds.join(", ")}` }; + } + return { hint: `No debug information files found. Try: ${USAGE_HINT}` }; +} + +/** + * Perform the upload, yield the result, and return a hint. Non-terminal states + * (error, not_found) set the exit code and return a descriptive hint. + * Also honors `--require-all` against the requested `--id` values. + */ +async function* doUpload( + setExitCode: (code: number) => void, + params: { + org: string; + project: string; + difs: DebugFileUpload[]; + wait: boolean; + maxWaitMs: number; + missingRequestedIds: string[]; + requireAll: boolean; + } +) { + const results = await uploadDebugFiles(params); + + yield new CommandOutput({ + org: params.org, + project: params.project, + uploaded: true, + files: results.map((r) => ({ + name: r.name, + debugId: r.debugId, + state: r.state, + detail: r.detail, + })), + filesUploaded: results.length, + }); + + const failures = results.filter( + (r) => r.state === "error" || r.state === "not_found" + ); + if (failures.length > 0) { + setExitCode(1); + const details = failures + .map( + (r) => + `${r.debugId ?? r.name}: ${r.state}${r.detail ? ` (${r.detail})` : ""}` + ) + .join("; "); + return { + hint: `${failures.length === 1 ? "1 file" : `${failures.length} files`} had failures: ${details}`, + }; + } + + if (params.missingRequestedIds.length > 0 && params.requireAll) { + setExitCode(1); + return { + hint: `Missing requested debug id(s): ${params.missingRequestedIds.join(", ")}`, + }; + } + + return { + hint: `Uploaded ${results.length} debug file(s) to ${params.org}/${params.project}`, + }; +} + +export const uploadCommand = buildCommand({ + // Auth is not required for --no-upload (dry-run). The upload path calls + // resolveOrgAndProject which triggers auth resolution. + auth: false, + docs: { + brief: "Upload debug information files to Sentry", + fullDescription: + "Scan files and directories for native debug information files and " + + "upload them to Sentry using the chunk-upload protocol. Supports " + + "Mach-O/dSYM, ELF, PE/PDB, Portable PDB, WebAssembly, Breakpad, and " + + "source bundles. Directories are scanned recursively.\n\n" + + "Org/project are auto-detected from DSN, env vars, or config defaults.\n\n" + + "Filters:\n" + + " --type Only upload files of the given type (repeatable):\n" + + " dsym, elf, pe, pdb, portablepdb, wasm, breakpad,\n" + + " sourcebundle, jvm\n" + + " --id Only upload the object with the given debug id (repeatable)\n" + + " --no-debug / --no-unwind / --no-sources Drop files whose only\n" + + " useful feature is the named one\n\n" + + "Usage:\n" + + " sentry debug-files upload ./build\n" + + " sentry debug-files upload ./libexample.so --include-sources\n" + + " sentry debug-files upload ./dsyms --type dsym --wait\n" + + " sentry debug-files upload ./build --no-upload\n\n" + + "Not yet supported (planned): scanning inside ZIP archives, " + + "--symbol-maps (BCSymbolMap resolution), --il2cpp-mapping line " + + "mappings, and --derived-data.", + }, + output: { + human: formatUploadResult, + }, + parameters: { + positional: { + kind: "array", + parameter: { + brief: "Files or directories to scan for debug information files", + parse: String, + placeholder: "path", + }, + }, + flags: { + type: { + kind: "parsed", + parse: String, + brief: + "Only upload files of this type (repeatable): dsym, elf, pe, pdb, " + + "portablepdb, wasm, breakpad, sourcebundle, jvm", + optional: true, + variadic: true, + }, + id: { + kind: "parsed", + parse: String, + brief: "Only upload the object with this debug id (repeatable)", + optional: true, + variadic: true, + }, + "require-all": { + kind: "boolean", + brief: "Fail if any --id value was not found among scanned files", + optional: true, + default: false, + }, + "no-debug": { + kind: "boolean", + brief: "Do not upload files whose only feature is debug/symbol info", + optional: true, + default: false, + }, + "no-unwind": { + kind: "boolean", + brief: "Do not upload files whose only feature is unwind info", + optional: true, + default: false, + }, + "no-sources": { + kind: "boolean", + brief: "Do not upload files whose only feature is source info", + optional: true, + default: false, + }, + "include-sources": { + kind: "boolean", + brief: "Build and upload a source bundle for each file with debug info", + optional: true, + default: false, + }, + "no-upload": { + kind: "boolean", + brief: "Scan and print what would be uploaded without uploading", + optional: true, + default: false, + }, + wait: { + kind: "boolean", + brief: "Wait for server-side processing and report any errors", + optional: true, + default: false, + }, + "wait-for": { + kind: "parsed", + parse: Number, + brief: "Wait up to this many seconds for server-side processing", + optional: true, + }, + }, + aliases: { + t: "type", + }, + }, + async *func(this: SentryContext, flags: UploadFlags, ...paths: string[]) { + if (paths.length === 0) { + throw new ContextError("Debug file path(s)", USAGE_HINT, []); + } + const { wait, maxWaitMs } = resolveWaitMode(flags); + + const filters = buildDifFilters({ + types: flags.type, + ids: flags.id, + noDebug: flags["no-debug"], + noUnwind: flags["no-unwind"], + noSources: flags["no-sources"], + }); + const files = await scanPaths(paths); + const prepared = await prepareDifs(files, filters); + const difs = dedupeDifs( + buildDifList(prepared, Boolean(flags["include-sources"])) + ); + const missingIds = missingRequestedIds(flags.id, prepared); + const requireAll = Boolean(flags["require-all"]); + + if (flags["no-upload"]) { + return yield* doDryRun( + (c) => { + this.process.exitCode = c; + }, + difs, + missingIds, + requireAll + ); + } + + if (difs.length === 0) { + return yield* doNothingToUpload( + (c) => { + this.process.exitCode = c; + }, + missingIds, + requireAll + ); + } + + const resolved = await resolveOrgAndProject({ + cwd: this.cwd, + usageHint: USAGE_HINT, + }); + if (!resolved) { + throw new ContextError("Organization and project", USAGE_HINT); + } + + return yield* doUpload( + (c) => { + this.process.exitCode = c; + }, + { + org: resolved.org, + project: resolved.project, + difs, + wait, + maxWaitMs, + missingRequestedIds: missingIds, + requireAll, + } + ); + }, +}); diff --git a/src/lib/api/debug-files.ts b/src/lib/api/debug-files.ts new file mode 100644 index 000000000..f4ae2d9e8 --- /dev/null +++ b/src/lib/api/debug-files.ts @@ -0,0 +1,377 @@ +/** + * Debug Information File (DIF) Upload API + * + * Uploads native debug information files (Mach-O/dSYM, ELF, PE/PDB, Portable + * PDB, WASM, Breakpad, source bundles) via the DIF chunk-upload + assemble + * protocol shared with ProGuard and Dart symbol-map uploads. + * + * Protocol: each file's raw bytes are chunked directly (no ZIP wrapping) and + * assembled through `projects/{org}/{project}/files/difs/assemble/`. The body + * keys each file by its overall SHA-1 checksum, with `name`, optional + * `debug_id`, and the per-chunk checksum list. Multiple files are batched into + * a single assemble request, like ProGuard. + * + * Two completion modes (see {@link uploadDebugFiles}): + * - **no-wait** (default): stop once the server holds every chunk of every + * file. Server-side processing errors are not surfaced. + * - **wait**: poll until every file reaches a terminal state (`ok`/`error`), + * collecting `error` details so the caller can report and exit non-zero. + */ + +import { z } from "zod"; +import { ApiError } from "../errors.js"; +import { logger } from "../logger.js"; +import { resolveOrgRegion } from "../region.js"; +import { + ASSEMBLE_MAX_WAIT_MS, + ASSEMBLE_POLL_INTERVAL_MS, + type AssembleResponse, + AssembleResponseSchema, + type ChunkInfo, + type ChunkServerOptions, + getChunkUploadOptions, + hashBuffer, + pickUploadEncoding, + type UploadEncoding, + uploadMissingBufferChunks, +} from "./chunk-upload.js"; +import { apiRequestToRegion } from "./infrastructure.js"; + +const log = logger.withTag("api.debug-files"); + +// ── Types ─────────────────────────────────────────────────────────── + +/** A single debug information file to upload. */ +export type DebugFileUpload = { + /** Name stamped on the assembled DIF (typically the input file's basename). */ + name: string; + /** + * Advisory debug id for the file's primary object. The server re-parses the + * uploaded bytes and indexes every contained slice itself, so this is + * `skip_serializing_if none` in the wire format — omitted when absent. + */ + debugId?: string; + /** Pre-read raw file content (chunked as-is). */ + content: Buffer; +}; + +/** Options for {@link uploadDebugFiles}. */ +export type DebugFilesUploadOptions = { + /** Organization slug. */ + org: string; + /** Project slug. */ + project: string; + /** Debug information files to upload. */ + difs: DebugFileUpload[]; + /** + * When `true`, poll until every file reaches a terminal state (`ok`/`error`) + * and surface processing errors. When `false`, return as soon as the server + * holds every chunk (no server-side processing wait). + */ + wait: boolean; + /** Maximum time to wait for assembly/processing, in milliseconds. */ + maxWaitMs: number; +}; + +/** Per-file result of a debug information file upload. */ +export type DebugFileUploadResult = { + /** Name stamped on the assembled DIF. */ + name: string; + /** Advisory debug id, if one was provided. */ + debugId?: string; + /** Overall SHA-1 checksum of the uploaded file. */ + checksum: string; + /** + * Terminal (or last-observed) assembly state. In no-wait mode this is often + * `created`/`assembling` because processing has not finished. + */ + state: AssembleResponse["state"]; + /** Server-provided detail/error message, if any. */ + detail: string | null; +}; + +// ── Schemas ───────────────────────────────────────────────────────── + +/** + * DIF assemble response — keyed by overall checksum, each value has the same + * shape as the standard assemble response. + */ +const DifAssembleResponseSchema = z.record(z.string(), AssembleResponseSchema); + +type DifAssembleResponse = z.infer; + +// ── Internal types ────────────────────────────────────────────────── + +/** Per-file chunk metadata computed before the assemble request. */ +type ChunkedDif = { + /** The file being uploaded. */ + dif: DebugFileUpload; + /** Per-chunk SHA-1 checksums and offsets. */ + chunks: ChunkInfo[]; + /** SHA-1 of the entire file. */ + overallChecksum: string; +}; + +/** Outcome of evaluating an assemble response against the desired stop mode. */ +type AssembleEvaluation = { + /** + * Whether the upload is complete for the active mode: in wait mode every + * file is terminal (`ok`/`error`); in no-wait mode the server holds every + * chunk of every file. + */ + done: boolean; + /** SHA-1 checksums the server still needs uploaded. */ + missingChecksums: Set; +}; + +// ── Helpers ───────────────────────────────────────────────────────── + +/** + * Build the assemble request body — one entry per file, keyed by checksum. + * + * `debug_id` is only included when present (matches the legacy + * `skip_serializing_if none` behavior). + */ +function buildAssembleBody( + chunked: ChunkedDif[] +): Record { + const body: Record< + string, + { name: string; debug_id?: string; chunks: string[] } + > = {}; + for (const cd of chunked) { + body[cd.overallChecksum] = { + name: cd.dif.name, + ...(cd.dif.debugId ? { debug_id: cd.dif.debugId } : {}), + chunks: cd.chunks.map((c) => c.sha1), + }; + } + return body; +} + +/** + * Evaluate an assemble response for completion and outstanding chunks. + * + * A file's chunks are "held" by the server once its entry exists, is not in the + * `not_found` state, and reports no missing chunks. A file is "terminal" once + * its state is `ok` or `error`. Unlike ProGuard/Dart uploads, an `error` entry + * is NOT thrown here — it is a terminal state collected for reporting so the + * caller can decide the exit code. + * + * When a file's entry is missing from the response, all of that file's chunks + * are treated as missing — the server has no record of receiving them, so we + * must (re-)send. This guards against an upload loop stalling on a phantom + * "missing nothing" file that the server has never seen. + * + * @param response - The assemble response keyed by checksum. + * @param chunked - The per-file chunk metadata, in input order. + * @param wait - When `true`, completion requires every file to be terminal. + */ +function evaluateAssembly( + response: DifAssembleResponse, + chunked: ChunkedDif[], + wait: boolean +): AssembleEvaluation { + const missingChecksums = new Set(); + let allHeld = true; + let allTerminal = true; + + for (const cd of chunked) { + const result = classifyEntry(response[cd.overallChecksum], cd); + if (!result.held) { + allHeld = false; + } + if (!result.terminal) { + allTerminal = false; + } + for (const sha1 of result.missing) { + missingChecksums.add(sha1); + } + } + + return { done: wait ? allTerminal : allHeld, missingChecksums }; +} + +/** + * Classify a single file's assemble response. + * + * Returns whether the server holds every chunk (`held`), whether the file is + * in a terminal state (`terminal`), and the chunk checksums that still need + * to be uploaded (`missing`). A missing entry is treated as "no chunks held" + * — the server has no record of the file, so every chunk must be (re-)sent. + */ +function classifyEntry( + entry: AssembleResponse | undefined, + cd: ChunkedDif +): { held: boolean; terminal: boolean; missing: string[] } { + if (!entry) { + return { + held: false, + terminal: false, + missing: cd.chunks.map((c) => c.sha1), + }; + } + + if (entry.state === "not_found") { + // Server has no record of this file. Re-send every chunk rather than + // trusting the (possibly absent) `missingChunks` field — an empty list + // here would otherwise leave the upload loop polling forever. + return { + held: false, + terminal: false, + missing: cd.chunks.map((c) => c.sha1), + }; + } + + const missing = entry.missingChunks ?? []; + if (missing.length > 0) { + return { held: false, terminal: false, missing }; + } + + // Entry exists, server holds every chunk. `created`/`assembling` are + // held-but-not-terminal; `ok`/`error` are terminal. + const terminal = entry.state === "ok" || entry.state === "error"; + return { held: true, terminal, missing: [] }; +} + +/** Upload any chunks the server reported missing, across all files. */ +async function uploadMissing(params: { + chunked: ChunkedDif[]; + missingChecksums: Set; + serverOptions: ChunkServerOptions; + encoding: UploadEncoding | undefined; + regionUrl: string; +}): Promise { + const { chunked, missingChecksums, serverOptions, encoding, regionUrl } = + params; + if (missingChecksums.size === 0) { + return; + } + for (const cd of chunked) { + await uploadMissingBufferChunks({ + chunks: cd.chunks, + missingChecksums, + content: cd.dif.content, + serverOptions, + encoding, + regionUrl, + }); + } +} + +/** POST the assemble request and parse the keyed response. */ +async function postAssemble( + regionUrl: string, + endpoint: string, + body: unknown +): Promise { + const { data } = await apiRequestToRegion( + regionUrl, + endpoint, + { method: "POST", body, schema: DifAssembleResponseSchema } + ); + return data; +} + +/** Build per-file results from the last-observed assemble response. */ +function buildResults( + chunked: ChunkedDif[], + response: DifAssembleResponse +): DebugFileUploadResult[] { + return chunked.map((cd) => { + const entry = response[cd.overallChecksum]; + return { + name: cd.dif.name, + debugId: cd.dif.debugId, + checksum: cd.overallChecksum, + state: entry?.state ?? "not_found", + detail: entry?.detail ?? null, + }; + }); +} + +// ── API Functions ─────────────────────────────────────────────────── + +/** + * Upload debug information files to Sentry via the DIF chunk-upload protocol. + * + * Each file's raw bytes are chunked directly (no ZIP wrapping) and all files + * are batched into a single assemble request keyed by overall SHA-1 checksum. + * The server re-parses each uploaded file and indexes every contained object + * slice itself; `debugId` is advisory. + * + * @param options - Upload configuration (org, project, files, wait mode). + * @returns Per-file terminal/last-observed assembly state. + * @throws {ApiError} If chunk upload fails, or (wait mode only) assembly does + * not complete within `maxWaitMs`. + */ +export async function uploadDebugFiles( + options: DebugFilesUploadOptions +): Promise { + const { org, project, difs, wait, maxWaitMs } = options; + + if (difs.length === 0) { + return []; + } + + const serverOptions = await getChunkUploadOptions(org); + const encoding = pickUploadEncoding(serverOptions.compression); + + const chunked: ChunkedDif[] = difs.map((dif) => { + const { chunks, overallChecksum } = hashBuffer( + dif.content, + serverOptions.chunkSize + ); + return { dif, chunks, overallChecksum }; + }); + + const regionUrl = await resolveOrgRegion(org); + const endpoint = `projects/${org}/${project}/files/difs/assemble/`; + const body = buildAssembleBody(chunked); + + const deadline = Date.now() + maxWaitMs; + let response = await postAssemble(regionUrl, endpoint, body); + let evaluation = evaluateAssembly(response, chunked, wait); + await uploadMissing({ + chunked, + missingChecksums: evaluation.missingChecksums, + serverOptions, + encoding, + regionUrl, + }); + + while (!evaluation.done) { + if (Date.now() >= deadline) { + if (wait) { + throw new ApiError( + "Debug file assembly timed out", + 408, + `Assembly did not complete within ${Math.round(maxWaitMs / 1000)}s`, + endpoint + ); + } + // No-wait mode: the server kept reporting missing chunks past the + // deadline. Stop and report the last-observed state rather than hang. + log.warn( + "Chunk delivery did not settle before the deadline — some files may not have been fully uploaded" + ); + break; + } + + await new Promise((r) => setTimeout(r, ASSEMBLE_POLL_INTERVAL_MS)); + response = await postAssemble(regionUrl, endpoint, body); + evaluation = evaluateAssembly(response, chunked, wait); + await uploadMissing({ + chunked, + missingChecksums: evaluation.missingChecksums, + serverOptions, + encoding, + regionUrl, + }); + } + + return buildResults(chunked, response); +} + +/** Default maximum wait for server-side DIF processing (`--wait`). */ +export const DEBUG_FILES_MAX_WAIT_MS = ASSEMBLE_MAX_WAIT_MS; diff --git a/src/lib/dif/scan.ts b/src/lib/dif/scan.ts new file mode 100644 index 000000000..d7d9354f0 --- /dev/null +++ b/src/lib/dif/scan.ts @@ -0,0 +1,424 @@ +/** + * Debug information file scanning and filtering. + * + * Walks files and directories for debug information files, parses each via the + * bundled `symbolic` WASM module, and applies the `debug-files upload` filter + * rules (`--type`, `--id`, `--no-debug`/`--no-unwind`/`--no-sources`). The pure + * filter predicates ({@link objectPassesFilters}, {@link normalizeDebugId}) are + * separated from the I/O ({@link scanPaths}, {@link prepareDifs}) so the matching + * logic can be property-tested without touching the filesystem. + * + * Type matching maps the user-facing `--type` value to the canonical object + * file format reported by the parser (e.g. `dsym`/`macho` → `macho`, + * `jvm` → `sourcebundle`). This is format-level matching: `--type dsym` and + * `--type macho` are equivalent here, and the feature filters narrow further. + */ + +import { open, readdir, readFile, realpath, stat } from "node:fs/promises"; +import { join } from "node:path"; +import { ValidationError } from "../errors.js"; +import { logger } from "../logger.js"; +import { + type DifArchiveInfo, + type DifObjectInfo, + parseDebugFile, + peekFormat, + selectBundledObject, +} from "./index.js"; + +const log = logger.withTag("dif.scan"); + +/** + * Nil debug id (hyphenated UUID form). An object whose debug id starts with + * this carries no real identifier and cannot be symbolicated, so it is never + * uploadable. PE/PDB ids may append an `-` suffix, hence prefix matching. + */ +const NIL_DEBUG_ID_PREFIX = "00000000-0000-0000-0000-000000000000"; + +/** + * Map of accepted `--type` values to the canonical object file format the + * parser reports. Multiple type aliases can map to the same format. + */ +const TYPE_TO_FORMAT: Readonly> = { + dsym: "macho", + macho: "macho", + elf: "elf", + pe: "pe", + pdb: "pdb", + portablepdb: "portablepdb", + wasm: "wasm", + breakpad: "breakpad", + sourcebundle: "sourcebundle", + jvm: "sourcebundle", +}; + +/** Accepted `--type` filter values. */ +export const VALID_DIF_TYPES: readonly string[] = Object.keys(TYPE_TO_FORMAT); + +/** Resolved filter set applied to each parsed object. */ +export type DifFilters = { + /** Accepted file formats (canonical names), or `undefined` for any. */ + formats?: Set; + /** Accepted debug ids (normalized), or `undefined` for any. */ + ids?: Set; + /** Whether a symbol table satisfies the feature requirement. */ + symtab: boolean; + /** Whether debug info satisfies the feature requirement. */ + debug: boolean; + /** Whether unwind info satisfies the feature requirement. */ + unwind: boolean; + /** Whether embedded/referenced sources satisfy the feature requirement. */ + sources: boolean; +}; + +/** Flag inputs used to build a {@link DifFilters}. */ +export type DifFilterOptions = { + /** Repeatable `--type` values (e.g. `dsym`, `elf`). */ + types?: string[]; + /** Repeatable `--id` values (debug ids). */ + ids?: string[]; + /** `--no-debug`: drop both debug and symbol-table features. */ + noDebug?: boolean; + /** `--no-unwind`: drop unwind features. */ + noUnwind?: boolean; + /** `--no-sources`: drop source features. */ + noSources?: boolean; +}; + +/** A debug information file that passed filtering, ready to upload. */ +export type PreparedDif = { + /** Filesystem path the file was read from. */ + path: string; + /** Raw file content. */ + content: Buffer; + /** + * Advisory debug id of the primary (first debug-info) object among the + * filter-matched set. This is the same id used as the source bundle's + * debug id under `--include-sources`, so the bundle's slice matches the + * slice whose metadata the main DIF advertises. + */ + debugId?: string; + /** The objects within the file that passed the filters (for reporting). */ + objects: DifObjectInfo[]; +}; + +/** + * Normalize a debug id for comparison: trim, lowercase, and strip braces. + * + * @param id - A debug id, possibly brace-wrapped or mixed-case. + * @returns The normalized form. + */ +export function normalizeDebugId(id: string): string { + return id.trim().toLowerCase().replace(/[{}]/g, ""); +} + +/** + * Reduce a normalized debug id to its base UUID, dropping any age/appendix + * suffix. A standard UUID has 5 hyphen-separated groups; PE/PDB ids append a + * 6th group (`-`). + */ +function baseDebugId(normalized: string): string { + const parts = normalized.split("-"); + return parts.length > 5 ? parts.slice(0, 5).join("-") : normalized; +} + +/** Whether an object carries a real (non-nil) debug id. */ +function hasValidDebugId(obj: DifObjectInfo): boolean { + const id = normalizeDebugId(obj.debugId); + return id.length > 0 && !id.startsWith(NIL_DEBUG_ID_PREFIX); +} + +/** Whether the object's format is one of the requested types (or no filter). */ +function formatMatches( + objFormat: string, + formats: Set | undefined +): boolean { + return !formats || formats.size === 0 || formats.has(objFormat); +} + +/** + * Whether two debug ids refer to the same object, ignoring case, braces, and + * any PE/PDB age suffix (so a base UUID matches its aged form and vice versa). + * + * @param a - First debug id (any casing/form). + * @param b - Second debug id (any casing/form). + */ +export function debugIdMatches(a: string, b: string): boolean { + const normA = normalizeDebugId(a); + const normB = normalizeDebugId(b); + return normA === normB || baseDebugId(normA) === baseDebugId(normB); +} + +/** Whether the object's debug id matches a requested id (or no filter). */ +function idMatches(objDebugId: string, ids: Set | undefined): boolean { + if (!ids || ids.size === 0) { + return true; + } + for (const wanted of ids) { + if (debugIdMatches(objDebugId, wanted)) { + return true; + } + } + return false; +} + +/** Whether the object carries at least one of the wanted features. */ +function featureMatches(obj: DifObjectInfo, filters: DifFilters): boolean { + return ( + (filters.debug && obj.hasDebugInfo) || + (filters.symtab && obj.hasSymbols) || + (filters.unwind && obj.hasUnwindInfo) || + (filters.sources && obj.hasSources) + ); +} + +/** + * Build a {@link DifFilters} from command flags. + * + * `--no-debug` drops both the debug and symbol-table features (matching the + * legacy `sentry-cli` semantics). At least one feature always remains wanted + * unless every `--no-*` flag is set, in which case nothing would match. + * + * @param options - Raw flag inputs. + * @returns The resolved filter set. + * @throws {ValidationError} If a `--type` value is not recognized. + */ +export function buildDifFilters(options: DifFilterOptions): DifFilters { + let formats: Set | undefined; + if (options.types && options.types.length > 0) { + formats = new Set(); + for (const raw of options.types) { + const type = raw.trim().toLowerCase(); + const format = TYPE_TO_FORMAT[type]; + if (!format) { + throw new ValidationError( + `Unknown debug file type '${raw}'. Valid types: ${VALID_DIF_TYPES.join(", ")}`, + "type" + ); + } + formats.add(format); + } + } + + let ids: Set | undefined; + if (options.ids && options.ids.length > 0) { + ids = new Set(options.ids.map((id) => normalizeDebugId(id))); + } + + return { + formats, + ids, + symtab: !options.noDebug, + debug: !options.noDebug, + unwind: !options.noUnwind, + sources: !options.noSources, + }; +} + +/** + * Whether a parsed object passes all active filters. + * + * An object is included when it (1) has a real debug id, (2) matches the + * `--type` filter, (3) matches the `--id` filter, and (4) carries at least one + * wanted feature. Pure and side-effect free. + * + * @param obj - The parsed object metadata. + * @param filters - The resolved filter set. + */ +export function objectPassesFilters( + obj: DifObjectInfo, + filters: DifFilters +): boolean { + return ( + hasValidDebugId(obj) && + formatMatches(obj.fileFormat, filters.formats) && + idMatches(obj.debugId, filters.ids) && + featureMatches(obj, filters) + ); +} + +/** + * Recursively collect every regular file under a path, with cycle-safe + * symlink-following via visited realpath tracking. + */ +async function collectFiles( + path: string, + out: string[], + visited: Set +): Promise { + let info: Awaited>; + try { + info = await stat(path); + } catch (err) { + log.debug(`Skipping unreadable path: ${path}`, err); + return; + } + + if (info.isDirectory()) { + let real: string; + try { + real = await realpath(path); + } catch { + log.debug(`Could not resolve real path for directory: ${path}`); + return; + } + if (visited.has(real)) { + log.debug(`Skipping already-visited directory (symlink cycle?): ${path}`); + return; + } + visited.add(real); + + let entries: string[]; + try { + entries = await readdir(path); + } catch (err) { + log.debug(`Skipping unreadable directory: ${path}`, err); + return; + } + for (const entry of entries) { + await collectFiles(join(path, entry), out, visited); + } + return; + } + + if (info.isFile()) { + out.push(path); + } +} + +/** + * Recursively scan paths for candidate files. + * + * Each argument may be a file (kept as-is) or a directory (walked + * recursively). Unreadable entries are skipped with a debug log. Symlinks + * are followed; cycle-safe via visited-realpath tracking. A path that does + * not exist at all throws {@link ValidationError} (unlike a walk failure + * inside a tree, which is silently skipped). + * + * @param paths - Files and/or directories to scan. + * @returns Absolute-or-relative file paths in scan order. + * @throws {ValidationError} If an explicit path does not exist or cannot be + * accessed. Walk-internal failures (entries deep inside a tree) are still + * silently skipped. + */ +export async function scanPaths(paths: string[]): Promise { + const files: string[] = []; + const visited = new Set(); + for (const path of paths) { + try { + await stat(path); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code === "ENOENT") { + throw new ValidationError(`Path '${path}' does not exist.`, "path"); + } + if (code === "EACCES" || code === "EPERM") { + throw new ValidationError( + `Path '${path}' is not readable: ${code}.`, + "path" + ); + } + throw new ValidationError( + `Cannot access path '${path}': ${code ?? "unknown error"}.`, + "path" + ); + } + await collectFiles(path, files, visited); + } + return files; +} + +const PEEK_HEADER_BYTES = 4096; + +/** + * Peek at a file's header bytes for format detection — avoids reading the + * whole file for large non-DIF data. Returns `null` when the file is + * unreadable, empty, or in an unrecognised format. + */ +async function peekHeader(path: string): Promise { + try { + const fd = await open(path, "r"); + try { + const buf = Buffer.alloc(PEEK_HEADER_BYTES); + const { bytesRead } = await fd.read(buf, 0, PEEK_HEADER_BYTES, 0); + const header = + bytesRead < PEEK_HEADER_BYTES + ? new Uint8Array(buf.subarray(0, bytesRead)) + : new Uint8Array(buf); + if (header.length === 0 || peekFormat(header) === "unknown") { + return null; + } + return header; + } finally { + await fd.close(); + } + } catch (err) { + log.debug(`Skipping unreadable file: ${path}`, err); + return null; + } +} + +/** + * Read, parse, and filter candidate files into uploadable debug files. + * + * Each candidate is first cheaply inspected via a header-sized read + + * {@link peekFormat}. Only files whose format is recognised are then fully + * read and parsed — large non-DIF files (videos, archives) cost only the + * header I/O and are never fully materialised. + * + * After a full read and parse, the file is kept only if at least one + * contained object passes {@link objectPassesFilters}. Read/parse failures + * are skipped with a debug log — a scanned tree contains many non-object files. + * + * @param paths - Candidate file paths (from {@link scanPaths}). + * @param filters - The resolved filter set. + * @returns The files to upload, each with its matched objects and primary id. + */ +export async function prepareDifs( + paths: string[], + filters: DifFilters +): Promise { + const prepared: PreparedDif[] = []; + + for (const path of paths) { + if (!(await peekHeader(path))) { + continue; + } + + let content: Buffer; + try { + content = await readFile(path); + } catch (err) { + log.debug(`Skipping unreadable file: ${path}`, err); + continue; + } + if (content.length === 0) { + continue; + } + + const data = new Uint8Array(content); + let archive: DifArchiveInfo; + try { + archive = parseDebugFile(data); + } catch (err) { + log.debug(`Skipping unparseable file: ${path}`, err); + continue; + } + + const matched = archive.objects.filter((obj) => + objectPassesFilters(obj, filters) + ); + if (matched.length === 0) { + continue; + } + + prepared.push({ + path, + content, + debugId: selectBundledObject(matched)?.debugId, + objects: matched, + }); + } + + return prepared; +} diff --git a/test/commands/debug-files/upload.test.ts b/test/commands/debug-files/upload.test.ts new file mode 100644 index 000000000..3d7b76c9e --- /dev/null +++ b/test/commands/debug-files/upload.test.ts @@ -0,0 +1,372 @@ +/** + * Tests for `sentry debug-files upload`. + * + * Uses Breakpad symbol files (a deterministic, portable text format) as + * fixtures so the tests need no committed binaries. Filter and dry-run paths + * run end-to-end through the scanner; the network upload is stubbed by spying + * on `uploadDebugFiles`. + */ + +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { run } from "@stricli/core"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { app } from "../../../src/app.js"; +import type { SentryContext } from "../../../src/context.js"; +// biome-ignore lint/performance/noNamespaceImport: needed for spyOn mocking +import * as debugFilesApi from "../../../src/lib/api/debug-files.js"; +import { useTestConfigDir } from "../../helpers.js"; + +useTestConfigDir("debug-files-upload-"); + +const BREAKPAD_FIXTURE = [ + "MODULE Linux x86_64 0F13A5DA412AFBF7C8662048F3294F3D0 example", + "INFO CODE_ID DAA5130F2A41F7FBC8662048F3294F3D439CA7FF", + "FUNC 1000 10 0 main", + "1000 10 42 1", + "PUBLIC 2000 0 some_symbol", +].join("\n"); + +const KNOWN_DEBUG_ID = "0f13a5da-412a-fbf7-c866-2048f3294f3d"; + +let tempDir: string; +let savedEnv: Record; + +beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), "df-upload-test-")); + savedEnv = { + SENTRY_ORG: process.env.SENTRY_ORG, + SENTRY_PROJECT: process.env.SENTRY_PROJECT, + }; +}); + +afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + for (const [k, v] of Object.entries(savedEnv)) { + if (v === undefined) { + delete process.env[k]; + } else { + process.env[k] = v; + } + } + vi.restoreAllMocks(); +}); + +/** Write a Breakpad symbol file inside `tempDir` and return its path. */ +async function writeBreakpad(name = "example.sym"): Promise { + const path = join(tempDir, name); + await writeFile(path, BREAKPAD_FIXTURE); + return path; +} + +/** Run `debug-files upload` and capture stdout + exit code. */ +async function runUpload( + args: string[] +): Promise<{ output: string; error: string; exitCode: number | undefined }> { + let output = ""; + let error = ""; + const mockContext: SentryContext = { + process: { ...process, exitCode: undefined } as typeof process, + env: process.env, + cwd: tempDir, + homeDir: "/tmp", + configDir: "/tmp", + stdout: { + write(data: string | Uint8Array) { + output += + typeof data === "string" ? data : new TextDecoder().decode(data); + return true; + }, + }, + stderr: { + write(data: string | Uint8Array) { + error += + typeof data === "string" ? data : new TextDecoder().decode(data); + return true; + }, + }, + stdin: process.stdin, + }; + + await run(app, ["debug-files", "upload", ...args], mockContext); + return { output, error, exitCode: mockContext.process.exitCode }; +} + +describe("sentry debug-files upload", () => { + // ── Input validation ───────────────────────────────────────────── + + test("no paths exits non-zero", async () => { + const { exitCode } = await runUpload([]); + expect(exitCode).not.toBe(0); + }); + + test("--wait and --wait-for together exits non-zero", async () => { + const path = await writeBreakpad(); + const { exitCode } = await runUpload([ + path, + "--no-upload", + "--wait", + "--wait-for", + "30", + ]); + expect(exitCode).not.toBe(0); + }); + + test("unknown --type exits non-zero", async () => { + const path = await writeBreakpad(); + const { exitCode } = await runUpload([ + path, + "--type", + "bogus", + "--no-upload", + ]); + expect(exitCode).not.toBe(0); + }); + + // ── --no-upload (dry-run) ──────────────────────────────────────── + + test("--no-upload scans a directory and reports the debug id", async () => { + await writeBreakpad(); + const { output, exitCode } = await runUpload([tempDir, "--no-upload"]); + expect(output).toContain(KNOWN_DEBUG_ID); + expect(exitCode).toBe(0); + }); + + test("--no-upload --json reports uploaded:false with the file", async () => { + await writeBreakpad(); + const { output } = await runUpload([tempDir, "--no-upload", "--json"]); + const parsed = JSON.parse(output); + expect(parsed.uploaded).toBe(false); + expect(parsed.files).toHaveLength(1); + expect(parsed.files[0].debugId).toBe(KNOWN_DEBUG_ID); + }); + + test("--no-upload needs no credentials", async () => { + delete process.env.SENTRY_ORG; + delete process.env.SENTRY_PROJECT; + await writeBreakpad(); + const { exitCode } = await runUpload([tempDir, "--no-upload"]); + expect(exitCode).toBe(0); + }); + + // ── Filters ────────────────────────────────────────────────────── + + test("--type breakpad matches the fixture", async () => { + await writeBreakpad(); + const { output } = await runUpload([ + tempDir, + "--type", + "breakpad", + "--no-upload", + "--json", + ]); + expect(JSON.parse(output).files).toHaveLength(1); + }); + + test("--type breakpad --include-sources --no-upload attaches a bundle", async () => { + // The fixture has no source files, so createSourceBundle returns null; + // we just verify --include-sources does not break the flow. + await writeBreakpad(); + const { output, exitCode } = await runUpload([ + tempDir, + "--type", + "breakpad", + "--no-upload", + "--include-sources", + "--json", + ]); + expect(exitCode).toBe(0); + const parsed = JSON.parse(output); + // The main DIF is always present; the bundle is only added when + // createSourceBundle returns one or more files. + expect(parsed.files.length).toBeGreaterThanOrEqual(1); + }); + + test("--type elf excludes a breakpad file", async () => { + await writeBreakpad(); + const { output } = await runUpload([ + tempDir, + "--type", + "elf", + "--no-upload", + "--json", + ]); + expect(JSON.parse(output).files).toHaveLength(0); + }); + + test("--id matching the fixture keeps it", async () => { + await writeBreakpad(); + const { output } = await runUpload([ + tempDir, + "--id", + KNOWN_DEBUG_ID, + "--no-upload", + "--json", + ]); + expect(JSON.parse(output).files).toHaveLength(1); + }); + + test("--id with no match drops the file", async () => { + await writeBreakpad(); + const { output } = await runUpload([ + tempDir, + "--id", + "11111111-1111-1111-1111-111111111111", + "--no-upload", + "--json", + ]); + expect(JSON.parse(output).files).toHaveLength(0); + }); + + test("--require-all with a missing id exits non-zero", async () => { + await writeBreakpad(); + const { exitCode } = await runUpload([ + tempDir, + "--id", + "11111111-1111-1111-1111-111111111111", + "--require-all", + "--no-upload", + ]); + expect(exitCode).toBe(1); + }); + + test("--id without --require-all does not fail (dry-run)", async () => { + await writeBreakpad(); + const { exitCode } = await runUpload([ + tempDir, + "--id", + "11111111-1111-1111-1111-111111111111", + "--no-upload", + ]); + expect(exitCode).toBe(0); + }); + + // ── Upload orchestration ───────────────────────────────────────── + + test("uploads scanned files via uploadDebugFiles", async () => { + process.env.SENTRY_ORG = "test-org"; + process.env.SENTRY_PROJECT = "test-project"; + await writeBreakpad(); + + const spy = vi.spyOn(debugFilesApi, "uploadDebugFiles").mockResolvedValue([ + { + name: "example.sym", + debugId: KNOWN_DEBUG_ID, + checksum: "a".repeat(40), + state: "ok", + detail: null, + }, + ]); + + const { exitCode } = await runUpload([tempDir]); + expect(spy).toHaveBeenCalledTimes(1); + const callArgs = spy.mock.calls[0]?.[0]; + expect(callArgs?.org).toBe("test-org"); + expect(callArgs?.project).toBe("test-project"); + expect(callArgs?.difs).toHaveLength(1); + expect(callArgs?.difs[0]?.debugId).toBe(KNOWN_DEBUG_ID); + expect(callArgs?.difs[0]?.content).toBeInstanceOf(Buffer); + expect(exitCode).toBe(0); + }); + + test("wait mode surfaces processing errors with a non-zero exit", async () => { + process.env.SENTRY_ORG = "test-org"; + process.env.SENTRY_PROJECT = "test-project"; + await writeBreakpad(); + + vi.spyOn(debugFilesApi, "uploadDebugFiles").mockResolvedValue([ + { + name: "example.sym", + debugId: KNOWN_DEBUG_ID, + checksum: "a".repeat(40), + state: "error", + detail: "could not process", + }, + ]); + + const { exitCode } = await runUpload([tempDir, "--wait"]); + expect(exitCode).toBe(1); + }); + + test("a not_found result exits non-zero (chunk delivery incomplete)", async () => { + process.env.SENTRY_ORG = "test-org"; + process.env.SENTRY_PROJECT = "test-project"; + await writeBreakpad(); + + vi.spyOn(debugFilesApi, "uploadDebugFiles").mockResolvedValue([ + { + name: "example.sym", + debugId: KNOWN_DEBUG_ID, + checksum: "a".repeat(40), + state: "not_found", + detail: null, + }, + ]); + + const { exitCode } = await runUpload([tempDir]); + expect(exitCode).toBe(1); + }); + + test("--require-all on a real upload fails when an --id is missing", async () => { + process.env.SENTRY_ORG = "test-org"; + process.env.SENTRY_PROJECT = "test-project"; + await writeBreakpad(); + + const spy = vi.spyOn(debugFilesApi, "uploadDebugFiles").mockResolvedValue([ + { + name: "example.sym", + debugId: KNOWN_DEBUG_ID, + checksum: "a".repeat(40), + state: "ok", + detail: null, + }, + ]); + + const { exitCode, output } = await runUpload([ + tempDir, + "--id", + KNOWN_DEBUG_ID, + "--id", + "11111111-1111-1111-1111-111111111111", + "--require-all", + ]); + expect(spy).toHaveBeenCalledTimes(1); + expect(exitCode).toBe(1); + expect(output).toContain("11111111-1111-1111-1111-111111111111"); + }); + + test("--id without --require-all does not fail on a real upload", async () => { + process.env.SENTRY_ORG = "test-org"; + process.env.SENTRY_PROJECT = "test-project"; + await writeBreakpad(); + + vi.spyOn(debugFilesApi, "uploadDebugFiles").mockResolvedValue([ + { + name: "example.sym", + debugId: KNOWN_DEBUG_ID, + checksum: "a".repeat(40), + state: "ok", + detail: null, + }, + ]); + + const { exitCode } = await runUpload([ + tempDir, + "--id", + "11111111-1111-1111-1111-111111111111", + ]); + expect(exitCode).toBe(0); + }); + + test("reports nothing to upload when no debug files are found", async () => { + process.env.SENTRY_ORG = "test-org"; + process.env.SENTRY_PROJECT = "test-project"; + await writeFile(join(tempDir, "notes.txt"), "not a debug file"); + + const spy = vi.spyOn(debugFilesApi, "uploadDebugFiles"); + const { exitCode } = await runUpload([tempDir]); + expect(spy).not.toHaveBeenCalled(); + expect(exitCode).toBe(0); + }); +}); diff --git a/test/lib/api/debug-files.test.ts b/test/lib/api/debug-files.test.ts new file mode 100644 index 000000000..4adb8c327 --- /dev/null +++ b/test/lib/api/debug-files.test.ts @@ -0,0 +1,352 @@ +/** + * Tests for the debug information file upload API. + * + * Mocks the chunk-upload primitives and the region request layer so the + * assemble body shape, missing-chunk upload, and the no-wait/wait completion + * modes can be exercised without a network. `hashBuffer` and `pickUploadEncoding` + * remain real (they are pure). + */ + +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { + type DebugFileUpload, + uploadDebugFiles, +} from "../../../src/lib/api/debug-files.js"; +import { ApiError } from "../../../src/lib/errors.js"; + +const { apiRequestToRegionMock, uploadMissingBufferChunksMock } = vi.hoisted( + () => ({ + apiRequestToRegionMock: vi.fn(), + uploadMissingBufferChunksMock: vi.fn(() => Promise.resolve()), + }) +); + +vi.mock("../../../src/lib/region.js", () => ({ + resolveOrgRegion: vi.fn(async () => "https://us.sentry.io"), +})); + +vi.mock("../../../src/lib/api/infrastructure.js", () => ({ + apiRequestToRegion: apiRequestToRegionMock, +})); + +vi.mock("../../../src/lib/api/chunk-upload.js", async (importOriginal) => { + const actual = + await importOriginal< + typeof import("../../../src/lib/api/chunk-upload.js") + >(); + return { + ...actual, + getChunkUploadOptions: vi.fn(async () => ({ + url: "https://us.sentry.io/api/0/chunk-upload/", + chunkSize: 8192, + chunksPerRequest: 64, + maxRequestSize: 1_048_576, + hashAlgorithm: "sha1", + concurrency: 8, + compression: ["gzip"], + })), + uploadMissingBufferChunks: uploadMissingBufferChunksMock, + }; +}); + +/** Build a single debug-file upload payload. */ +function makeDif( + name: string, + content: string, + debugId?: string +): DebugFileUpload { + return { name, debugId, content: Buffer.from(content) }; +} + +/** The single checksum key from the most recent assemble body. */ +function lastAssembleBody(): Record< + string, + { name: string; debug_id?: string; chunks: string[] } +> { + const calls = apiRequestToRegionMock.mock.calls; + const lastCall = calls.at(-1); + return lastCall?.[2].body; +} + +const SOLE = { + org: "acme", + project: "app", +}; + +beforeEach(() => { + apiRequestToRegionMock.mockReset(); + uploadMissingBufferChunksMock.mockClear(); +}); + +afterEach(() => { + vi.useRealTimers(); +}); + +describe("uploadDebugFiles", () => { + test("empty input returns no results and makes no requests", async () => { + const results = await uploadDebugFiles({ + ...SOLE, + difs: [], + wait: false, + maxWaitMs: 1000, + }); + expect(results).toEqual([]); + expect(apiRequestToRegionMock).not.toHaveBeenCalled(); + }); + + test("assemble body keys by checksum and includes name + debug_id + chunks", async () => { + const dif = makeDif("libfoo.so", "ELF debug bytes", "abc123-def"); + // The checksum is content-derived, so we capture the request body and + // echo its keys back as "ok" — server already holds the file, so no-wait + // completes on the first assemble. + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const body = init.body as Record; + const data: Record = {}; + for (const key of Object.keys(body)) { + data[key] = { state: "ok" }; + } + return { data }; + } + ); + + const results = await uploadDebugFiles({ + ...SOLE, + difs: [dif], + wait: false, + maxWaitMs: 1000, + }); + + const body = lastAssembleBody(); + const keys = Object.keys(body); + expect(keys).toHaveLength(1); + expect(keys[0]).toMatch(/^[0-9a-f]{40}$/); + const entry = body[keys[0] as string]; + expect(entry?.name).toBe("libfoo.so"); + expect(entry?.debug_id).toBe("abc123-def"); + expect(Array.isArray(entry?.chunks)).toBe(true); + expect(entry?.chunks.every((c) => /^[0-9a-f]{40}$/.test(c))).toBe(true); + + expect(results).toHaveLength(1); + expect(results[0]?.state).toBe("ok"); + expect(results[0]?.name).toBe("libfoo.so"); + }); + + test("omits debug_id from the body when not provided", async () => { + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const body = init.body as Record; + const data: Record = {}; + for (const key of Object.keys(body)) { + data[key] = { state: "ok" }; + } + return { data }; + } + ); + + await uploadDebugFiles({ + ...SOLE, + difs: [makeDif("anon.so", "bytes")], + wait: false, + maxWaitMs: 1000, + }); + + const entry = Object.values(lastAssembleBody())[0]; + expect(entry).toBeDefined(); + expect("debug_id" in (entry as object)).toBe(false); + }); + + test("uploads missing chunks then completes (no-wait)", async () => { + vi.useFakeTimers(); + const dif = makeDif("libbar.so", "more debug bytes", "id-1"); + + // First assemble: server is missing the chunk. Second: held (created). + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const body = init.body as Record; + const key = Object.keys(body)[0] as string; + return { + data: { + [key]: { state: "not_found", missingChunks: body[key]?.chunks }, + }, + }; + } + ); + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { data: { [key]: { state: "created" } } }; + } + ); + + const promise = uploadDebugFiles({ + ...SOLE, + difs: [dif], + wait: false, + maxWaitMs: 60_000, + }); + await vi.runAllTimersAsync(); + const results = await promise; + + expect(uploadMissingBufferChunksMock).toHaveBeenCalled(); + expect(apiRequestToRegionMock).toHaveBeenCalledTimes(2); + expect(results[0]?.state).toBe("created"); + }); + + test("wait mode polls past 'assembling' until terminal 'ok'", async () => { + vi.useFakeTimers(); + const dif = makeDif("libbaz.so", "debug bytes", "id-2"); + + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { data: { [key]: { state: "assembling" } } }; + } + ); + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { data: { [key]: { state: "ok" } } }; + } + ); + + const promise = uploadDebugFiles({ + ...SOLE, + difs: [dif], + wait: true, + maxWaitMs: 60_000, + }); + await vi.runAllTimersAsync(); + const results = await promise; + + expect(apiRequestToRegionMock).toHaveBeenCalledTimes(2); + expect(results[0]?.state).toBe("ok"); + }); + + test("wait mode collects an 'error' state without throwing", async () => { + const dif = makeDif("broken.so", "debug bytes", "id-3"); + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { + data: { [key]: { state: "error", detail: "corrupt object" } }, + }; + } + ); + + const results = await uploadDebugFiles({ + ...SOLE, + difs: [dif], + wait: true, + maxWaitMs: 60_000, + }); + + expect(results[0]?.state).toBe("error"); + expect(results[0]?.detail).toBe("corrupt object"); + }); + + test("wait mode throws ApiError when assembly does not finish in time", async () => { + const dif = makeDif("slow.so", "debug bytes", "id-4"); + // Always "assembling" → never terminal. maxWaitMs=0 trips the deadline. + apiRequestToRegionMock.mockImplementation( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { data: { [key]: { state: "assembling" } } }; + } + ); + + await expect( + uploadDebugFiles({ + ...SOLE, + difs: [dif], + wait: true, + maxWaitMs: 0, + }) + ).rejects.toBeInstanceOf(ApiError); + }); + + test("batches multiple files into one assemble request", async () => { + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const body = init.body as Record; + const data: Record = {}; + for (const key of Object.keys(body)) { + data[key] = { state: "ok" }; + } + return { data }; + } + ); + + const results = await uploadDebugFiles({ + ...SOLE, + difs: [makeDif("a.so", "alpha", "id-a"), makeDif("b.so", "beta", "id-b")], + wait: false, + maxWaitMs: 1000, + }); + + expect(apiRequestToRegionMock).toHaveBeenCalledTimes(1); + expect(Object.keys(lastAssembleBody())).toHaveLength(2); + expect(results).toHaveLength(2); + }); + + test("re-sends every chunk when server response omits a file's entry", async () => { + // First call: server has no record of either file (entries missing). + // Second call: server holds both files. No-wait completes on the second + // call after the chunks are re-uploaded. + apiRequestToRegionMock.mockImplementationOnce(async () => ({ data: {} })); + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const body = init.body as Record; + const data: Record = {}; + for (const key of Object.keys(body)) { + data[key] = { state: "ok" }; + } + return { data }; + } + ); + + await uploadDebugFiles({ + ...SOLE, + difs: [makeDif("missing.so", "alpha", "id-a")], + wait: false, + maxWaitMs: 60_000, + }); + + // First call has the file's entry missing from response, so all its + // chunks must have been queued for re-upload. + expect(uploadMissingBufferChunksMock).toHaveBeenCalled(); + const firstCall = uploadMissingBufferChunksMock.mock.calls[0]?.[0]; + const passedMissing = firstCall?.missingChecksums as Set; + expect(passedMissing.size).toBeGreaterThan(0); + }); + + test("re-sends every chunk when entry state is not_found without missingChunks", async () => { + // Server returns `not_found` but omits the `missingChunks` field — the + // client must still upload all chunks for that file, otherwise the loop + // would poll forever without sending anything. + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { data: { [key]: { state: "not_found" } } }; + } + ); + apiRequestToRegionMock.mockImplementationOnce( + async (_url: string, _endpoint: string, init: { body: object }) => { + const key = Object.keys(init.body as object)[0] as string; + return { data: { [key]: { state: "ok" } } }; + } + ); + + await uploadDebugFiles({ + ...SOLE, + difs: [makeDif("silent.so", "alpha", "id-x")], + wait: false, + maxWaitMs: 60_000, + }); + + const firstCall = uploadMissingBufferChunksMock.mock.calls[0]?.[0]; + const passedMissing = firstCall?.missingChecksums as Set; + // Must contain the file's chunk checksums, not be empty. + expect(passedMissing.size).toBeGreaterThan(0); + }); +}); diff --git a/test/lib/dif/scan.property.test.ts b/test/lib/dif/scan.property.test.ts new file mode 100644 index 000000000..6bc8b385e --- /dev/null +++ b/test/lib/dif/scan.property.test.ts @@ -0,0 +1,247 @@ +/** + * Property-based tests for `debug-files upload` scanning filters. + * + * Verifies the pure filter predicates in `src/lib/dif/scan.ts` + * ({@link objectPassesFilters}, {@link normalizeDebugId}, {@link buildDifFilters}) + * hold for arbitrary parsed-object inputs, without touching the filesystem. + */ + +import { + type Arbitrary, + boolean, + constant, + constantFrom, + assert as fcAssert, + oneof, + property, + record, + tuple, +} from "fast-check"; +import { describe, expect, test } from "vitest"; +import type { DifObjectInfo } from "../../../src/lib/dif/index.js"; +import { + buildDifFilters, + type DifFilters, + debugIdMatches, + normalizeDebugId, + objectPassesFilters, + VALID_DIF_TYPES, +} from "../../../src/lib/dif/scan.js"; +import { ValidationError } from "../../../src/lib/errors.js"; +import { DEFAULT_NUM_RUNS } from "../../model-based/helpers.js"; + +const NIL_DEBUG_ID = "00000000-0000-0000-0000-000000000000"; + +const hexGroup = (length: number) => + tuple( + ...Array.from({ length }, () => constantFrom(..."0123456789abcdef")) + ).map((chars) => chars.join("")); + +/** A non-nil UUID (8-4-4-4-12). May coincide with nil with negligible odds. */ +const uuidArb = tuple( + hexGroup(8), + hexGroup(4), + hexGroup(4), + hexGroup(4), + hexGroup(12) +).map((parts) => parts.join("-")); + +/** A debug id: valid UUID, UUID+age suffix, or the nil id. */ +const debugIdArb = oneof( + uuidArb, + tuple(uuidArb, hexGroup(8)).map(([id, age]) => `${id}-${age}`), + constant(NIL_DEBUG_ID) +); + +const formatArb = constantFrom( + "elf", + "macho", + "pe", + "pdb", + "portablepdb", + "wasm", + "breakpad", + "sourcebundle", + "unknown" +); + +const difObjectArb: Arbitrary = record({ + debugId: debugIdArb, + codeId: oneof(constant(null), hexGroup(16)), + arch: constant("x86_64"), + fileFormat: formatArb, + kind: constant("dbg"), + hasSymbols: boolean(), + hasDebugInfo: boolean(), + hasUnwindInfo: boolean(), + hasSources: boolean(), +}); + +/** The default filter set: every feature wanted, no type/id restriction. */ +const defaultFilters = (): DifFilters => buildDifFilters({}); + +describe("property: normalizeDebugId", () => { + test("is idempotent", () => { + fcAssert( + property(debugIdArb, (id) => { + const once = normalizeDebugId(id); + expect(normalizeDebugId(once)).toBe(once); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("output is lowercase and brace-free", () => { + fcAssert( + property(debugIdArb, (id) => { + const wrapped = `{${id.toUpperCase()}}`; + const normalized = normalizeDebugId(wrapped); + expect(normalized).toBe(normalized.toLowerCase()); + expect(normalized).not.toContain("{"); + expect(normalized).not.toContain("}"); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); +}); + +describe("property: objectPassesFilters", () => { + test("a nil debug id never passes, regardless of filters", () => { + fcAssert( + property(difObjectArb, (obj) => { + const nilObj = { ...obj, debugId: NIL_DEBUG_ID }; + expect(objectPassesFilters(nilObj, defaultFilters())).toBe(false); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("an object with no features never passes", () => { + fcAssert( + property(difObjectArb, (obj) => { + const featureless = { + ...obj, + hasSymbols: false, + hasDebugInfo: false, + hasUnwindInfo: false, + hasSources: false, + }; + expect(objectPassesFilters(featureless, defaultFilters())).toBe(false); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("dropping every feature filter rejects everything", () => { + const noFeatures = buildDifFilters({ + noDebug: true, + noUnwind: true, + noSources: true, + }); + fcAssert( + property(difObjectArb, (obj) => { + expect(objectPassesFilters(obj, noFeatures)).toBe(false); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("a type filter excludes objects of other formats", () => { + fcAssert( + property(difObjectArb, (obj) => { + // Filter for ELF only; any non-ELF, non-nil object must be excluded. + const elfOnly = buildDifFilters({ types: ["elf"] }); + if (obj.fileFormat !== "elf") { + expect(objectPassesFilters(obj, elfOnly)).toBe(false); + } + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("a valid id + at least one feature passes with default filters", () => { + fcAssert( + property(uuidArb, constantFrom(0, 1, 2, 3), (id, featureIdx) => { + const obj: DifObjectInfo = { + debugId: id, + codeId: null, + arch: "x86_64", + fileFormat: "elf", + kind: "dbg", + hasSymbols: featureIdx === 0, + hasDebugInfo: featureIdx === 1, + hasUnwindInfo: featureIdx === 2, + hasSources: featureIdx === 3, + }; + expect(objectPassesFilters(obj, defaultFilters())).toBe(true); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("an id filter matches regardless of age suffix", () => { + fcAssert( + property(uuidArb, hexGroup(8), (id, age) => { + const obj: DifObjectInfo = { + debugId: `${id}-${age}`, + codeId: null, + arch: "x86_64", + fileFormat: "pe", + kind: "dbg", + hasSymbols: true, + hasDebugInfo: false, + hasUnwindInfo: false, + hasSources: false, + }; + // Requesting the base UUID (no age) still matches the aged object. + const idFilter = buildDifFilters({ ids: [id] }); + expect(objectPassesFilters(obj, idFilter)).toBe(true); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); +}); + +describe("property: debugIdMatches", () => { + test("is reflexive and case/brace insensitive", () => { + fcAssert( + property(debugIdArb, (id) => { + expect(debugIdMatches(id, id)).toBe(true); + expect(debugIdMatches(id, `{${id.toUpperCase()}}`)).toBe(true); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); + + test("a base UUID matches its aged form (PE/PDB)", () => { + fcAssert( + property(uuidArb, hexGroup(8), (id, age) => { + expect(debugIdMatches(id, `${id}-${age}`)).toBe(true); + expect(debugIdMatches(`${id}-${age}`, id)).toBe(true); + }), + { numRuns: DEFAULT_NUM_RUNS } + ); + }); +}); + +describe("buildDifFilters", () => { + test("accepts every documented type", () => { + for (const type of VALID_DIF_TYPES) { + expect(() => buildDifFilters({ types: [type] })).not.toThrow(); + } + }); + + test("throws ValidationError on an unknown type", () => { + expect(() => buildDifFilters({ types: ["bogus"] })).toThrow( + ValidationError + ); + }); + + test("--no-debug drops both debug and symtab features", () => { + const filters = buildDifFilters({ noDebug: true }); + expect(filters.debug).toBe(false); + expect(filters.symtab).toBe(false); + expect(filters.unwind).toBe(true); + expect(filters.sources).toBe(true); + }); +});