diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c85c315..b740c52 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,10 @@ on: branches: [main] pull_request: +permissions: + contents: read + pull-requests: write + env: CARGO_TERM_COLOR: always RUSTFLAGS: -D warnings @@ -42,6 +46,56 @@ jobs: - uses: Swatinem/rust-cache@v2 - run: cargo test --all-features + # Informational coverage report. Emits lcov.info as an artifact and + # posts a sticky PR comment with the line-coverage %. Intentionally + # NOT in the required-status-checks list and NOT using + # `--fail-under-lines` — let the number be visible across 2-3 + # releases before ratcheting (see CONTRIBUTING.md "Coverage"). + coverage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@1.88 + with: + components: llvm-tools-preview + - uses: Swatinem/rust-cache@v2 + - name: Install cargo-llvm-cov + run: cargo install cargo-llvm-cov --locked + - name: Run coverage + run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info + - name: Upload lcov artifact + uses: actions/upload-artifact@v4 + with: + name: lcov.info + path: lcov.info + retention-days: 30 + - name: Compute coverage % + id: compute + run: | + covered=$(grep "^DA:" lcov.info | awk -F, '$2>0' | wc -l) + total=$(grep -c "^DA:" lcov.info) + if [ "$total" -gt 0 ]; then + pct=$(awk "BEGIN{printf \"%.1f\", ${covered}*100/${total}}") + else + pct="N/A" + fi + echo "pct=$pct" >> "$GITHUB_OUTPUT" + echo "covered=$covered" >> "$GITHUB_OUTPUT" + echo "total=$total" >> "$GITHUB_OUTPUT" + - name: Post coverage summary as PR comment + if: github.event_name == 'pull_request' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: bomdrift-coverage + message: | + ## Coverage report + + Line coverage: **${{ steps.compute.outputs.pct }}%** (${{ steps.compute.outputs.covered }} / ${{ steps.compute.outputs.total }} lines) + + Full lcov report 
available as workflow artifact `lcov.info`. + + v0.9.8 introduces this report; `--fail-under-lines` will be added once coverage is visible across 2–3 releases. + audit: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..693719e --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,48 @@ +name: Fuzz + +# cargo-fuzz requires nightly Rust, so this lives in its own workflow +# rather than bolting onto ci.yml's stable-pinned matrix. PR runs use +# a 60s budget per target (signal: did the seed corpus crash?); the +# weekly schedule extends to 600s for deeper coverage. +on: + pull_request: + paths: + - 'src/parse/**' + - 'fuzz/**' + - '.github/workflows/fuzz.yml' + schedule: + - cron: '17 3 * * 0' # Sundays 03:17 UTC + workflow_dispatch: + +permissions: + contents: read + +jobs: + fuzz: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + target: [parse_cyclonedx, parse_spdx, parse_syft] + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly + - uses: Swatinem/rust-cache@v2 + with: + workspaces: fuzz + - name: Install cargo-fuzz + run: cargo install cargo-fuzz --locked + - name: Run fuzz target + run: | + BUDGET=60 + if [ "${{ github.event_name }}" = "schedule" ]; then + BUDGET=600 + fi + cd fuzz + cargo +nightly fuzz run ${{ matrix.target }} -- -max_total_time=$BUDGET + - name: Upload artifacts on crash + if: failure() + uses: actions/upload-artifact@v4 + with: + name: fuzz-${{ matrix.target }}-artifacts + path: fuzz/artifacts/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 5281238..9bba1bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,86 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.9.8] - 2026-04-30 + +The "code-review-driven hardening" milestone. 
External agent review surfaced +nine recommendations across P1/P2/P3; v0.9.8 takes five of the six high-leverage +items, defers the rest as v1.0+ candidates with explicit rationale. + +### Added + +- **Continuous parser fuzzing.** New `fuzz/` standalone sub-workspace with + three `cargo-fuzz` libfuzzer targets (`parse_cyclonedx`, `parse_spdx`, + `parse_syft`). Each target two-stages JSON validation before invoking the + bomdrift parser, scoping the fuzz to bomdrift-side parsing rather than + serde_json's well-tested layer. Seed corpus from `tests/fixtures/`. + New `.github/workflows/fuzz.yml` runs each target for 60s on PRs touching + `src/parse/**` or `fuzz/**`, and 600s weekly on a Sunday cron schedule. + Nightly Rust toolchain pinned per cargo-fuzz convention. Closes the + textbook "untrusted-input parser" gap for a security tool. +- **CI coverage report.** New `coverage` job in `.github/workflows/ci.yml` + runs `cargo-llvm-cov`, emits `lcov.info` as a workflow artifact, and posts + a sticky PR comment via `marocchino/sticky-pull-request-comment@v2` showing + line-coverage percentage. **No `--fail-under-lines` gate yet** — coverage + is informational for v0.9.8/v0.9.9 to establish a stable baseline before + ratcheting in a later release. CONTRIBUTING.md gains a "Coverage" subsection + describing the policy. + +### Changed + +- **`unwrap`/`expect`/`panic`/`todo`/`unimplemented` lints now warn** at + crate root via `#![warn(clippy::unwrap_used, clippy::expect_used, + clippy::panic, clippy::todo, clippy::unimplemented)]`. Production code + audited; the four remaining `.expect()` sites + (`baseline.rs:389`, `render/json.rs:42`, `render/sarif.rs:84`, + `vex.rs:932`) are true invariants and gain explicit + `#[allow(clippy::expect_used, reason = "...")]` annotations citing the + why. Test modules opt-out via inner `#![allow(clippy::unwrap_used, + clippy::expect_used)]` (28 modules touched). Zero production `.unwrap()` + remain. 
+- **Every `unsafe` block now carries a `// SAFETY:` comment.** + 16 of 23 unsafe sites (the Rust 2024 `env::set_var` wrappers + a few + test helpers) lacked annotation. Added rationale to each, and enforce + going forward via `#![warn(clippy::undocumented_unsafe_blocks)]` at + crate root. + +### Refactored + +- **`src/lib.rs` 47 KB → 31 lines.** Extracted the 1,300-line `run_diff` + orchestration into a new `src/run.rs` module. `lib.rs` is now pure + re-exports + module declarations. Public API surface preserved + byte-for-byte: external consumers calling `bomdrift::run_diff(...)` get + the same function via re-export. Behavior-preserving — all 432 tests + pass without modification beyond import-path updates. + +### Documentation + +- README.md gains a "Continuous fuzzing (v0.9.8+)" subsection. +- CONTRIBUTING.md gains a "Coverage" subsection. + +### Tests + +- 432 → 432 (no net change). Refactor commits are behavior-preserving. + +### Scope notes — what's deferred to v1.0 + +The external review surfaced four other recommendations explicitly deferred: + +- **Remaining file splits** for `vex.rs` (50 KB), `render/markdown.rs` + (58 KB), `render/sarif.rs` (48 KB), `baseline.rs` (42 KB), + `enrich/typosquat.rs` (42 KB), `enrich/license.rs` (34 KB). The lib.rs + split was the highest-ROI single split; the others can land + organically as future PRs touch those files. +- **Mutation testing audit** via `cargo-mutants`. High-signal but slow; + use as v1.0 audit tool, not a CI gate. +- **Calibration FPR docs** — running bomdrift on top-1000 npm + PyPI for + 12 months of releases needs data-collection infrastructure that + doesn't exist yet. Tracked separately. +- **Coverage `--fail-under-lines` ratchet** — flip on after 2-3 releases + of visibility. +- **WASM-sandboxed plugin model** — carryover from v0.9.7; conflicts with + single-binary tenet at current toolchain costs. + ## [0.9.7] - 2026-04-29 The "v0.9.6 follow-up backlog" milestone. 
Five concrete items from the diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 61a1b5b..21cb35e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -133,6 +133,17 @@ Network-touching enrichers should have a unit test for the network- failure path (fake fetcher returns `Err`) — the best-effort contract matters and silently breaking it would be an easy regression. +### Coverage (v0.9.8+) + +CI runs `cargo llvm-cov` on every PR and posts a sticky comment with +the overall line coverage % (the full `lcov.info` is uploaded as a +workflow artifact). The job is informational for now — there is no +`--fail-under-lines` threshold yet. The plan is to add a ratchet in +a later release once 2–3 releases have made the baseline visible. Until then, +the report is a nudge, not a gate; PRs that move coverage in the +wrong direction without justification will get a review comment, not +a red check. + ### Test conventions (v0.9.5+) Tests that mutate `SOURCE_DATE_EPOCH` (directly or indirectly via diff --git a/Cargo.lock b/Cargo.lock index f69252f..1d3cf10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,7 +123,7 @@ dependencies = [ [[package]] name = "bomdrift" -version = "0.9.7" +version = "0.9.8" dependencies = [ "anyhow", "base64", diff --git a/Cargo.toml b/Cargo.toml index c24ff07..3b8c2c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bomdrift" -version = "0.9.7" +version = "0.9.8" edition = "2024" rust-version = "1.88" description = "SBOM diff with supply-chain risk signals (CVEs, typosquats, maintainer-age)." diff --git a/README.md b/README.md index cdfc20d..34f4384 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Recent incidents bomdrift would have surfaced: The dimensions adopters actually filter on. Sourced from [`files/competitor-research-v0.7-v0.9.md`](./files/competitor-research-v0.7-v0.9.md); -correct as of v0.9.7. +correct as of v0.9.8. 
| | bomdrift | Socket | Snyk | Trivy | OSV-Scanner | Grype | |------------------------------------------|:---:|:---:|:---:|:---:|:---:|:---:| @@ -94,7 +94,7 @@ jobs: # verify-signatures: true (set false on trusted mirrors) ``` -Pin to `@v1` for the latest v0.x; pin to `@v0.9.7` for reproducible builds. Run `bomdrift init` if you want a checked-in `.bomdrift.toml` policy and both workflows scaffolded locally. See the [Action reference](https://metbcy.github.io/bomdrift/github-action.html) for every input — including `upload-to-code-scanning`, `verify-signatures`, `comment-size-limit`, and the `before-sbom`/`after-sbom` escape hatch. +Pin to `@v1` for the latest v0.x; pin to `@v0.9.8` for reproducible builds. Run `bomdrift init` if you want a checked-in `.bomdrift.toml` policy and both workflows scaffolded locally. See the [Action reference](https://metbcy.github.io/bomdrift/github-action.html) for every input — including `upload-to-code-scanning`, `verify-signatures`, `comment-size-limit`, and the `before-sbom`/`after-sbom` escape hatch. **Other forges:** GitLab CI, Bitbucket Pipelines, and Azure DevOps Pipelines all have ready-to-copy templates under [`examples/`](./examples/) and dedicated docs chapters: [GitLab CI](https://metbcy.github.io/bomdrift/gitlab-ci.html), [Bitbucket](https://metbcy.github.io/bomdrift/bitbucket.html), [Azure DevOps](https://metbcy.github.io/bomdrift/azure-devops.html). Comment-driven `/bomdrift suppress` works on all four SCMs via the Cloudflare Worker bridges added in v0.9.5. @@ -127,7 +127,7 @@ Comment `/bomdrift suppress GHSA-xxxx` on any PR; the sub-action appends to `.bo Pre-built binaries cover Linux x86_64 + aarch64, macOS aarch64, and Windows x86_64. Each archive is cosign-signed via Sigstore + GitHub OIDC. 
```bash -VERSION=v0.9.7 +VERSION=v0.9.8 TARGET=x86_64-unknown-linux-gnu curl -sSL -o bomdrift.tar.gz \ "https://github.com/Metbcy/bomdrift/releases/download/${VERSION}/bomdrift-${VERSION}-${TARGET}.tar.gz" @@ -143,7 +143,7 @@ Verify the archive's signature before you trust the binary — see [Release sign ### From source ```bash -cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.7 bomdrift +cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.8 bomdrift ``` Requires Rust 1.85+ (the project uses edition 2024). @@ -279,7 +279,7 @@ Every release archive is signed with cosign keyless via Sigstore (GitHub OIDC). ```bash # Replace VERSION + TARGET with your downloaded archive's pair -VERSION=v0.9.7 +VERSION=v0.9.8 TARGET=x86_64-unknown-linux-gnu ARCHIVE=bomdrift-${VERSION}-${TARGET}.tar.gz @@ -293,6 +293,16 @@ cosign verify-blob \ The Action verifies signatures automatically by default. Set `verify-signatures: false` on trusted mirrors to skip the cosign install step (~15s saved per run). +### Continuous fuzzing (v0.9.8+) + +The CycloneDX, SPDX, and Syft JSON parsers are continuously fuzzed +via [`cargo-fuzz`](https://rust-fuzz.github.io/book/cargo-fuzz/). +Pull requests touching `src/parse/**` get a short fuzz pass per +target on Linux nightly; a longer scheduled run executes weekly on +`main`. Crash artifacts are uploaded for triage. +See [`.github/workflows/fuzz.yml`](./.github/workflows/fuzz.yml) and +[`fuzz/fuzz_targets/`](./fuzz/fuzz_targets/). + ## Documentation - **[Docs site (mdBook)](https://metbcy.github.io/bomdrift/)** — full reference: CLI flags, every action input, output-format anatomy, per-enricher deep dives, architecture notes, roadmap. diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md index 4896183..20fbf4f 100644 --- a/docs/src/quickstart.md +++ b/docs/src/quickstart.md @@ -25,7 +25,7 @@ jobs: ``` The `@v1` mutable tag tracks the latest v0.x release. 
Pin to a specific -version (`@v0.9.7`) if you prefer reproducible builds. See +version (`@v0.9.8`) if you prefer reproducible builds. See [GitHub Action](./github-action.md) for every input. If you prefer a checked-in policy file, install the binary and run @@ -39,7 +39,7 @@ Pre-built binaries cover Linux x86_64 + aarch64, macOS aarch64, and Windows x86_64. Each archive is cosign-signed via Sigstore + GitHub OIDC. ```bash -VERSION=v0.9.7 +VERSION=v0.9.8 TARGET=x86_64-unknown-linux-gnu curl -sSL -o bomdrift.tar.gz \ "https://github.com/Metbcy/bomdrift/releases/download/${VERSION}/bomdrift-${VERSION}-${TARGET}.tar.gz" @@ -60,7 +60,7 @@ To verify the archive's signature before you trust the binary, see ## From source ```bash -cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.7 bomdrift +cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.8 bomdrift ``` Requires Rust 1.85+ (the project uses edition 2024). diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..e274846 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,5 @@ +# cargo-fuzz / libFuzzer build + crash artifacts. The seed corpus +# under corpus/ IS checked in (those are inputs, not outputs). +/target +/artifacts +Cargo.lock diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..d0f5b0e --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,43 @@ +[package] +name = "bomdrift-fuzz" +version = "0.0.0" +publish = false +edition = "2024" +rust-version = "1.88" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +serde_json = "1" + +[dependencies.bomdrift] +path = ".." + +# Prevent cargo from treating this directory as part of the parent +# workspace. cargo-fuzz expects fuzz/ to be its own workspace so the +# nightly toolchain (required for libfuzzer instrumentation) doesn't +# leak into the main build. 
+[workspace] + +[[bin]] +name = "parse_cyclonedx" +path = "fuzz_targets/parse_cyclonedx.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "parse_spdx" +path = "fuzz_targets/parse_spdx.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "parse_syft" +path = "fuzz_targets/parse_syft.rs" +test = false +doc = false +bench = false diff --git a/fuzz/corpus/parse_cyclonedx/cdx-after.json b/fuzz/corpus/parse_cyclonedx/cdx-after.json new file mode 100644 index 0000000..dc20850 --- /dev/null +++ b/fuzz/corpus/parse_cyclonedx/cdx-after.json @@ -0,0 +1,42 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:uuid:9e671687-395b-41f5-a30f-a58921a69b80", + "version": 1, + "metadata": { + "timestamp": "2026-04-28T01:00:00Z", + "tools": [ + {"vendor": "anchore", "name": "syft", "version": "1.20.0"} + ] + }, + "components": [ + { + "type": "library", + "bom-ref": "pkg:npm/axios@1.14.1", + "name": "axios", + "version": "1.14.1", + "purl": "pkg:npm/axios@1.14.1", + "licenses": [{"license": {"id": "MIT"}}], + "supplier": {"name": "Matt Zabriskie"}, + "externalReferences": [ + {"type": "vcs", "url": "https://github.com/axios/axios"} + ] + }, + { + "type": "library", + "bom-ref": "pkg:cargo/serde@1.0.228", + "name": "serde", + "version": "1.0.228", + "purl": "pkg:cargo/serde@1.0.228", + "licenses": [{"expression": "MIT OR Apache-2.0"}] + }, + { + "type": "library", + "bom-ref": "pkg:npm/plain-crypto-js@4.2.1", + "name": "plain-crypto-js", + "version": "4.2.1", + "purl": "pkg:npm/plain-crypto-js@4.2.1", + "licenses": [{"license": {"id": "MIT"}}] + } + ] +} diff --git a/fuzz/corpus/parse_cyclonedx/cdx-minimal.json b/fuzz/corpus/parse_cyclonedx/cdx-minimal.json new file mode 100644 index 0000000..9df585b --- /dev/null +++ b/fuzz/corpus/parse_cyclonedx/cdx-minimal.json @@ -0,0 +1,47 @@ +{ + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:uuid:3e671687-395b-41f5-a30f-a58921a69b79", + "version": 1, + "metadata": { 
+ "timestamp": "2026-04-28T00:00:00Z", + "tools": [ + {"vendor": "anchore", "name": "syft", "version": "1.20.0"} + ] + }, + "components": [ + { + "type": "library", + "bom-ref": "pkg:npm/axios@1.14.0", + "name": "axios", + "version": "1.14.0", + "purl": "pkg:npm/axios@1.14.0", + "licenses": [ + {"license": {"id": "MIT"}} + ], + "hashes": [ + {"alg": "SHA-256", "content": "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"} + ], + "supplier": {"name": "Matt Zabriskie"}, + "externalReferences": [ + {"type": "vcs", "url": "https://github.com/axios/axios"}, + {"type": "website", "url": "https://axios-http.com"} + ] + }, + { + "type": "library", + "bom-ref": "pkg:cargo/serde@1.0.228", + "name": "serde", + "version": "1.0.228", + "purl": "pkg:cargo/serde@1.0.228", + "licenses": [ + {"expression": "MIT OR Apache-2.0"} + ] + }, + { + "type": "library", + "name": "no-purl-component", + "version": "0.1.0" + } + ] +} diff --git a/fuzz/corpus/parse_spdx/spdx-minimal.json b/fuzz/corpus/parse_spdx/spdx-minimal.json new file mode 100644 index 0000000..c95560c --- /dev/null +++ b/fuzz/corpus/parse_spdx/spdx-minimal.json @@ -0,0 +1,57 @@ +{ + "spdxVersion": "SPDX-2.3", + "dataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "github.com/Metbcy/bomdrift", + "documentNamespace": "https://github.com/Metbcy/bomdrift/dependency_graph/sbom-abc123", + "creationInfo": { + "created": "2026-04-28T00:00:00Z", + "creators": ["Tool: GitHub.com-Dependency-Graph"] + }, + "packages": [ + { + "SPDXID": "SPDXRef-npm-axios-1.14.0", + "name": "axios", + "versionInfo": "1.14.0", + "downloadLocation": "git+https://github.com/axios/axios", + "filesAnalyzed": false, + "licenseConcluded": "MIT", + "licenseDeclared": "NOASSERTION", + "supplier": "Person: Matt Zabriskie", + "checksums": [ + {"algorithm": "SHA256", "checksumValue": "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"} + ], + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + 
"referenceType": "purl", + "referenceLocator": "pkg:npm/axios@1.14.0" + } + ] + }, + { + "SPDXID": "SPDXRef-pypi-requests-2.31.0", + "name": "requests", + "versionInfo": "2.31.0", + "downloadLocation": "NOASSERTION", + "filesAnalyzed": false, + "licenseConcluded": "NOASSERTION", + "licenseDeclared": "Apache-2.0", + "supplier": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": "pkg:pypi/requests@2.31.0" + } + ] + }, + { + "SPDXID": "SPDXRef-Package-NoPurl", + "name": "no-purl-component", + "versionInfo": "0.1.0", + "downloadLocation": "NOASSERTION", + "filesAnalyzed": false + } + ] +} diff --git a/fuzz/corpus/parse_syft/syft-minimal.json b/fuzz/corpus/parse_syft/syft-minimal.json new file mode 100644 index 0000000..48cff81 --- /dev/null +++ b/fuzz/corpus/parse_syft/syft-minimal.json @@ -0,0 +1,39 @@ +{ + "schema": { + "version": "16.0.0", + "url": "https://raw.githubusercontent.com/anchore/syft/main/internal/jsonschema/anchore.io/schema/syft/json/16.0.0/document.json" + }, + "source": { + "id": "sha256:1111111111111111111111111111111111111111111111111111111111111111", + "name": "github.com/Metbcy/bomdrift", + "type": "directory" + }, + "artifacts": [ + { + "id": "axios-1.14.0-syft-id", + "name": "axios", + "version": "1.14.0", + "type": "npm", + "purl": "pkg:npm/axios@1.14.0", + "licenses": [ + {"value": "MIT", "spdxExpression": "MIT", "type": "declared"} + ] + }, + { + "id": "requests-2.31.0-syft-id", + "name": "requests", + "version": "2.31.0", + "type": "python", + "purl": "pkg:pypi/requests@2.31.0", + "licenses": [ + "Apache-2.0" + ] + }, + { + "id": "no-purl-syft", + "name": "no-purl-component", + "version": "0.1.0", + "type": "rust-crate" + } + ] +} diff --git a/fuzz/fuzz_targets/parse_cyclonedx.rs b/fuzz/fuzz_targets/parse_cyclonedx.rs new file mode 100644 index 0000000..72e3381 --- /dev/null +++ b/fuzz/fuzz_targets/parse_cyclonedx.rs @@ -0,0 +1,18 @@ +#![no_main] +//! 
Fuzz target for the CycloneDX JSON parser. +//! +//! Two-stage shape: first decode the bytes as `serde_json::Value` so +//! that ill-formed-JSON inputs are dropped at the well-tested +//! `serde_json` boundary, then hand the parsed value to bomdrift's +//! own parser. This focuses fuzzing budget on bomdrift-side logic +//! (schema interpretation, purl handling, hash normalization) rather +//! than re-fuzzing serde_json. + +use bomdrift::parse::{SbomParser, cyclonedx::CycloneDxParser}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(value) = serde_json::from_slice::<serde_json::Value>(data) { + let _ = CycloneDxParser::parse(value); + } +}); diff --git a/fuzz/fuzz_targets/parse_spdx.rs b/fuzz/fuzz_targets/parse_spdx.rs new file mode 100644 index 0000000..2c6f1e8 --- /dev/null +++ b/fuzz/fuzz_targets/parse_spdx.rs @@ -0,0 +1,12 @@ +#![no_main] +//! Fuzz target for the SPDX 2.3 JSON parser. See parse_cyclonedx.rs +//! for the rationale behind the two-stage decode. + +use bomdrift::parse::{SbomParser, spdx::SpdxParser}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(value) = serde_json::from_slice::<serde_json::Value>(data) { + let _ = SpdxParser::parse(value); + } +}); diff --git a/fuzz/fuzz_targets/parse_syft.rs b/fuzz/fuzz_targets/parse_syft.rs new file mode 100644 index 0000000..e39581b --- /dev/null +++ b/fuzz/fuzz_targets/parse_syft.rs @@ -0,0 +1,12 @@ +#![no_main] +//! Fuzz target for the Syft JSON parser. See parse_cyclonedx.rs for +//! the rationale behind the two-stage decode. 
+ +use bomdrift::parse::{SbomParser, syft::SyftParser}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(value) = serde_json::from_slice::<serde_json::Value>(data) { + let _ = SyftParser::parse(value); + } +}); diff --git a/src/attestation.rs b/src/attestation.rs index 9561e46..fbb9a46 100644 --- a/src/attestation.rs +++ b/src/attestation.rs @@ -142,6 +142,13 @@ pub fn extract_sbom_from_envelope(stdout: &str) -> Result { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use base64::engine::general_purpose::STANDARD as B64; @@ -302,8 +309,14 @@ mod tests { // Restore PATH BEFORE asserting so a panic doesn't leave the // test environment in a weird state for parallel tests. match prev_path { - Some(p) => unsafe { std::env::set_var("PATH", p) }, - None => unsafe { std::env::remove_var("PATH") }, + Some(p) => { + // SAFETY: still serialized via the test_env_lock guard held above. + unsafe { std::env::set_var("PATH", p) } + } + None => { + // SAFETY: still serialized via the test_env_lock guard held above. + unsafe { std::env::remove_var("PATH") } + } } let _ = std::fs::remove_dir_all(&dir); @@ -327,9 +340,16 @@ mod tests { "https://example.com", ); + // SAFETY: still serialized via the test_env_lock guard held above. match prev_path { - Some(p) => unsafe { std::env::set_var("PATH", p) }, - None => unsafe { std::env::remove_var("PATH") }, + Some(p) => { + // SAFETY: still serialized via the test_env_lock guard held above. + unsafe { std::env::set_var("PATH", p) } + } + None => { + // SAFETY: still serialized via the test_env_lock guard held above. 
+ unsafe { std::env::remove_var("PATH") } + } } let err = result.expect_err("must surface clear error when cosign is missing"); diff --git a/src/baseline.rs b/src/baseline.rs index c3f2a59..a9cb99e 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -386,7 +386,13 @@ pub fn add_suppression_full( ); } - let obj = doc.as_object_mut().expect("checked is_object above"); + #[allow( + clippy::expect_used, + reason = "invariant: is_object() check above guarantees Value::Object so as_object_mut() returns Some" + )] + let obj = doc + .as_object_mut() + .expect("invariant: is_object() check above guarantees Value::Object"); obj.entry("schema_version") .or_insert(serde_json::Value::from(1u64)); @@ -526,6 +532,13 @@ fn doc_kind(v: &serde_json::Value) -> &'static str { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::enrich::typosquat::TyposquatFinding; use crate::enrich::version_jump::VersionJumpFinding; @@ -852,12 +865,16 @@ mod tests { } impl Drop for Guard { fn drop(&mut self) { + // SAFETY: env mutation guarded by the `_lock` field below + // which holds the crate-wide `clock::test_env_lock()` + // mutex for the lifetime of this Guard. unsafe { std::env::remove_var("SOURCE_DATE_EPOCH"); } } } let _lock = crate::clock::test_env_lock(); + // SAFETY: env mutation serialized by the `_lock` mutex guard above. unsafe { std::env::set_var("SOURCE_DATE_EPOCH", epoch.to_string()); } diff --git a/src/clock.rs b/src/clock.rs index e8c0a3b..720ad79 100644 --- a/src/clock.rs +++ b/src/clock.rs @@ -93,6 +93,13 @@ pub(crate) fn test_env_lock() -> std::sync::MutexGuard<'static, ()> { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; /// Re-export at module level for the existing tests below. 
@@ -130,6 +137,7 @@ mod tests { let t = now(); assert_eq!(t.unix_timestamp(), 1777593600); assert_eq!(format_ymd(t.date()), "2026-05-01"); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::remove_var("SOURCE_DATE_EPOCH"); } @@ -138,10 +146,12 @@ mod tests { #[test] fn now_is_read_per_call_not_cached() { let _g = env_lock(); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::set_var("SOURCE_DATE_EPOCH", "1000000000"); } let a = now(); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::set_var("SOURCE_DATE_EPOCH", "2000000000"); } @@ -149,6 +159,7 @@ mod tests { assert_ne!(a.unix_timestamp(), b.unix_timestamp()); assert_eq!(a.unix_timestamp(), 1000000000); assert_eq!(b.unix_timestamp(), 2000000000); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::remove_var("SOURCE_DATE_EPOCH"); } @@ -157,11 +168,13 @@ mod tests { #[test] fn malformed_source_date_epoch_falls_back() { let _g = env_lock(); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::set_var("SOURCE_DATE_EPOCH", "not-a-number"); } // Should not panic; returns system clock now. let _ = now(); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::remove_var("SOURCE_DATE_EPOCH"); } @@ -177,12 +190,14 @@ mod tests { #[test] fn is_expired_ordering() { let _g = env_lock(); + // SAFETY: env mutation guarded by process-wide mutex above. unsafe { env::set_var("SOURCE_DATE_EPOCH", "1777593600"); } // 2026-05-01 assert!(is_expired(parse_ymd("2026-04-30").unwrap())); assert!(!is_expired(parse_ymd("2026-05-01").unwrap())); assert!(!is_expired(parse_ymd("2026-05-02").unwrap())); + // SAFETY: env mutation guarded by process-wide mutex above. 
unsafe { env::remove_var("SOURCE_DATE_EPOCH"); } diff --git a/src/config.rs b/src/config.rs index 159785f..dc5c725 100644 --- a/src/config.rs +++ b/src/config.rs @@ -220,6 +220,13 @@ fn load_config(explicit: Option<&Path>) -> Result> { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::cli::DiffArgs; diff --git a/src/diff/key.rs b/src/diff/key.rs index 8ce59b3..4651f61 100644 --- a/src/diff/key.rs +++ b/src/diff/key.rs @@ -41,6 +41,13 @@ pub fn purl_without_version(purl: &str) -> &str { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::Relationship; diff --git a/src/diff/mod.rs b/src/diff/mod.rs index b6d2d9a..0011a81 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -138,6 +138,13 @@ fn diff_one_key(bs: &[&Component], as_: &[&Component], cs: &mut ChangeSet) { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::{Ecosystem, Relationship, SbomFormat}; diff --git a/src/enrich/cache.rs b/src/enrich/cache.rs index f298f72..a531036 100644 --- a/src/enrich/cache.rs +++ b/src/enrich/cache.rs @@ -214,6 +214,13 @@ pub fn open_unless_disabled_with_ttl(disabled: bool, ttl_hours: Option) -> #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; fn fixed_clock() -> u64 { diff --git a/src/enrich/epss.rs b/src/enrich/epss.rs index 15f8ea5..cad99d9 100644 --- a/src/enrich/epss.rs +++ b/src/enrich/epss.rs @@ -230,6 +230,13 @@ fn sanitize(id: &str) -> String { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use 
super::*; use crate::enrich::{Severity, VulnRef}; diff --git a/src/enrich/kev.rs b/src/enrich/kev.rs index 12bc7ad..8b97cef 100644 --- a/src/enrich/kev.rs +++ b/src/enrich/kev.rs @@ -162,6 +162,13 @@ fn now_secs() -> u64 { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::enrich::{Severity, VulnRef}; use std::collections::HashMap; diff --git a/src/enrich/license.rs b/src/enrich/license.rs index f80b8d2..0dee114 100644 --- a/src/enrich/license.rs +++ b/src/enrich/license.rs @@ -440,6 +440,13 @@ fn warn_deprecated_allow_ambiguous_once() { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::{Ecosystem, Relationship}; diff --git a/src/enrich/maintainer.rs b/src/enrich/maintainer.rs index 1ded7c5..00d310d 100644 --- a/src/enrich/maintainer.rs +++ b/src/enrich/maintainer.rs @@ -423,6 +423,13 @@ fn parse_first_commit_date(body: &str) -> Result> { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::{Component, Ecosystem, Relationship}; diff --git a/src/enrich/mod.rs b/src/enrich/mod.rs index 7728b38..8e6070a 100644 --- a/src/enrich/mod.rs +++ b/src/enrich/mod.rs @@ -281,6 +281,13 @@ impl std::fmt::Display for Severity { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; #[test] diff --git a/src/enrich/osv.rs b/src/enrich/osv.rs index b2a1c0d..794f7f5 100644 --- a/src/enrich/osv.rs +++ b/src/enrich/osv.rs @@ -323,6 +323,13 @@ struct OsvDatabaseSpecific { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use 
super::*; use crate::model::{Component, Ecosystem, Relationship}; diff --git a/src/enrich/registry.rs b/src/enrich/registry.rs index fe914e2..eca38b7 100644 --- a/src/enrich/registry.rs +++ b/src/enrich/registry.rs @@ -579,6 +579,13 @@ fn parse_cargo_value(json: &serde_json::Value) -> CacheEntry { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use serde_json::json; @@ -671,6 +678,7 @@ mod tests { } let d = days_since("2026-05-01T00:00:00Z").unwrap(); assert_eq!(d, 0); + // SAFETY: serialized by the env_lock guard above. unsafe { std::env::remove_var("SOURCE_DATE_EPOCH"); } diff --git a/src/enrich/typosquat.rs b/src/enrich/typosquat.rs index 0801880..29bc3cc 100644 --- a/src/enrich/typosquat.rs +++ b/src/enrich/typosquat.rs @@ -536,6 +536,13 @@ fn parse_and_canonicalize(input: &str, eco: SupportedEcosystem) -> Vec { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::Relationship; diff --git a/src/enrich/version_jump.rs b/src/enrich/version_jump.rs index 56687d3..47efd7f 100644 --- a/src/enrich/version_jump.rs +++ b/src/enrich/version_jump.rs @@ -106,6 +106,13 @@ fn extract_major(version: &str) -> Option { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::{Component, Ecosystem, Relationship}; diff --git a/src/lib.rs b/src/lib.rs index 0a58486..7394f94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,15 @@ +#![warn(clippy::undocumented_unsafe_blocks)] +#![warn(clippy::unwrap_used, clippy::expect_used)] +#![warn(clippy::panic, clippy::todo, clippy::unimplemented)] + +//! Crate root: declares the public module tree and re-exports the +//! orchestration entry points. +//! +//! 
The `run` / `run_diff` orchestration plus its private helpers live +//! in [`mod@run`]; this file is a thin shim so that `bomdrift::run(...)` +//! and the public predicates (`tripped`, `any_kev`, ...) keep their +//! historical paths. + pub mod attestation; pub mod baseline; pub mod cli; @@ -10,1321 +22,10 @@ pub mod parse; pub mod plugin; pub mod refresh; pub mod render; +pub mod run; pub mod vex; +pub use crate::run::{ + FAIL_ON_EXIT_CODE, any_epss_at_or_above, any_kev, budget_tripped, run, tripped, +}; pub use crate::vex::{SyntheticFindingKind, parse_synthetic_id}; - -use std::fs; -use std::io::IsTerminal; -use std::path::Path; - -use anyhow::{Context, Result}; - -use crate::cli::{BaselineAction, Cli, Command, DiffArgs, FailOn, InitArgs, OutputFormat}; -use crate::diff::ChangeSet; -use crate::enrich::{Enrichment, Severity}; - -/// Process exit code emitted when `--fail-on` trips. Distinct from clap's -/// usage-error exit (`2`-ish on parse failure) because clap exits before -/// `run` is called — there's no overlap window where this code is ambiguous. 
-pub const FAIL_ON_EXIT_CODE: i32 = 2; - -pub fn run(cli: Cli) -> Result<()> { - match cli.command { - Command::Diff(args) => run_diff(*args), - Command::RefreshTyposquat(args) => refresh::run(args), - Command::Baseline { action } => run_baseline(action), - Command::Init(args) => run_init(args), - } -} - -fn run_init(args: InitArgs) -> Result<()> { - write_scaffold_file(Path::new(".bomdrift.toml"), INIT_CONFIG, args.force)?; - if !args.config_only { - write_scaffold_file( - Path::new(".github/workflows/sbom-diff.yml"), - INIT_SBOM_WORKFLOW, - args.force, - )?; - write_scaffold_file( - Path::new(".github/workflows/bomdrift-suppress.yml"), - INIT_SUPPRESS_WORKFLOW, - args.force, - )?; - } - eprintln!("bomdrift: initialized repository files"); - Ok(()) -} - -fn write_scaffold_file(path: &Path, contents: &str, force: bool) -> Result<()> { - if path.exists() && !force { - anyhow::bail!( - "{} already exists; re-run with --force to overwrite", - path.display() - ); - } - if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) { - fs::create_dir_all(parent) - .with_context(|| format!("creating parent directory: {}", parent.display()))?; - } - fs::write(path, contents).with_context(|| format!("writing scaffold file: {}", path.display())) -} - -fn run_baseline(action: BaselineAction) -> Result<()> { - match action { - BaselineAction::Add(args) => { - // Validate --expires upfront so a typo'd date doesn't write a - // bad entry that errors on the NEXT diff load. - if let Some(s) = &args.expires { - clock::parse_ymd(s) - .with_context(|| format!("--expires must be YYYY-MM-DD, got {s:?}"))?; - } - - // --from-comment overrides positional id/reason. Used by the - // GitLab webhook bridge (Phase L). Non-zero exit when the - // body has no directive — silent no-op would let mis-configured - // bridges look like they worked. - let (id, reason_owned) = if let Some(body) = &args.from_comment { - match baseline::parse_comment_directive(body)? 
{ - Some((id, reason)) => (id, reason), - None => { - eprintln!( - "bomdrift: --from-comment body contained no `/bomdrift suppress ` directive" - ); - std::process::exit(1); - } - } - } else { - let Some(id) = args.id.clone() else { - eprintln!( - "bomdrift baseline add: missing required ADVISORY_ID (use a positional argument or --from-comment )" - ); - std::process::exit(2); - }; - (id, args.reason.clone()) - }; - - let outcome = baseline::add_suppression_full( - &args.path, - &id, - args.expires.as_deref(), - reason_owned.as_deref(), - )?; - match outcome { - baseline::AddOutcome::Added => { - eprintln!( - "bomdrift: added '{id}' to {path}", - id = id.trim(), - path = args.path.display(), - ); - } - baseline::AddOutcome::AlreadyPresent => { - eprintln!( - "bomdrift: '{id}' already present in {path}; no change", - id = id.trim(), - path = args.path.display(), - ); - } - } - Ok(()) - } - } -} - -fn run_diff(mut args: DiffArgs) -> Result<()> { - config::apply_diff_config(&mut args)?; - - if args.require_attestation - && (args.before_attestation.is_none() || args.after_attestation.is_none()) - { - anyhow::bail!( - "--require-attestation needs both --before-attestation and --after-attestation" - ); - } - - let output = args.output.unwrap_or(OutputFormat::Terminal); - let format = args.format.unwrap_or(cli::InputFormat::Auto); - let fail_on = args.fail_on.unwrap_or(FailOn::None); - - let format_hint = format.to_sbom_format(); - let before = load_sbom_or_attestation( - args.before.as_deref(), - args.before_attestation.as_deref(), - args.cosign_identity.as_deref(), - args.cosign_issuer.as_deref(), - format_hint, - args.include_file_components, - "before", - args.debug_calibration, - args.debug_calibration_format, - )?; - let after = load_sbom_or_attestation( - args.after.as_deref(), - args.after_attestation.as_deref(), - args.cosign_identity.as_deref(), - args.cosign_issuer.as_deref(), - format_hint, - args.include_file_components, - "after", - args.debug_calibration, - 
args.debug_calibration_format, - )?; - - let mut cs = diff::diff(&before, &after); - - let mut enrichment = if args.no_osv { - enrich::Enrichment::default() - } else { - // OSV enrichment is best-effort. Network failures must not block the diff - // from rendering — a PR review is still useful without CVE data. - match enrich::osv::enrich_cached_with_ttl(&cs, args.no_osv_cache, args.cache_ttl_hours) { - Ok(e) => e, - Err(err) => { - eprintln!("warning: OSV enrichment failed, continuing without it: {err:#}"); - enrich::Enrichment::default() - } - } - }; - - // EPSS / KEV enrichment piggyback on OSV's VulnRefs and only have - // anything to do when there are CVE-aliased advisories. Skip both if - // there are no vulns. - if !args.no_epss - && !enrichment.vulns.is_empty() - && let Err(err) = enrich::epss::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) - { - eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}"); - } - if !args.no_kev - && !enrichment.vulns.is_empty() - && let Err(err) = enrich::kev::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) - { - eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}"); - } - - // Typosquat detection is pure-compute (embedded reference list) and always - // runs, regardless of `--no-osv`. Findings are informational. - enrichment.typosquats = - enrich::typosquat::enrich_with_threshold(&cs, args.typosquat_similarity_threshold); - - // Multi-major version-jump detection is pure-compute and also always runs. - // Findings are informational. - enrichment.version_jumps = enrich::version_jump::enrich_with(&cs, args.multi_major_delta); - - // Maintainer-age enrichment hits the GitHub REST API; gated behind - // `--no-maintainer-age` for offline runs. Best-effort: failures warn and - // continue, mirroring the OSV enricher's contract. 
- if !args.no_maintainer_age { - match enrich::maintainer::enrich_with( - &cs, - "https://api.github.com", - std::time::Duration::from_secs(15), - args.young_maintainer_days, - ) { - Ok(findings) => enrichment.maintainer_age = findings, - Err(err) => { - eprintln!( - "warning: maintainer-age enrichment failed, continuing without it: {err:#}" - ); - } - } - } - - // License-policy enrichment (Phase D, v0.8). Pure-compute, runs after - // OSV/EPSS/KEV. Empty allow + empty deny means "no policy" — the - // enricher returns no violations. - let license_policy = enrich::license::Policy { - allow: args.allow_licenses.clone(), - deny: args.deny_licenses.clone(), - allow_ambiguous: args.allow_ambiguous_licenses, - allow_exceptions: args.allow_exception.clone(), - deny_exceptions: args.deny_exception.clone(), - }; - enrichment.license_violations = enrich::license::enrich(&cs, &license_policy); - - // Registry-metadata enrichers (Phase K, v0.9). Best-effort — a - // registry timeout returns Ok with no findings. - if !args.no_registry { - let findings = - enrich::registry::enrich(&cs, args.recently_published_days, args.cache_ttl_hours); - enrichment.recently_published = findings.recently_published; - enrichment.deprecated = findings.deprecated; - enrichment.maintainer_set_changed = findings.maintainer_set_changed; - } - - // Plugin findings (Phase C, v0.9.6). Run after every built-in - // enricher so plugins observe the same `cs` view bomdrift renders; - // before baseline so plugin findings can be baselined too. Plugin - // failures degrade gracefully — a malformed manifest aborts the - // run (config error), but plugin runtime failures emit only a - // BOMDRIFT_DEBUG-gated stderr warning and contribute no findings. 
- if !args.plugin.is_empty() { - let mut manifests = Vec::with_capacity(args.plugin.len()); - for path in &args.plugin { - let manifest = plugin::load_manifest(path) - .with_context(|| format!("loading --plugin {}", path.display()))?; - manifests.push(manifest); - } - enrichment.plugin_findings = plugin::run_plugins(&manifests, &cs); - } - - // Apply the baseline AFTER all enrichers run — suppression operates on - // the realized finding set, not on intermediate inputs. This keeps the - // baseline file format stable as new enrichers are added: a new finding - // type that the baseline doesn't know about simply isn't suppressed. - let mut baseline_entries: Vec = Vec::new(); - if let Some(path) = &args.baseline { - let baseline = baseline::Baseline::load(path)?; - for ent in &baseline.expired_entries { - eprintln!( - "warning: baseline entry {id}{purl} expired {expires}; finding will surface in this run{reason}", - id = ent.id, - purl = ent - .purl - .as_deref() - .map(|p| format!(" ({p})")) - .unwrap_or_default(), - expires = ent.expires.as_deref().unwrap_or(""), - reason = ent - .reason - .as_deref() - .map(|r| format!(" — was: {r}")) - .unwrap_or_default(), - ); - } - baseline_entries = baseline.entries.clone(); - baseline::apply(&mut cs, &mut enrichment, &baseline); - } - - // VEX consumption (Phase G, v0.9). Applied AFTER baseline so VEX - // statements operate on the post-baseline view — this matches what - // a downstream tool would see and avoids double-counting "already - // suppressed" findings in the VEX-suppressed tally. - if !args.vex.is_empty() { - match vex::load(&args.vex) { - Ok(stmts) => { - let idx = vex::VexIndex::build(stmts); - vex::apply(&mut enrichment, &idx); - } - Err(err) => { - eprintln!("warning: VEX load failed, continuing without VEX filtering: {err:#}"); - } - } - } - - // VEX emission (Phase H, v0.9). Writes a single OpenVEX 0.2.0 doc - // to the requested path, covering baseline-suppressed entries and - // un-suppressed findings. 
Byte-deterministic when SOURCE_DATE_EPOCH - // is set. - if let Some(path) = &args.emit_vex { - let author = args - .vex_author - .clone() - .or_else(|| args.repo_url.clone()) - .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) - .filter(|s| !s.is_empty()) - .unwrap_or_else(|| "bomdrift".to_string()); - let default_just = args - .vex_default_justification - .clone() - .unwrap_or_else(|| "vulnerable_code_not_in_execute_path".to_string()); - let opts = vex::EmitOptions { - author: &author, - default_justification: &default_just, - baseline_entries: &baseline_entries, - }; - let body = vex::emit(&cs, &enrichment, &opts); - std::fs::write(path, body) - .with_context(|| format!("writing --emit-vex {}", path.display()))?; - } - - // Calibration tap. Off by default; opt-in via `--debug-calibration`. - // Emits one CSV-friendly line per finding to stderr so an adopter - // can run the flag across a representative N PRs and feed the - // resulting CSV back as tuning data (issue #5). The output is - // deliberately plain — no JSON, no schema versioning — because the - // intended consumer is a one-off awk/jq pipeline, not a long-lived - // integration. Format: `kind|key|score|threshold`. No telemetry: the - // user owns the bytes and pipes them wherever they want. - if args.debug_calibration { - write_calibration_lines( - &enrichment, - &mut std::io::stderr(), - args.debug_calibration_format, - CalibrationOverrides { - similarity_threshold: args.typosquat_similarity_threshold, - young_maintainer_days: args.young_maintainer_days, - multi_major_delta: args.multi_major_delta, - }, - ); - } - - // CLI flag wins; otherwise the env var supplies the default. Empty - // strings are treated as unset to match shell-script callers that - // pass `BOMDRIFT_REPO_URL=` to clear the value rather than `unset`. 
- // GitLab CI exposes the project URL as `CI_PROJECT_URL` (analog of - // GitHub's `GITHUB_REPOSITORY`-derived URL); honor it as a third - // fallback so users on the GitLab template don't have to plumb - // `BOMDRIFT_REPO_URL` themselves. - let repo_url = args - .repo_url - .clone() - .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) - .or_else(|| std::env::var("CI_PROJECT_URL").ok()) - .or_else(|| std::env::var("BITBUCKET_GIT_HTTP_ORIGIN").ok()) - .or_else(|| std::env::var("BUILD_REPOSITORY_URI").ok()) - .filter(|s| !s.is_empty()); - - // Platform precedence: explicit `--platform` (or `[diff] platform` - // in `.bomdrift.toml`, already merged into `args.platform`) wins; - // otherwise auto-detect from CI env. Detection order: GitLab - // (`GITLAB_CI=true`), Bitbucket (`BITBUCKET_BUILD_NUMBER`), Azure - // DevOps (`TF_BUILD`), then default GitHub. - let platform = args.platform.unwrap_or_else(|| { - if std::env::var("GITLAB_CI").is_ok_and(|v| v == "true") { - crate::cli::Platform::GitLab - } else if std::env::var("BITBUCKET_BUILD_NUMBER").is_ok() { - crate::cli::Platform::Bitbucket - } else if std::env::var("TF_BUILD").is_ok() { - crate::cli::Platform::AzureDevOps - } else { - crate::cli::Platform::GitHub - } - }); - let md_options = render::markdown::Options { - summary_only: args.summary_only, - findings_only: args.findings_only, - repo_url, - platform: platform.into(), - }; - let rendered = match output { - OutputFormat::Terminal => { - // ANSI escapes are only safe on a real TTY. Piped/redirected stdout - // (e.g. captured by a CI step that posts a PR comment) must stay - // plain markdown so it renders correctly in a comment body. 
- if std::io::stdout().is_terminal() { - render::term::render(&cs, &enrichment) - } else { - render::markdown::render_with_options(&cs, &enrichment, md_options) - } - } - OutputFormat::Markdown => { - render::markdown::render_with_options(&cs, &enrichment, md_options) - } - OutputFormat::Json => render::json::render(&cs, &enrichment), - OutputFormat::Sarif => render::sarif::render(&cs, &enrichment), - }; - - if let Some(path) = &args.output_file { - std::fs::write(path, &rendered) - .with_context(|| format!("writing --output-file {}", path.display()))?; - } else { - print!("{rendered}"); - } - - // Body must be fully written before we exit-2 — the action's `tee` - // wrapper still wants the comment posted even when fail-on trips. - let budget_tripped = budget_tripped( - &cs, - args.max_added, - args.max_removed, - args.max_version_changed, - ); - if budget_tripped { - log_budget_trips( - &cs, - args.max_added, - args.max_removed, - args.max_version_changed, - ); - } - - let epss_tripped = args - .fail_on_epss - .is_some_and(|threshold| any_epss_at_or_above(&enrichment, threshold)); - if epss_tripped { - let threshold = args.fail_on_epss.unwrap_or(0.0); - eprintln!( - "bomdrift: policy gate tripped: --fail-on-epss {threshold:.2} (one or more advisories at or above this score)" - ); - } - - if tripped(&cs, &enrichment, fail_on) || budget_tripped || epss_tripped { - std::process::exit(FAIL_ON_EXIT_CODE); - } - - Ok(()) -} - -/// Pure helper: does this `(changeset, enrichment)` pair trip the configured -/// fail-on threshold? Side-effect-free so the policy is easy to unit-test -/// without spinning up the full pipeline. -/// -/// `FailOn::CriticalCve` filters on real severity now that OSV `/v1/vulns/{id}` -/// is fetched; only advisories with [`Severity::High`] or higher trip it. -/// (High is included because GHSA's `CRITICAL` label is relatively rare — -/// many actively-exploited supply-chain advisories ship as `HIGH`. 
Treating -/// "critical-cve" as "high-or-critical" matches what the option's name -/// communicates to a CI policy author: "block on the actionable bucket".) -pub fn tripped(cs: &ChangeSet, e: &Enrichment, threshold: FailOn) -> bool { - match threshold { - FailOn::None => false, - FailOn::Cve => !e.vulns.is_empty(), - FailOn::CriticalCve => any_advisory_at_or_above(e, Severity::High), - FailOn::Typosquat => !e.typosquats.is_empty(), - FailOn::LicenseChange => !cs.license_changed.is_empty(), - FailOn::Kev => any_kev(e), - FailOn::LicenseViolation => !e.license_violations.is_empty(), - FailOn::RecentlyPublished => !e.recently_published.is_empty(), - FailOn::Deprecated => !e.deprecated.is_empty(), - FailOn::Any => e.has_findings() || !cs.license_changed.is_empty() || any_kev(e), - } -} - -/// True when any advisory across all components has its CISA KEV flag set. -pub fn any_kev(e: &Enrichment) -> bool { - e.vulns.values().any(|refs| refs.iter().any(|r| r.kev)) -} - -/// True when any advisory has an EPSS score >= the threshold. -pub fn any_epss_at_or_above(e: &Enrichment, threshold: f32) -> bool { - e.vulns.values().any(|refs| { - refs.iter() - .any(|r| r.epss_score.is_some_and(|s| s >= threshold)) - }) -} - -pub fn budget_tripped( - cs: &ChangeSet, - max_added: Option, - max_removed: Option, - max_version_changed: Option, -) -> bool { - max_added.is_some_and(|max| cs.added.len() > max) - || max_removed.is_some_and(|max| cs.removed.len() > max) - || max_version_changed.is_some_and(|max| cs.version_changed.len() > max) -} - -/// Emit one CSV-friendly line per finding to the given writer, capturing -/// the score and the constant it was compared against. Off by default -/// (driven by `--debug-calibration`); when set, the user pipes stderr -/// to a file and feeds the resulting CSV back as tuning data. -/// -/// Schema: `kind|key|score|threshold` — pipe-delimited because purls -/// already contain commas (`pkg:npm/@scope/name`) which would force CSV -/// quoting. 
`kind` ∈ {`typosquat`, `version-jump`, `maintainer-age`, -/// `cve`}. `score` is the underlying numeric the enricher computed -/// (similarity for typosquat, major-version delta for version-jump, -/// days-old for maintainer-age, max CVSS-equivalent for cve); -/// `threshold` is the constant the score was gated against. CVE rows -/// surface every advisory (no internal threshold) so adopters can see -/// the score distribution before tuning `--fail-on critical-cve`. -/// Active overrides for the configurable calibration thresholds. Threaded -/// into [`write_calibration_lines`] so emitted rows reflect the effective -/// threshold the enricher actually used, not the unconditional const default. -#[derive(Debug, Default, Clone, Copy)] -pub(crate) struct CalibrationOverrides { - pub similarity_threshold: Option, - pub young_maintainer_days: Option, - pub multi_major_delta: Option, -} - -fn write_calibration_lines( - e: &Enrichment, - out: &mut W, - format: crate::cli::DebugFormat, - overrides: CalibrationOverrides, -) { - use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS; - use crate::enrich::typosquat::SIMILARITY_THRESHOLD; - use crate::enrich::version_jump::MIN_MAJOR_DELTA; - - let active_similarity = overrides - .similarity_threshold - .unwrap_or(SIMILARITY_THRESHOLD); - let active_young = overrides - .young_maintainer_days - .unwrap_or(YOUNG_MAINTAINER_DAYS); - let active_major_delta = overrides.multi_major_delta.unwrap_or(MIN_MAJOR_DELTA); - - for f in &e.typosquats { - write_calibration_row( - out, - "typosquat", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Float(f.score), - CalibrationThreshold::Float(active_similarity), - format, - ); - } - for f in &e.version_jumps { - write_calibration_row( - out, - "version-jump", - f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), - CalibrationScore::Int(f.after_major.saturating_sub(f.before_major) as i64), - CalibrationThreshold::Int(active_major_delta as 
i64), - format, - ); - } - for f in &e.maintainer_age { - write_calibration_row( - out, - "maintainer-age", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Int(f.days_old), - CalibrationThreshold::Int(active_young), - format, - ); - } - for (purl, refs) in &e.vulns { - for vuln in refs { - // Severity has no numeric score in our model; emit the bucket - // label as a non-numeric "score" so the row stays well-formed - // (string in JSONL, plain token in pipe). - write_calibration_row( - out, - "cve", - &format!("{purl}#{}", vuln.id), - CalibrationScore::Text(vuln.severity.as_str()), - CalibrationThreshold::Text("high+"), - format, - ); - for cve in vuln.cves() { - if let Some(score) = vuln.epss_score { - write_calibration_row( - out, - "epss", - &format!("{purl}+{cve}"), - CalibrationScore::Float(score as f64), - CalibrationThreshold::Float(0.5), - format, - ); - } - if vuln.kev { - write_calibration_row( - out, - "kev", - &format!("{purl}+{cve}"), - CalibrationScore::Text("true"), - CalibrationThreshold::Text("kev"), - format, - ); - } - } - } - } - for v in &e.license_violations { - // Threshold field carries the precise matched_rule (e.g. - // "deny: GPL-3.0-only" or "exception:LLVM-exception denied") - // so calibration consumers see the WHY, not just the kind tag. 
- write_calibration_row( - out, - "license", - v.component - .purl - .as_deref() - .unwrap_or(v.component.name.as_str()), - CalibrationScore::Text(&v.license), - CalibrationThreshold::Text(&v.matched_rule), - format, - ); - } - for f in &e.recently_published { - write_calibration_row( - out, - "recently-published", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Int(f.days_old), - CalibrationThreshold::Int(crate::enrich::registry::MIN_PUBLISHED_AGE_DAYS), - format, - ); - } - for f in &e.deprecated { - write_calibration_row( - out, - "deprecated", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Text(f.message.as_deref().unwrap_or("(deprecated)")), - CalibrationThreshold::Text("any"), - format, - ); - } - for f in &e.maintainer_set_changed { - write_calibration_row( - out, - "maintainer-set-changed", - f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), - CalibrationScore::Int((f.added.len() + f.removed.len()) as i64), - CalibrationThreshold::Int(1), - format, - ); - } -} - -/// Numeric or symbolic score for a calibration row. Float/Int rendered -/// without quotes in JSONL; Text rendered as a JSON string. -pub(crate) enum CalibrationScore<'a> { - Float(f64), - Int(i64), - Text(&'a str), -} - -pub(crate) enum CalibrationThreshold<'a> { - Float(f64), - Int(i64), - Text(&'a str), -} - -/// Single dispatch point for both pipe and JSONL calibration formats. -/// Adding a new finding kind is one call site, not two — the format -/// branches stay localized to this helper. 
-pub(crate) fn write_calibration_row( - out: &mut W, - kind: &str, - key: &str, - score: CalibrationScore<'_>, - threshold: CalibrationThreshold<'_>, - format: crate::cli::DebugFormat, -) { - match format { - crate::cli::DebugFormat::Pipe => { - let score_s = match score { - CalibrationScore::Float(v) => format!("{v:.4}"), - CalibrationScore::Int(v) => v.to_string(), - CalibrationScore::Text(s) => s.to_string(), - }; - let thr_s = match threshold { - CalibrationThreshold::Float(v) => format!("{v:.4}"), - CalibrationThreshold::Int(v) => v.to_string(), - CalibrationThreshold::Text(s) => s.to_string(), - }; - let _ = writeln!(out, "{kind}|{key}|{score_s}|{thr_s}"); - } - crate::cli::DebugFormat::Jsonl => { - let score_v = match score { - CalibrationScore::Float(v) => serde_json::Value::from(v), - CalibrationScore::Int(v) => serde_json::Value::from(v), - CalibrationScore::Text(s) => serde_json::Value::from(s), - }; - let thr_v = match threshold { - CalibrationThreshold::Float(v) => serde_json::Value::from(v), - CalibrationThreshold::Int(v) => serde_json::Value::from(v), - CalibrationThreshold::Text(s) => serde_json::Value::from(s), - }; - let line = serde_json::json!({ - "kind": kind, - "key": key, - "score": score_v, - "threshold": thr_v, - }); - let _ = writeln!(out, "{line}"); - } - } -} - -fn log_budget_trips( - cs: &ChangeSet, - max_added: Option, - max_removed: Option, - max_version_changed: Option, -) { - if let Some(max) = max_added.filter(|max| cs.added.len() > *max) { - eprintln!( - "bomdrift: policy gate tripped: added count {} exceeds --max-added {}", - cs.added.len(), - max - ); - } - if let Some(max) = max_removed.filter(|max| cs.removed.len() > *max) { - eprintln!( - "bomdrift: policy gate tripped: removed count {} exceeds --max-removed {}", - cs.removed.len(), - max - ); - } - if let Some(max) = max_version_changed.filter(|max| cs.version_changed.len() > *max) { - eprintln!( - "bomdrift: policy gate tripped: version-changed count {} exceeds 
--max-version-changed {}", - cs.version_changed.len(), - max - ); - } -} - -fn any_advisory_at_or_above(e: &Enrichment, threshold: Severity) -> bool { - e.vulns.values().flatten().any(|v| v.severity >= threshold) -} - -const INIT_CONFIG: &str = r#"# bomdrift repo policy. -# CLI flags override these defaults for one-off runs. - -[diff] -fail_on = "critical-cve" -baseline = ".bomdrift/baseline.json" -findings_only = false - -# Optional churn budgets. Uncomment to fail the workflow when a PR changes too -# many dependencies at once. -# max_added = 25 -# max_removed = 50 -# max_version_changed = 10 -"#; - -const INIT_SBOM_WORKFLOW: &str = r#"name: SBOM diff - -on: pull_request - -permissions: - contents: read - pull-requests: write - -jobs: - diff: - runs-on: ubuntu-latest - steps: - - uses: Metbcy/bomdrift@v1 - with: - config: .bomdrift.toml -"#; - -const INIT_SUPPRESS_WORKFLOW: &str = r#"name: bomdrift suppress - -on: - issue_comment: - types: [created] - -permissions: - contents: write - pull-requests: write - -jobs: - suppress: - if: | - github.event.issue.pull_request && - startsWith(github.event.comment.body, '/bomdrift suppress ') - runs-on: ubuntu-latest - steps: - - uses: Metbcy/bomdrift/comment-suppress@v1 -"#; - -fn load_sbom( - path: &Path, - format_hint: Option, - include_file_components: bool, -) -> Result { - let raw = fs::read_to_string(path) - .with_context(|| format!("reading SBOM file: {}", path.display()))?; - parse_sbom_bytes( - &raw, - &path.display().to_string(), - format_hint, - include_file_components, - ) -} - -fn parse_sbom_bytes( - raw: &str, - source_label: &str, - format_hint: Option, - include_file_components: bool, -) -> Result { - let value: serde_json::Value = - serde_json::from_str(raw).with_context(|| format!("parsing JSON in: {source_label}"))?; - let mut sbom = parse::parse_with_format(value, format_hint) - .with_context(|| format!("normalizing SBOM from: {source_label}"))?; - if !include_file_components { - 
parse::filter_file_components(&mut sbom); - } - Ok(sbom) -} - -#[allow(clippy::too_many_arguments)] -fn load_sbom_or_attestation( - path: Option<&Path>, - oci_ref: Option<&str>, - cosign_identity: Option<&str>, - cosign_issuer: Option<&str>, - format_hint: Option, - include_file_components: bool, - side: &str, - debug_calibration: bool, - debug_format: crate::cli::DebugFormat, -) -> Result { - if let Some(oci) = oci_ref { - let identity = cosign_identity.ok_or_else(|| { - anyhow::anyhow!( - "--{side}-attestation requires --cosign-identity (regex passed to cosign --certificate-identity-regexp)" - ) - })?; - let issuer = cosign_issuer.ok_or_else(|| { - anyhow::anyhow!( - "--{side}-attestation requires --cosign-issuer (URL passed to cosign --certificate-oidc-issuer)" - ) - })?; - let body = attestation::fetch_verified_sbom(oci, identity, issuer) - .with_context(|| format!("fetching --{side}-attestation {oci}"))?; - if debug_calibration { - // One row per verified attestation; surfaces the cert - // regex cosign accepted so adopters can confirm policy. 
- let _ = - write_attestation_calibration(&mut std::io::stderr(), oci, identity, debug_format); - } - return parse_sbom_bytes( - &body, - &format!("attestation:{oci}"), - format_hint, - include_file_components, - ); - } - let path = path.ok_or_else(|| { - anyhow::anyhow!( - "internal: {side} requires either a positional path or --{side}-attestation" - ) - })?; - load_sbom(path, format_hint, include_file_components) -} - -fn write_attestation_calibration( - out: &mut W, - oci_ref: &str, - identity: &str, - format: crate::cli::DebugFormat, -) -> std::io::Result<()> { - match format { - crate::cli::DebugFormat::Pipe => { - writeln!(out, "attestation|{oci_ref}|verified|{identity}") - } - crate::cli::DebugFormat::Jsonl => { - let row = serde_json::json!({ - "kind": "attestation", - "key": oci_ref, - "score": "verified", - "threshold": identity, - }); - writeln!(out, "{row}") - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashMap; - - use crate::enrich::typosquat::TyposquatFinding; - use crate::enrich::version_jump::VersionJumpFinding; - use crate::enrich::{LicenseViolation, Severity, VulnRef}; - use crate::model::{Component, Ecosystem, Relationship}; - - fn comp(name: &str) -> Component { - Component { - name: name.to_string(), - version: "1.0.0".to_string(), - ecosystem: Ecosystem::Npm, - purl: Some(format!("pkg:npm/{name}@1.0.0")), - licenses: Vec::new(), - supplier: None, - hashes: Vec::new(), - relationship: Relationship::Unknown, - source_url: None, - bom_ref: None, - } - } - - fn enrichment_with_cve_at(severity: Severity) -> Enrichment { - let mut vulns: HashMap> = HashMap::new(); - vulns.insert( - "pkg:npm/foo@1.0.0".into(), - vec![VulnRef { - id: "CVE-2025-1".into(), - severity, - aliases: Vec::new(), - epss_score: None, - kev: false, - }], - ); - Enrichment { - vulns, - ..Default::default() - } - } - - fn enrichment_with_cve() -> Enrichment { - // Severity::None is what every v0.2-era test implicitly assumed — the - // 
pre-severity world. Tests that don't care about the bucket use this. - enrichment_with_cve_at(Severity::None) - } - - fn enrichment_with_typosquat() -> Enrichment { - Enrichment { - typosquats: vec![TyposquatFinding { - component: comp("plain-crypto-js"), - closest: "crypto-js".to_string(), - score: 0.95, - }], - ..Default::default() - } - } - - fn enrichment_with_version_jump() -> Enrichment { - Enrichment { - version_jumps: vec![VersionJumpFinding { - before: comp("foo"), - after: comp("foo"), - before_major: 1, - after_major: 4, - }], - ..Default::default() - } - } - - fn cs_with_license_change() -> ChangeSet { - let mut before = comp("foo"); - before.licenses = vec!["MIT".into()]; - let mut after = comp("foo"); - after.licenses = vec!["GPL-3.0".into()]; - ChangeSet { - license_changed: vec![(before, after)], - ..Default::default() - } - } - - #[test] - fn fail_on_none_never_trips() { - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::None - )); - assert!(!tripped( - &cs_with_license_change(), - &enrichment_with_cve(), - FailOn::None - )); - } - - #[test] - fn fail_on_cve_trips_only_on_cve_findings() { - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::Cve - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::Cve - )); - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::Cve - )); - } - - #[test] - fn fail_on_critical_cve_filters_on_severity_high_or_above() { - // Critical and High advisories trip; Medium / Low / None don't. The - // doc on `tripped()` explains why High is included in the - // "critical-cve" bucket. 
- assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::Critical), - FailOn::CriticalCve - )); - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::High), - FailOn::CriticalCve - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::Medium), - FailOn::CriticalCve - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::None), - FailOn::CriticalCve - )); - } - - #[test] - fn fail_on_cve_still_trips_on_severity_none_advisories() { - // --fail-on cve is the broad "any advisory" bucket; severity threading - // doesn't change its semantics. An advisory with unresolved severity - // still trips it (the alternative — silent suppression — would be the - // real footgun). - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::None), - FailOn::Cve - )); - } - - #[test] - fn fail_on_typosquat_trips_only_on_typosquat_findings() { - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::Typosquat - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::Typosquat - )); - } - - #[test] - fn fail_on_any_trips_on_each_finding_kind_and_license_changes() { - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::Any - )); - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::Any - )); - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_version_jump(), - FailOn::Any - )); - // license-changed-without-version-bump alone trips Any (the suspicious - // case lives on the ChangeSet, not the enrichment). 
- assert!(tripped( - &cs_with_license_change(), - &Enrichment::default(), - FailOn::Any - )); - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::Any - )); - } - - #[test] - fn fail_on_license_change_trips_only_on_license_changes() { - assert!(tripped( - &cs_with_license_change(), - &Enrichment::default(), - FailOn::LicenseChange - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::LicenseChange - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::LicenseChange - )); - } - - #[test] - fn fail_on_typosquat_ignores_license_change() { - // license_changed is a ChangeSet field, not an enrichment. The - // typosquat threshold is strictly about typosquat findings — license - // drift must NOT trip it (otherwise consumers using --fail-on=typosquat - // get unexpected exit-2's on every license correction). - assert!(!tripped( - &cs_with_license_change(), - &Enrichment::default(), - FailOn::Typosquat - )); - } - - #[test] - fn budget_trips_when_counts_exceed_limits() { - let cs = ChangeSet { - added: vec![comp("a"), comp("b")], - removed: vec![comp("c")], - version_changed: vec![(comp("d"), comp("d"))], - ..Default::default() - }; - assert!(budget_tripped(&cs, Some(1), None, None)); - assert!(budget_tripped(&cs, None, Some(0), None)); - assert!(budget_tripped(&cs, None, None, Some(0))); - assert!(!budget_tripped(&cs, Some(2), Some(1), Some(1))); - } - - #[test] - fn calibration_pipe_format_matches_v0_7_layout() { - let e = enrichment_with_typosquat(); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Pipe, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - assert!(s.starts_with("typosquat|"), "got: {s}"); - assert_eq!( - s.matches('|').count(), - 3, - "pipe row has 4 fields → 3 separators; got: {s}" - ); - } - - #[test] - fn calibration_jsonl_format_emits_one_object_per_line() { - let e = 
enrichment_with_typosquat(); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Jsonl, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - let lines: Vec<&str> = s.lines().collect(); - assert_eq!(lines.len(), 1); - let v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid jsonl"); - assert_eq!(v["kind"], "typosquat"); - assert!(v["score"].is_number(), "numeric score in jsonl"); - assert!(v["threshold"].is_number()); - assert!(v["key"].is_string()); - } - - #[test] - fn calibration_jsonl_keeps_severity_label_as_string() { - let e = enrichment_with_cve_at(Severity::High); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Jsonl, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - let v: serde_json::Value = serde_json::from_str(s.trim()).unwrap(); - assert_eq!(v["kind"], "cve"); - assert_eq!(v["score"], "HIGH"); - assert_eq!(v["threshold"], "high+"); - } - - #[test] - fn fail_on_kev_trips_when_any_advisory_kev_set() { - let mut e = enrichment_with_cve_at(Severity::Medium); - // Flip the kev flag on the single advisory. 
- for refs in e.vulns.values_mut() { - refs[0].kev = true; - } - assert!(tripped(&ChangeSet::default(), &e, FailOn::Kev)); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::Medium), - FailOn::Kev - )); - } - - #[test] - fn any_epss_threshold_gating() { - let mut e = enrichment_with_cve_at(Severity::Medium); - for refs in e.vulns.values_mut() { - refs[0].epss_score = Some(0.6); - } - assert!(any_epss_at_or_above(&e, 0.5)); - assert!(any_epss_at_or_above(&e, 0.6)); - assert!(!any_epss_at_or_above(&e, 0.7)); - } - - #[test] - fn calibration_emits_epss_and_kev_rows_when_set() { - let mut e = enrichment_with_cve_at(Severity::High); - for refs in e.vulns.values_mut() { - refs[0].epss_score = Some(0.87); - refs[0].kev = true; - } - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Pipe, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - assert!(s.contains("epss|"), "missing epss row: {s}"); - assert!(s.contains("kev|"), "missing kev row: {s}"); - } - - #[test] - fn calibration_license_row_includes_exception_detail() { - // v0.9.5: matched_rule on an exception-driven license violation - // must surface the exception identifier in the calibration tap - // so operators tuning policy see why a row fired. 
- let mut e = Enrichment::default(); - let component = crate::model::Component { - name: "llvm-sys".into(), - version: "1.0.0".into(), - ecosystem: crate::model::Ecosystem::Cargo, - purl: Some("pkg:cargo/llvm-sys@1.0.0".into()), - licenses: vec!["Apache-2.0 WITH LLVM-exception".into()], - supplier: None, - hashes: Vec::new(), - relationship: crate::model::Relationship::Unknown, - source_url: None, - bom_ref: None, - }; - e.license_violations.push(LicenseViolation { - component, - license: "Apache-2.0 WITH LLVM-exception".into(), - matched_rule: "exception:LLVM-exception denied".into(), - kind: crate::enrich::LicenseViolationKind::Deny, - }); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Pipe, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - assert!( - s.contains("license|"), - "missing license calibration row: {s}" - ); - assert!( - s.contains("exception:LLVM-exception denied"), - "row must surface matched_rule with exception detail: {s}" - ); - } - - #[test] - fn fail_on_license_violation_trips() { - use crate::enrich::{LicenseViolation, LicenseViolationKind}; - let mut e = Enrichment::default(); - e.license_violations.push(LicenseViolation { - component: comp("foo"), - license: "GPL-3.0-only".into(), - matched_rule: "deny: GPL-3.0-only".into(), - kind: LicenseViolationKind::Deny, - }); - assert!(tripped(&ChangeSet::default(), &e, FailOn::LicenseViolation)); - assert!(tripped(&ChangeSet::default(), &e, FailOn::Any)); - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::LicenseViolation - )); - } -} diff --git a/src/parse/cyclonedx.rs b/src/parse/cyclonedx.rs index 156240f..e530227 100644 --- a/src/parse/cyclonedx.rs +++ b/src/parse/cyclonedx.rs @@ -151,6 +151,13 @@ struct CdxExternalRef { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; 
#[test] diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 4cb6d90..21711ad 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -135,6 +135,13 @@ pub(crate) fn hash_alg(s: &str) -> HashAlg { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use serde_json::json; diff --git a/src/parse/spdx.rs b/src/parse/spdx.rs index dec4621..f986af0 100644 --- a/src/parse/spdx.rs +++ b/src/parse/spdx.rs @@ -191,6 +191,13 @@ struct SpdxExternalRef { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; #[test] diff --git a/src/parse/syft.rs b/src/parse/syft.rs index 466ac1e..0898971 100644 --- a/src/parse/syft.rs +++ b/src/parse/syft.rs @@ -137,6 +137,13 @@ struct SyftLicenseObject { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; #[test] diff --git a/src/plugin.rs b/src/plugin.rs index bf6ba15..1721e57 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -351,6 +351,13 @@ fn invoke_blocking( #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; #[cfg(unix)] use crate::model::{Ecosystem, Relationship}; diff --git a/src/refresh.rs b/src/refresh.rs index 35cc04e..ba02827 100644 --- a/src/refresh.rs +++ b/src/refresh.rs @@ -394,6 +394,13 @@ fn default_fetcher(url: &str) -> Result> { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; const SAMPLE_MARKDOWN: &str = "\ diff --git a/src/render/json.rs b/src/render/json.rs index 2227267..880417c 100644 --- a/src/render/json.rs +++ b/src/render/json.rs @@ -39,11 +39,22 @@ use crate::enrich::Enrichment; pub 
fn render(cs: &ChangeSet, e: &Enrichment) -> String { let combined = serde_json::json!({"changes": cs, "enrichment": e}); + #[allow( + clippy::expect_used, + reason = "invariant: serde_json::to_string_pretty cannot fail on a Value built from owned data with string keys" + )] serde_json::to_string_pretty(&combined).expect("serialize JSON") } #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use std::collections::HashMap; diff --git a/src/render/markdown.rs b/src/render/markdown.rs index 3ce92b7..ea46e37 100644 --- a/src/render/markdown.rs +++ b/src/render/markdown.rs @@ -723,6 +723,13 @@ fn license_cell(licenses: &[String]) -> String { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::{Component, Ecosystem, Relationship}; diff --git a/src/render/sarif.rs b/src/render/sarif.rs index 4d19a7e..177e4c4 100644 --- a/src/render/sarif.rs +++ b/src/render/sarif.rs @@ -81,7 +81,12 @@ pub fn render(cs: &ChangeSet, e: &Enrichment) -> String { "results": results(cs, e), }] }); - serde_json::to_string_pretty(&doc).expect("serialize SARIF") + #[allow( + clippy::expect_used, + reason = "invariant: serde_json::to_string_pretty cannot fail on a Value built from owned data with string keys" + )] + serde_json::to_string_pretty(&doc) + .expect("invariant: serde_json::to_string_pretty cannot fail on a Value built from owned data with string keys") } fn rules() -> Value { @@ -611,6 +616,13 @@ fn sarif_level(severity: crate::enrich::Severity) -> &'static str { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use std::collections::HashMap; diff --git a/src/render/term.rs b/src/render/term.rs index 1b6b8fd..f9072ba 100644 --- a/src/render/term.rs +++ 
b/src/render/term.rs @@ -275,6 +275,13 @@ fn style_for(tone: Tone) -> Style { #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use crate::model::{Component, Ecosystem, Relationship}; diff --git a/src/run.rs b/src/run.rs new file mode 100644 index 0000000..0a89e76 --- /dev/null +++ b/src/run.rs @@ -0,0 +1,1325 @@ +use std::fs; +use std::io::IsTerminal; +use std::path::Path; + +use anyhow::{Context, Result}; + +use crate::cli::{BaselineAction, Cli, Command, DiffArgs, FailOn, InitArgs, OutputFormat}; +use crate::diff::ChangeSet; +use crate::enrich::{Enrichment, Severity}; +use crate::{ + attestation, baseline, cli, clock, config, diff, enrich, model, parse, plugin, refresh, render, + vex, +}; + +/// Process exit code emitted when `--fail-on` trips. Distinct from clap's +/// usage-error exit (`2`-ish on parse failure) because clap exits before +/// `run` is called — there's no overlap window where this code is ambiguous. 
+pub const FAIL_ON_EXIT_CODE: i32 = 2; + +pub fn run(cli: Cli) -> Result<()> { + match cli.command { + Command::Diff(args) => run_diff(*args), + Command::RefreshTyposquat(args) => refresh::run(args), + Command::Baseline { action } => run_baseline(action), + Command::Init(args) => run_init(args), + } +} + +fn run_init(args: InitArgs) -> Result<()> { + write_scaffold_file(Path::new(".bomdrift.toml"), INIT_CONFIG, args.force)?; + if !args.config_only { + write_scaffold_file( + Path::new(".github/workflows/sbom-diff.yml"), + INIT_SBOM_WORKFLOW, + args.force, + )?; + write_scaffold_file( + Path::new(".github/workflows/bomdrift-suppress.yml"), + INIT_SUPPRESS_WORKFLOW, + args.force, + )?; + } + eprintln!("bomdrift: initialized repository files"); + Ok(()) +} + +fn write_scaffold_file(path: &Path, contents: &str, force: bool) -> Result<()> { + if path.exists() && !force { + anyhow::bail!( + "{} already exists; re-run with --force to overwrite", + path.display() + ); + } + if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) { + fs::create_dir_all(parent) + .with_context(|| format!("creating parent directory: {}", parent.display()))?; + } + fs::write(path, contents).with_context(|| format!("writing scaffold file: {}", path.display())) +} + +fn run_baseline(action: BaselineAction) -> Result<()> { + match action { + BaselineAction::Add(args) => { + // Validate --expires upfront so a typo'd date doesn't write a + // bad entry that errors on the NEXT diff load. + if let Some(s) = &args.expires { + clock::parse_ymd(s) + .with_context(|| format!("--expires must be YYYY-MM-DD, got {s:?}"))?; + } + + // --from-comment overrides positional id/reason. Used by the + // GitLab webhook bridge (Phase L). Non-zero exit when the + // body has no directive — silent no-op would let mis-configured + // bridges look like they worked. + let (id, reason_owned) = if let Some(body) = &args.from_comment { + match baseline::parse_comment_directive(body)? 
{ + Some((id, reason)) => (id, reason), + None => { + eprintln!( + "bomdrift: --from-comment body contained no `/bomdrift suppress ` directive" + ); + std::process::exit(1); + } + } + } else { + let Some(id) = args.id.clone() else { + eprintln!( + "bomdrift baseline add: missing required ADVISORY_ID (use a positional argument or --from-comment )" + ); + std::process::exit(2); + }; + (id, args.reason.clone()) + }; + + let outcome = baseline::add_suppression_full( + &args.path, + &id, + args.expires.as_deref(), + reason_owned.as_deref(), + )?; + match outcome { + baseline::AddOutcome::Added => { + eprintln!( + "bomdrift: added '{id}' to {path}", + id = id.trim(), + path = args.path.display(), + ); + } + baseline::AddOutcome::AlreadyPresent => { + eprintln!( + "bomdrift: '{id}' already present in {path}; no change", + id = id.trim(), + path = args.path.display(), + ); + } + } + Ok(()) + } + } +} + +fn run_diff(mut args: DiffArgs) -> Result<()> { + config::apply_diff_config(&mut args)?; + + if args.require_attestation + && (args.before_attestation.is_none() || args.after_attestation.is_none()) + { + anyhow::bail!( + "--require-attestation needs both --before-attestation and --after-attestation" + ); + } + + let output = args.output.unwrap_or(OutputFormat::Terminal); + let format = args.format.unwrap_or(cli::InputFormat::Auto); + let fail_on = args.fail_on.unwrap_or(FailOn::None); + + let format_hint = format.to_sbom_format(); + let before = load_sbom_or_attestation( + args.before.as_deref(), + args.before_attestation.as_deref(), + args.cosign_identity.as_deref(), + args.cosign_issuer.as_deref(), + format_hint, + args.include_file_components, + "before", + args.debug_calibration, + args.debug_calibration_format, + )?; + let after = load_sbom_or_attestation( + args.after.as_deref(), + args.after_attestation.as_deref(), + args.cosign_identity.as_deref(), + args.cosign_issuer.as_deref(), + format_hint, + args.include_file_components, + "after", + args.debug_calibration, + 
args.debug_calibration_format, + )?; + + let mut cs = diff::diff(&before, &after); + + let mut enrichment = if args.no_osv { + enrich::Enrichment::default() + } else { + // OSV enrichment is best-effort. Network failures must not block the diff + // from rendering — a PR review is still useful without CVE data. + match enrich::osv::enrich_cached_with_ttl(&cs, args.no_osv_cache, args.cache_ttl_hours) { + Ok(e) => e, + Err(err) => { + eprintln!("warning: OSV enrichment failed, continuing without it: {err:#}"); + enrich::Enrichment::default() + } + } + }; + + // EPSS / KEV enrichment piggyback on OSV's VulnRefs and only have + // anything to do when there are CVE-aliased advisories. Skip both if + // there are no vulns. + if !args.no_epss + && !enrichment.vulns.is_empty() + && let Err(err) = enrich::epss::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) + { + eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}"); + } + if !args.no_kev + && !enrichment.vulns.is_empty() + && let Err(err) = enrich::kev::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) + { + eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}"); + } + + // Typosquat detection is pure-compute (embedded reference list) and always + // runs, regardless of `--no-osv`. Findings are informational. + enrichment.typosquats = + enrich::typosquat::enrich_with_threshold(&cs, args.typosquat_similarity_threshold); + + // Multi-major version-jump detection is pure-compute and also always runs. + // Findings are informational. + enrichment.version_jumps = enrich::version_jump::enrich_with(&cs, args.multi_major_delta); + + // Maintainer-age enrichment hits the GitHub REST API; gated behind + // `--no-maintainer-age` for offline runs. Best-effort: failures warn and + // continue, mirroring the OSV enricher's contract. 
+ if !args.no_maintainer_age { + match enrich::maintainer::enrich_with( + &cs, + "https://api.github.com", + std::time::Duration::from_secs(15), + args.young_maintainer_days, + ) { + Ok(findings) => enrichment.maintainer_age = findings, + Err(err) => { + eprintln!( + "warning: maintainer-age enrichment failed, continuing without it: {err:#}" + ); + } + } + } + + // License-policy enrichment (Phase D, v0.8). Pure-compute, runs after + // OSV/EPSS/KEV. Empty allow + empty deny means "no policy" — the + // enricher returns no violations. + let license_policy = enrich::license::Policy { + allow: args.allow_licenses.clone(), + deny: args.deny_licenses.clone(), + allow_ambiguous: args.allow_ambiguous_licenses, + allow_exceptions: args.allow_exception.clone(), + deny_exceptions: args.deny_exception.clone(), + }; + enrichment.license_violations = enrich::license::enrich(&cs, &license_policy); + + // Registry-metadata enrichers (Phase K, v0.9). Best-effort — a + // registry timeout returns Ok with no findings. + if !args.no_registry { + let findings = + enrich::registry::enrich(&cs, args.recently_published_days, args.cache_ttl_hours); + enrichment.recently_published = findings.recently_published; + enrichment.deprecated = findings.deprecated; + enrichment.maintainer_set_changed = findings.maintainer_set_changed; + } + + // Plugin findings (Phase C, v0.9.6). Run after every built-in + // enricher so plugins observe the same `cs` view bomdrift renders; + // before baseline so plugin findings can be baselined too. Plugin + // failures degrade gracefully — a malformed manifest aborts the + // run (config error), but plugin runtime failures emit only a + // BOMDRIFT_DEBUG-gated stderr warning and contribute no findings. 
+ if !args.plugin.is_empty() { + let mut manifests = Vec::with_capacity(args.plugin.len()); + for path in &args.plugin { + let manifest = plugin::load_manifest(path) + .with_context(|| format!("loading --plugin {}", path.display()))?; + manifests.push(manifest); + } + enrichment.plugin_findings = plugin::run_plugins(&manifests, &cs); + } + + // Apply the baseline AFTER all enrichers run — suppression operates on + // the realized finding set, not on intermediate inputs. This keeps the + // baseline file format stable as new enrichers are added: a new finding + // type that the baseline doesn't know about simply isn't suppressed. + let mut baseline_entries: Vec = Vec::new(); + if let Some(path) = &args.baseline { + let baseline = baseline::Baseline::load(path)?; + for ent in &baseline.expired_entries { + eprintln!( + "warning: baseline entry {id}{purl} expired {expires}; finding will surface in this run{reason}", + id = ent.id, + purl = ent + .purl + .as_deref() + .map(|p| format!(" ({p})")) + .unwrap_or_default(), + expires = ent.expires.as_deref().unwrap_or(""), + reason = ent + .reason + .as_deref() + .map(|r| format!(" — was: {r}")) + .unwrap_or_default(), + ); + } + baseline_entries = baseline.entries.clone(); + baseline::apply(&mut cs, &mut enrichment, &baseline); + } + + // VEX consumption (Phase G, v0.9). Applied AFTER baseline so VEX + // statements operate on the post-baseline view — this matches what + // a downstream tool would see and avoids double-counting "already + // suppressed" findings in the VEX-suppressed tally. + if !args.vex.is_empty() { + match vex::load(&args.vex) { + Ok(stmts) => { + let idx = vex::VexIndex::build(stmts); + vex::apply(&mut enrichment, &idx); + } + Err(err) => { + eprintln!("warning: VEX load failed, continuing without VEX filtering: {err:#}"); + } + } + } + + // VEX emission (Phase H, v0.9). Writes a single OpenVEX 0.2.0 doc + // to the requested path, covering baseline-suppressed entries and + // un-suppressed findings. 
Byte-deterministic when SOURCE_DATE_EPOCH + // is set. + if let Some(path) = &args.emit_vex { + let author = args + .vex_author + .clone() + .or_else(|| args.repo_url.clone()) + .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "bomdrift".to_string()); + let default_just = args + .vex_default_justification + .clone() + .unwrap_or_else(|| "vulnerable_code_not_in_execute_path".to_string()); + let opts = vex::EmitOptions { + author: &author, + default_justification: &default_just, + baseline_entries: &baseline_entries, + }; + let body = vex::emit(&cs, &enrichment, &opts); + std::fs::write(path, body) + .with_context(|| format!("writing --emit-vex {}", path.display()))?; + } + + // Calibration tap. Off by default; opt-in via `--debug-calibration`. + // Emits one CSV-friendly line per finding to stderr so an adopter + // can run the flag across a representative N PRs and feed the + // resulting CSV back as tuning data (issue #5). The output is + // deliberately plain — no JSON, no schema versioning — because the + // intended consumer is a one-off awk/jq pipeline, not a long-lived + // integration. Format: `kind|key|score|threshold`. No telemetry: the + // user owns the bytes and pipes them wherever they want. + if args.debug_calibration { + write_calibration_lines( + &enrichment, + &mut std::io::stderr(), + args.debug_calibration_format, + CalibrationOverrides { + similarity_threshold: args.typosquat_similarity_threshold, + young_maintainer_days: args.young_maintainer_days, + multi_major_delta: args.multi_major_delta, + }, + ); + } + + // CLI flag wins; otherwise the env var supplies the default. Empty + // strings are treated as unset to match shell-script callers that + // pass `BOMDRIFT_REPO_URL=` to clear the value rather than `unset`. 
+ // GitLab CI exposes the project URL as `CI_PROJECT_URL` (analog of + // GitHub's `GITHUB_REPOSITORY`-derived URL); honor it as a third + // fallback so users on the GitLab template don't have to plumb + // `BOMDRIFT_REPO_URL` themselves. + let repo_url = args + .repo_url + .clone() + .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) + .or_else(|| std::env::var("CI_PROJECT_URL").ok()) + .or_else(|| std::env::var("BITBUCKET_GIT_HTTP_ORIGIN").ok()) + .or_else(|| std::env::var("BUILD_REPOSITORY_URI").ok()) + .filter(|s| !s.is_empty()); + + // Platform precedence: explicit `--platform` (or `[diff] platform` + // in `.bomdrift.toml`, already merged into `args.platform`) wins; + // otherwise auto-detect from CI env. Detection order: GitLab + // (`GITLAB_CI=true`), Bitbucket (`BITBUCKET_BUILD_NUMBER`), Azure + // DevOps (`TF_BUILD`), then default GitHub. + let platform = args.platform.unwrap_or_else(|| { + if std::env::var("GITLAB_CI").is_ok_and(|v| v == "true") { + crate::cli::Platform::GitLab + } else if std::env::var("BITBUCKET_BUILD_NUMBER").is_ok() { + crate::cli::Platform::Bitbucket + } else if std::env::var("TF_BUILD").is_ok() { + crate::cli::Platform::AzureDevOps + } else { + crate::cli::Platform::GitHub + } + }); + let md_options = render::markdown::Options { + summary_only: args.summary_only, + findings_only: args.findings_only, + repo_url, + platform: platform.into(), + }; + let rendered = match output { + OutputFormat::Terminal => { + // ANSI escapes are only safe on a real TTY. Piped/redirected stdout + // (e.g. captured by a CI step that posts a PR comment) must stay + // plain markdown so it renders correctly in a comment body. 
+ if std::io::stdout().is_terminal() { + render::term::render(&cs, &enrichment) + } else { + render::markdown::render_with_options(&cs, &enrichment, md_options) + } + } + OutputFormat::Markdown => { + render::markdown::render_with_options(&cs, &enrichment, md_options) + } + OutputFormat::Json => render::json::render(&cs, &enrichment), + OutputFormat::Sarif => render::sarif::render(&cs, &enrichment), + }; + + if let Some(path) = &args.output_file { + std::fs::write(path, &rendered) + .with_context(|| format!("writing --output-file {}", path.display()))?; + } else { + print!("{rendered}"); + } + + // Body must be fully written before we exit-2 — the action's `tee` + // wrapper still wants the comment posted even when fail-on trips. + let budget_tripped = budget_tripped( + &cs, + args.max_added, + args.max_removed, + args.max_version_changed, + ); + if budget_tripped { + log_budget_trips( + &cs, + args.max_added, + args.max_removed, + args.max_version_changed, + ); + } + + let epss_tripped = args + .fail_on_epss + .is_some_and(|threshold| any_epss_at_or_above(&enrichment, threshold)); + if epss_tripped { + let threshold = args.fail_on_epss.unwrap_or(0.0); + eprintln!( + "bomdrift: policy gate tripped: --fail-on-epss {threshold:.2} (one or more advisories at or above this score)" + ); + } + + if tripped(&cs, &enrichment, fail_on) || budget_tripped || epss_tripped { + std::process::exit(FAIL_ON_EXIT_CODE); + } + + Ok(()) +} + +/// Pure helper: does this `(changeset, enrichment)` pair trip the configured +/// fail-on threshold? Side-effect-free so the policy is easy to unit-test +/// without spinning up the full pipeline. +/// +/// `FailOn::CriticalCve` filters on real severity now that OSV `/v1/vulns/{id}` +/// is fetched; only advisories with [`Severity::High`] or higher trip it. +/// (High is included because GHSA's `CRITICAL` label is relatively rare — +/// many actively-exploited supply-chain advisories ship as `HIGH`. 
Treating +/// "critical-cve" as "high-or-critical" matches what the option's name +/// communicates to a CI policy author: "block on the actionable bucket".) +pub fn tripped(cs: &ChangeSet, e: &Enrichment, threshold: FailOn) -> bool { + match threshold { + FailOn::None => false, + FailOn::Cve => !e.vulns.is_empty(), + FailOn::CriticalCve => any_advisory_at_or_above(e, Severity::High), + FailOn::Typosquat => !e.typosquats.is_empty(), + FailOn::LicenseChange => !cs.license_changed.is_empty(), + FailOn::Kev => any_kev(e), + FailOn::LicenseViolation => !e.license_violations.is_empty(), + FailOn::RecentlyPublished => !e.recently_published.is_empty(), + FailOn::Deprecated => !e.deprecated.is_empty(), + FailOn::Any => e.has_findings() || !cs.license_changed.is_empty() || any_kev(e), + } +} + +/// True when any advisory across all components has its CISA KEV flag set. +pub fn any_kev(e: &Enrichment) -> bool { + e.vulns.values().any(|refs| refs.iter().any(|r| r.kev)) +} + +/// True when any advisory has an EPSS score >= the threshold. +pub fn any_epss_at_or_above(e: &Enrichment, threshold: f32) -> bool { + e.vulns.values().any(|refs| { + refs.iter() + .any(|r| r.epss_score.is_some_and(|s| s >= threshold)) + }) +} + +pub fn budget_tripped( + cs: &ChangeSet, + max_added: Option, + max_removed: Option, + max_version_changed: Option, +) -> bool { + max_added.is_some_and(|max| cs.added.len() > max) + || max_removed.is_some_and(|max| cs.removed.len() > max) + || max_version_changed.is_some_and(|max| cs.version_changed.len() > max) +} + +/// Emit one CSV-friendly line per finding to the given writer, capturing +/// the score and the constant it was compared against. Off by default +/// (driven by `--debug-calibration`); when set, the user pipes stderr +/// to a file and feeds the resulting CSV back as tuning data. +/// +/// Schema: `kind|key|score|threshold` — pipe-delimited because purls +/// already contain commas (`pkg:npm/@scope/name`) which would force CSV +/// quoting. 
`kind` ∈ {`typosquat`, `version-jump`, `maintainer-age`, +/// `cve`}. `score` is the underlying numeric the enricher computed +/// (similarity for typosquat, major-version delta for version-jump, +/// days-old for maintainer-age, max CVSS-equivalent for cve); +/// `threshold` is the constant the score was gated against. CVE rows +/// surface every advisory (no internal threshold) so adopters can see +/// the score distribution before tuning `--fail-on critical-cve`. +/// Active overrides for the configurable calibration thresholds. Threaded +/// into [`write_calibration_lines`] so emitted rows reflect the effective +/// threshold the enricher actually used, not the unconditional const default. +#[derive(Debug, Default, Clone, Copy)] +pub(crate) struct CalibrationOverrides { + pub similarity_threshold: Option, + pub young_maintainer_days: Option, + pub multi_major_delta: Option, +} + +fn write_calibration_lines( + e: &Enrichment, + out: &mut W, + format: crate::cli::DebugFormat, + overrides: CalibrationOverrides, +) { + use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS; + use crate::enrich::typosquat::SIMILARITY_THRESHOLD; + use crate::enrich::version_jump::MIN_MAJOR_DELTA; + + let active_similarity = overrides + .similarity_threshold + .unwrap_or(SIMILARITY_THRESHOLD); + let active_young = overrides + .young_maintainer_days + .unwrap_or(YOUNG_MAINTAINER_DAYS); + let active_major_delta = overrides.multi_major_delta.unwrap_or(MIN_MAJOR_DELTA); + + for f in &e.typosquats { + write_calibration_row( + out, + "typosquat", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Float(f.score), + CalibrationThreshold::Float(active_similarity), + format, + ); + } + for f in &e.version_jumps { + write_calibration_row( + out, + "version-jump", + f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), + CalibrationScore::Int(f.after_major.saturating_sub(f.before_major) as i64), + CalibrationThreshold::Int(active_major_delta as 
i64), + format, + ); + } + for f in &e.maintainer_age { + write_calibration_row( + out, + "maintainer-age", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Int(f.days_old), + CalibrationThreshold::Int(active_young), + format, + ); + } + for (purl, refs) in &e.vulns { + for vuln in refs { + // Severity has no numeric score in our model; emit the bucket + // label as a non-numeric "score" so the row stays well-formed + // (string in JSONL, plain token in pipe). + write_calibration_row( + out, + "cve", + &format!("{purl}#{}", vuln.id), + CalibrationScore::Text(vuln.severity.as_str()), + CalibrationThreshold::Text("high+"), + format, + ); + for cve in vuln.cves() { + if let Some(score) = vuln.epss_score { + write_calibration_row( + out, + "epss", + &format!("{purl}+{cve}"), + CalibrationScore::Float(score as f64), + CalibrationThreshold::Float(0.5), + format, + ); + } + if vuln.kev { + write_calibration_row( + out, + "kev", + &format!("{purl}+{cve}"), + CalibrationScore::Text("true"), + CalibrationThreshold::Text("kev"), + format, + ); + } + } + } + } + for v in &e.license_violations { + // Threshold field carries the precise matched_rule (e.g. + // "deny: GPL-3.0-only" or "exception:LLVM-exception denied") + // so calibration consumers see the WHY, not just the kind tag. 
+ write_calibration_row( + out, + "license", + v.component + .purl + .as_deref() + .unwrap_or(v.component.name.as_str()), + CalibrationScore::Text(&v.license), + CalibrationThreshold::Text(&v.matched_rule), + format, + ); + } + for f in &e.recently_published { + write_calibration_row( + out, + "recently-published", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Int(f.days_old), + CalibrationThreshold::Int(crate::enrich::registry::MIN_PUBLISHED_AGE_DAYS), + format, + ); + } + for f in &e.deprecated { + write_calibration_row( + out, + "deprecated", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Text(f.message.as_deref().unwrap_or("(deprecated)")), + CalibrationThreshold::Text("any"), + format, + ); + } + for f in &e.maintainer_set_changed { + write_calibration_row( + out, + "maintainer-set-changed", + f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), + CalibrationScore::Int((f.added.len() + f.removed.len()) as i64), + CalibrationThreshold::Int(1), + format, + ); + } +} + +/// Numeric or symbolic score for a calibration row. Float/Int rendered +/// without quotes in JSONL; Text rendered as a JSON string. +pub(crate) enum CalibrationScore<'a> { + Float(f64), + Int(i64), + Text(&'a str), +} + +pub(crate) enum CalibrationThreshold<'a> { + Float(f64), + Int(i64), + Text(&'a str), +} + +/// Single dispatch point for both pipe and JSONL calibration formats. +/// Adding a new finding kind is one call site, not two — the format +/// branches stay localized to this helper. 
+pub(crate) fn write_calibration_row( + out: &mut W, + kind: &str, + key: &str, + score: CalibrationScore<'_>, + threshold: CalibrationThreshold<'_>, + format: crate::cli::DebugFormat, +) { + match format { + crate::cli::DebugFormat::Pipe => { + let score_s = match score { + CalibrationScore::Float(v) => format!("{v:.4}"), + CalibrationScore::Int(v) => v.to_string(), + CalibrationScore::Text(s) => s.to_string(), + }; + let thr_s = match threshold { + CalibrationThreshold::Float(v) => format!("{v:.4}"), + CalibrationThreshold::Int(v) => v.to_string(), + CalibrationThreshold::Text(s) => s.to_string(), + }; + let _ = writeln!(out, "{kind}|{key}|{score_s}|{thr_s}"); + } + crate::cli::DebugFormat::Jsonl => { + let score_v = match score { + CalibrationScore::Float(v) => serde_json::Value::from(v), + CalibrationScore::Int(v) => serde_json::Value::from(v), + CalibrationScore::Text(s) => serde_json::Value::from(s), + }; + let thr_v = match threshold { + CalibrationThreshold::Float(v) => serde_json::Value::from(v), + CalibrationThreshold::Int(v) => serde_json::Value::from(v), + CalibrationThreshold::Text(s) => serde_json::Value::from(s), + }; + let line = serde_json::json!({ + "kind": kind, + "key": key, + "score": score_v, + "threshold": thr_v, + }); + let _ = writeln!(out, "{line}"); + } + } +} + +fn log_budget_trips( + cs: &ChangeSet, + max_added: Option, + max_removed: Option, + max_version_changed: Option, +) { + if let Some(max) = max_added.filter(|max| cs.added.len() > *max) { + eprintln!( + "bomdrift: policy gate tripped: added count {} exceeds --max-added {}", + cs.added.len(), + max + ); + } + if let Some(max) = max_removed.filter(|max| cs.removed.len() > *max) { + eprintln!( + "bomdrift: policy gate tripped: removed count {} exceeds --max-removed {}", + cs.removed.len(), + max + ); + } + if let Some(max) = max_version_changed.filter(|max| cs.version_changed.len() > *max) { + eprintln!( + "bomdrift: policy gate tripped: version-changed count {} exceeds 
--max-version-changed {}", + cs.version_changed.len(), + max + ); + } +} + +fn any_advisory_at_or_above(e: &Enrichment, threshold: Severity) -> bool { + e.vulns.values().flatten().any(|v| v.severity >= threshold) +} + +const INIT_CONFIG: &str = r#"# bomdrift repo policy. +# CLI flags override these defaults for one-off runs. + +[diff] +fail_on = "critical-cve" +baseline = ".bomdrift/baseline.json" +findings_only = false + +# Optional churn budgets. Uncomment to fail the workflow when a PR changes too +# many dependencies at once. +# max_added = 25 +# max_removed = 50 +# max_version_changed = 10 +"#; + +const INIT_SBOM_WORKFLOW: &str = r#"name: SBOM diff + +on: pull_request + +permissions: + contents: read + pull-requests: write + +jobs: + diff: + runs-on: ubuntu-latest + steps: + - uses: Metbcy/bomdrift@v1 + with: + config: .bomdrift.toml +"#; + +const INIT_SUPPRESS_WORKFLOW: &str = r#"name: bomdrift suppress + +on: + issue_comment: + types: [created] + +permissions: + contents: write + pull-requests: write + +jobs: + suppress: + if: | + github.event.issue.pull_request && + startsWith(github.event.comment.body, '/bomdrift suppress ') + runs-on: ubuntu-latest + steps: + - uses: Metbcy/bomdrift/comment-suppress@v1 +"#; + +fn load_sbom( + path: &Path, + format_hint: Option, + include_file_components: bool, +) -> Result { + let raw = fs::read_to_string(path) + .with_context(|| format!("reading SBOM file: {}", path.display()))?; + parse_sbom_bytes( + &raw, + &path.display().to_string(), + format_hint, + include_file_components, + ) +} + +fn parse_sbom_bytes( + raw: &str, + source_label: &str, + format_hint: Option, + include_file_components: bool, +) -> Result { + let value: serde_json::Value = + serde_json::from_str(raw).with_context(|| format!("parsing JSON in: {source_label}"))?; + let mut sbom = parse::parse_with_format(value, format_hint) + .with_context(|| format!("normalizing SBOM from: {source_label}"))?; + if !include_file_components { + 
parse::filter_file_components(&mut sbom); + } + Ok(sbom) +} + +#[allow(clippy::too_many_arguments)] +fn load_sbom_or_attestation( + path: Option<&Path>, + oci_ref: Option<&str>, + cosign_identity: Option<&str>, + cosign_issuer: Option<&str>, + format_hint: Option, + include_file_components: bool, + side: &str, + debug_calibration: bool, + debug_format: crate::cli::DebugFormat, +) -> Result { + if let Some(oci) = oci_ref { + let identity = cosign_identity.ok_or_else(|| { + anyhow::anyhow!( + "--{side}-attestation requires --cosign-identity (regex passed to cosign --certificate-identity-regexp)" + ) + })?; + let issuer = cosign_issuer.ok_or_else(|| { + anyhow::anyhow!( + "--{side}-attestation requires --cosign-issuer (URL passed to cosign --certificate-oidc-issuer)" + ) + })?; + let body = attestation::fetch_verified_sbom(oci, identity, issuer) + .with_context(|| format!("fetching --{side}-attestation {oci}"))?; + if debug_calibration { + // One row per verified attestation; surfaces the cert + // regex cosign accepted so adopters can confirm policy. 
+ let _ = + write_attestation_calibration(&mut std::io::stderr(), oci, identity, debug_format); + } + return parse_sbom_bytes( + &body, + &format!("attestation:{oci}"), + format_hint, + include_file_components, + ); + } + let path = path.ok_or_else(|| { + anyhow::anyhow!( + "internal: {side} requires either a positional path or --{side}-attestation" + ) + })?; + load_sbom(path, format_hint, include_file_components) +} + +fn write_attestation_calibration( + out: &mut W, + oci_ref: &str, + identity: &str, + format: crate::cli::DebugFormat, +) -> std::io::Result<()> { + match format { + crate::cli::DebugFormat::Pipe => { + writeln!(out, "attestation|{oci_ref}|verified|{identity}") + } + crate::cli::DebugFormat::Jsonl => { + let row = serde_json::json!({ + "kind": "attestation", + "key": oci_ref, + "score": "verified", + "threshold": identity, + }); + writeln!(out, "{row}") + } + } +} + +#[cfg(test)] +mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] + use super::*; + use std::collections::HashMap; + + use crate::enrich::typosquat::TyposquatFinding; + use crate::enrich::version_jump::VersionJumpFinding; + use crate::enrich::{LicenseViolation, Severity, VulnRef}; + use crate::model::{Component, Ecosystem, Relationship}; + + fn comp(name: &str) -> Component { + Component { + name: name.to_string(), + version: "1.0.0".to_string(), + ecosystem: Ecosystem::Npm, + purl: Some(format!("pkg:npm/{name}@1.0.0")), + licenses: Vec::new(), + supplier: None, + hashes: Vec::new(), + relationship: Relationship::Unknown, + source_url: None, + bom_ref: None, + } + } + + fn enrichment_with_cve_at(severity: Severity) -> Enrichment { + let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/foo@1.0.0".into(), + vec![VulnRef { + id: "CVE-2025-1".into(), + severity, + aliases: Vec::new(), + epss_score: None, + kev: false, + }], + ); + Enrichment { + vulns, + ..Default::default() + } + } + + fn 
enrichment_with_cve() -> Enrichment { + // Severity::None is what every v0.2-era test implicitly assumed — the + // pre-severity world. Tests that don't care about the bucket use this. + enrichment_with_cve_at(Severity::None) + } + + fn enrichment_with_typosquat() -> Enrichment { + Enrichment { + typosquats: vec![TyposquatFinding { + component: comp("plain-crypto-js"), + closest: "crypto-js".to_string(), + score: 0.95, + }], + ..Default::default() + } + } + + fn enrichment_with_version_jump() -> Enrichment { + Enrichment { + version_jumps: vec![VersionJumpFinding { + before: comp("foo"), + after: comp("foo"), + before_major: 1, + after_major: 4, + }], + ..Default::default() + } + } + + fn cs_with_license_change() -> ChangeSet { + let mut before = comp("foo"); + before.licenses = vec!["MIT".into()]; + let mut after = comp("foo"); + after.licenses = vec!["GPL-3.0".into()]; + ChangeSet { + license_changed: vec![(before, after)], + ..Default::default() + } + } + + #[test] + fn fail_on_none_never_trips() { + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::None + )); + assert!(!tripped( + &cs_with_license_change(), + &enrichment_with_cve(), + FailOn::None + )); + } + + #[test] + fn fail_on_cve_trips_only_on_cve_findings() { + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::Cve + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::Cve + )); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::Cve + )); + } + + #[test] + fn fail_on_critical_cve_filters_on_severity_high_or_above() { + // Critical and High advisories trip; Medium / Low / None don't. The + // doc on `tripped()` explains why High is included in the + // "critical-cve" bucket. 
+ assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Critical), + FailOn::CriticalCve + )); + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::High), + FailOn::CriticalCve + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Medium), + FailOn::CriticalCve + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::None), + FailOn::CriticalCve + )); + } + + #[test] + fn fail_on_cve_still_trips_on_severity_none_advisories() { + // --fail-on cve is the broad "any advisory" bucket; severity threading + // doesn't change its semantics. An advisory with unresolved severity + // still trips it (the alternative — silent suppression — would be the + // real footgun). + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::None), + FailOn::Cve + )); + } + + #[test] + fn fail_on_typosquat_trips_only_on_typosquat_findings() { + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::Typosquat + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::Typosquat + )); + } + + #[test] + fn fail_on_any_trips_on_each_finding_kind_and_license_changes() { + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::Any + )); + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::Any + )); + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_version_jump(), + FailOn::Any + )); + // license-changed-without-version-bump alone trips Any (the suspicious + // case lives on the ChangeSet, not the enrichment). 
+ assert!(tripped( + &cs_with_license_change(), + &Enrichment::default(), + FailOn::Any + )); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::Any + )); + } + + #[test] + fn fail_on_license_change_trips_only_on_license_changes() { + assert!(tripped( + &cs_with_license_change(), + &Enrichment::default(), + FailOn::LicenseChange + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::LicenseChange + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::LicenseChange + )); + } + + #[test] + fn fail_on_typosquat_ignores_license_change() { + // license_changed is a ChangeSet field, not an enrichment. The + // typosquat threshold is strictly about typosquat findings — license + // drift must NOT trip it (otherwise consumers using --fail-on=typosquat + // get unexpected exit-2's on every license correction). + assert!(!tripped( + &cs_with_license_change(), + &Enrichment::default(), + FailOn::Typosquat + )); + } + + #[test] + fn budget_trips_when_counts_exceed_limits() { + let cs = ChangeSet { + added: vec![comp("a"), comp("b")], + removed: vec![comp("c")], + version_changed: vec![(comp("d"), comp("d"))], + ..Default::default() + }; + assert!(budget_tripped(&cs, Some(1), None, None)); + assert!(budget_tripped(&cs, None, Some(0), None)); + assert!(budget_tripped(&cs, None, None, Some(0))); + assert!(!budget_tripped(&cs, Some(2), Some(1), Some(1))); + } + + #[test] + fn calibration_pipe_format_matches_v0_7_layout() { + let e = enrichment_with_typosquat(); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + assert!(s.starts_with("typosquat|"), "got: {s}"); + assert_eq!( + s.matches('|').count(), + 3, + "pipe row has 4 fields → 3 separators; got: {s}" + ); + } + + #[test] + fn calibration_jsonl_format_emits_one_object_per_line() { + let e = 
enrichment_with_typosquat(); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Jsonl, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + let lines: Vec<&str> = s.lines().collect(); + assert_eq!(lines.len(), 1); + let v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid jsonl"); + assert_eq!(v["kind"], "typosquat"); + assert!(v["score"].is_number(), "numeric score in jsonl"); + assert!(v["threshold"].is_number()); + assert!(v["key"].is_string()); + } + + #[test] + fn calibration_jsonl_keeps_severity_label_as_string() { + let e = enrichment_with_cve_at(Severity::High); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Jsonl, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + let v: serde_json::Value = serde_json::from_str(s.trim()).unwrap(); + assert_eq!(v["kind"], "cve"); + assert_eq!(v["score"], "HIGH"); + assert_eq!(v["threshold"], "high+"); + } + + #[test] + fn fail_on_kev_trips_when_any_advisory_kev_set() { + let mut e = enrichment_with_cve_at(Severity::Medium); + // Flip the kev flag on the single advisory. 
+ for refs in e.vulns.values_mut() { + refs[0].kev = true; + } + assert!(tripped(&ChangeSet::default(), &e, FailOn::Kev)); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Medium), + FailOn::Kev + )); + } + + #[test] + fn any_epss_threshold_gating() { + let mut e = enrichment_with_cve_at(Severity::Medium); + for refs in e.vulns.values_mut() { + refs[0].epss_score = Some(0.6); + } + assert!(any_epss_at_or_above(&e, 0.5)); + assert!(any_epss_at_or_above(&e, 0.6)); + assert!(!any_epss_at_or_above(&e, 0.7)); + } + + #[test] + fn calibration_emits_epss_and_kev_rows_when_set() { + let mut e = enrichment_with_cve_at(Severity::High); + for refs in e.vulns.values_mut() { + refs[0].epss_score = Some(0.87); + refs[0].kev = true; + } + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + assert!(s.contains("epss|"), "missing epss row: {s}"); + assert!(s.contains("kev|"), "missing kev row: {s}"); + } + + #[test] + fn calibration_license_row_includes_exception_detail() { + // v0.9.5: matched_rule on an exception-driven license violation + // must surface the exception identifier in the calibration tap + // so operators tuning policy see why a row fired. 
+ let mut e = Enrichment::default(); + let component = crate::model::Component { + name: "llvm-sys".into(), + version: "1.0.0".into(), + ecosystem: crate::model::Ecosystem::Cargo, + purl: Some("pkg:cargo/llvm-sys@1.0.0".into()), + licenses: vec!["Apache-2.0 WITH LLVM-exception".into()], + supplier: None, + hashes: Vec::new(), + relationship: crate::model::Relationship::Unknown, + source_url: None, + bom_ref: None, + }; + e.license_violations.push(LicenseViolation { + component, + license: "Apache-2.0 WITH LLVM-exception".into(), + matched_rule: "exception:LLVM-exception denied".into(), + kind: crate::enrich::LicenseViolationKind::Deny, + }); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + assert!( + s.contains("license|"), + "missing license calibration row: {s}" + ); + assert!( + s.contains("exception:LLVM-exception denied"), + "row must surface matched_rule with exception detail: {s}" + ); + } + + #[test] + fn fail_on_license_violation_trips() { + use crate::enrich::{LicenseViolation, LicenseViolationKind}; + let mut e = Enrichment::default(); + e.license_violations.push(LicenseViolation { + component: comp("foo"), + license: "GPL-3.0-only".into(), + matched_rule: "deny: GPL-3.0-only".into(), + kind: LicenseViolationKind::Deny, + }); + assert!(tripped(&ChangeSet::default(), &e, FailOn::LicenseViolation)); + assert!(tripped(&ChangeSet::default(), &e, FailOn::Any)); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::LicenseViolation + )); + } +} diff --git a/src/vex.rs b/src/vex.rs index b16f25a..6271e69 100644 --- a/src/vex.rs +++ b/src/vex.rs @@ -929,11 +929,23 @@ pub fn emit( "version": 1, "statements": statements_json, }); - serde_json::to_string_pretty(&doc).expect("serialize OpenVEX doc") + #[allow( + clippy::expect_used, + reason = "invariant: serde_json::to_string_pretty cannot fail on a 
Value built from owned data with string keys" + )] + serde_json::to_string_pretty(&doc) + .expect("invariant: serde_json::to_string_pretty cannot fail on a Value built from owned data with string keys") } #[cfg(test)] mod tests { + #![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented + )] use super::*; use std::io::Write as _; @@ -1120,6 +1132,8 @@ mod tests { lock } fn unpin_clock() { + // SAFETY: caller must hold the `pin_clock` mutex guard for the + // duration of this call so env mutation stays serialized. unsafe { std::env::remove_var("SOURCE_DATE_EPOCH"); }