diff --git a/CHANGELOG.md b/CHANGELOG.md index 5559ea8..c045f52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,145 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.9.6] - 2026-04-29 + +The "finish the roadmap" milestone. v0.9.6 closes out every entry in the +prior roadmap's "Future candidates (not committed)" section — by shipping +it, by closing it as a non-goal, or by documenting the explicit upstream +blocker. Headline new features: **OCI attestation verification** via +`cosign verify-attestation`, an **external-process plugin system** for +custom rules, full **CLI calibration knobs** for the remaining hardcoded +thresholds, and a **comprehensive documentation refresh** across every +chapter to reflect v0.7 → v0.9.6 reality. + +### Added + +- **OCI attestation verification.** `bomdrift diff + --before-attestation --after-attestation ` shells + out to `cosign verify-attestation --type=cyclonedx`, parses the + in-toto envelope, and feeds the verified SBOM payload into the + standard parser. New `--cosign-identity ` and + `--cosign-issuer ` flags pass through to cosign's + `--certificate-identity-regexp` / `--certificate-oidc-issuer`. New + `--require-attestation` boolean refuses falling back to the + `--before` / `--after` file flags so production CI gates can enforce + attested SBOMs only. Documented in `docs/src/attestation.md`. New + `attestation` row in `--debug-calibration` so users can confirm + cosign accepted the right cert. +- **External-process plugin system.** New `--plugin + ` flag (repeatable). Plugin manifest in TOML + (`name` / `description` / `exec` / `timeout_ms` / `invoke_on`). + bomdrift invokes the plugin once per Added or VersionChanged + component with JSON on stdin (`{component, event, before}`) and + parses JSON from stdout (`{findings: [...]}`). 
Best-effort: timeout, + non-zero exit, or malformed JSON drops the offending plugin's + findings and logs a warning at `BOMDRIFT_DEBUG=1`; the rest of the + diff renders. New `bomdrift.plugin` SARIF rule with stable + `partialFingerprints` per `(plugin_name, purl, rule_id)`. Worked + example shipped under `examples/plugins/banned-packages/`. + Documented in `docs/src/plugins.md`. Protocol carries + `protocol_version: 1` for forward-compat. +- **CLI calibration knobs** for the three previously-hardcoded + thresholds: + - `--typosquat-similarity-threshold ` (default `0.92`, + validated 0.0..=1.0). + - `--young-maintainer-days ` (default `90`, validated >= 1). + - `--cache-ttl-hours ` (default `24`, validated >= 1; applies + uniformly to OSV / EPSS / KEV / Registry caches). + - Matching `[diff]` config keys: `typosquat_similarity_threshold`, + `young_maintainer_days`, `cache_ttl_hours`. + - `--debug-calibration` rows now emit the *active* threshold rather + than the hardcoded default, so calibration data collection + reflects the real run. + +### Changed + +- **`CACHE_TTL_SECS` unified.** Previously duplicated in four + modules (`src/enrich/{cache,epss,kev,registry}.rs`). Now a single + source of truth in `src/enrich/cache.rs` with `effective_ttl_secs` + helper that honors per-run overrides without globals. +- **Comprehensive documentation refresh** across every chapter. + Notable updates: + - `README.md` rewritten with capability-grouped feature list + (Ingest / Enrichers / Suppression / Output / Forge / + Extensibility / Packaging) and a 5-column comparison table + against Socket / Snyk / Trivy / OSV-Scanner / Grype with 11 + feature-row dimensions sourced from the v0.7-v0.9 competitor + research doc. + - `docs/src/cli-reference.md` rewritten end-to-end. 
Every CLI flag + now documented and grouped by purpose (Output / Suppression / + Enrichment / Calibration / License / Failure thresholds / + Forge / Attestation / Plugins / Diagnostics) with each entry + annotated with introduced-in version. + - `docs/src/architecture.md` module map expanded to cover the 9 + modules added across v0.7-v0.9.6 (`config`, `clock`, + `attestation`, `plugin`, `vex`, `epss`, `kev`, `registry`, + `license`); new "Best-effort enricher contract" and + "Byte-determinism contract" subsections; approved-deps table + including `base64 = "0.22"` (v0.9.6) and `spdx = "=0.10.9"` + exact pin (v0.9.5). + - `docs/src/baseline.md` 6-row schema-reference table for the + unified `BaselineEntry` (id / purl / expires / reason / + vex_status / vex_justification). + - Per-enricher chapters (`docs/src/enrichers/{typosquat, + maintainer-age,version-jump,kev,epss,registry}.md`) gained + consistent Calibration + Disabling + See-also subsections; + overview table grew from 4 to 9 rows. + - `docs/src/SUMMARY.md` reorganized into Output / Enrichers / + Suppressions / Advanced groups for new-reader navigation. + - `CONTRIBUTING.md` "Test conventions (v0.9.5+)" subsection added + documenting the `clock::test_env_lock()` recipe; "Adding a new + enricher" and "Adding a new finding kind" worked recipes. + - Stale content rewritten across multiple chapters + (`gitlab-ci.md`'s v0.7-deferred section now covers what v0.9 + actually shipped; release-signing pins refreshed; etc.). + +### Roadmap + +- Closed out every "Future candidates" entry from the v0.9.5 + roadmap with explicit dispositions: + - **Reachability** → moved to Non-goals (pair with Endor / Snyk). + - **GraphQL maintainer-age** → decided: REST stays + (cursor-pagination-cost analysis lifted into the maintainer + enricher's module doc). + - **VEX vocabulary beyond OpenVEX 8 justifications** → + spec-bound; documented in `docs/src/vex.md` that bomdrift + follows the OpenVEX 0.2.0 vocab verbatim. 
+ - **PyPI / crates.io maintainer-set-changed** → moved to a new + "Blocked on upstream" subsection with the precise API gap + documented (PyPI lacks per-version maintainers; crates.io + lacks per-version `published_by` history). +- Calibration backlog section removed entirely — every threshold it + tracked (similarity, young-maintainer-days, recently-published-days, + cache-ttl-hours) is now CLI/config-configurable. + +### Deps + +- Added `base64 = "0.22"` for the cosign in-toto envelope payload + decode in `src/attestation.rs`. Already a transitive dep via + `ureq`; promoted to direct so we own the pin. + +### Tests + +- 389 → 420 (+31). Plugin manifest parse, plugin success/timeout/ + non-zero-exit/malformed-output paths, attestation envelope parse, + fake-cosign integration test (PATH-injection with serialized env + lock), calibration knobs override default + reflect in + `--debug-calibration`, cache-TTL override per enricher. + +### Scope notes + +What stayed deferred to v1.0 candidates (carried to roadmap "Blocked +on upstream" or new "Future candidates"): + +- PyPI / crates.io maintainer-set-changed (upstream API blockers). +- WASM / sandboxed plugin model (current external-process model + works; revisit if demand materializes). +- Bitbucket / Azure DevOps action-side `vex:` / `emit-vex:` / + `plugin:` inputs (CLI surface is broader than action surface). +- Multi-major version-jump `MIN_MAJOR_DELTA` calibration knob + (only remaining hardcoded threshold; revisit with calibration data). + ## [0.9.5] - 2026-04-29 The "polish + multi-SCM parity" milestone. 
v0.9.5 ships the v0.9 follow-up diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bfea528..9e8cbb9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,12 +28,14 @@ git clone https://github.com/Metbcy/bomdrift cd bomdrift cargo check --all-targets # fast feedback while editing -cargo test --release # full test suite (~270+ tests as of v0.5) -cargo clippy --all-targets --all-features -- -D warnings -cargo fmt --all --check # MUST pass; run `cargo fmt --all` to fix +cargo test --release # full test suite (~420 tests as of v0.9.6) +rustup run 1.88 cargo clippy --all-targets --all-features -- -D warnings +cargo fmt --all -- --check # MUST pass; run `cargo fmt --all` to fix ``` -Rust 1.85+ required (the project uses edition 2024). +Rust 1.88+ required (the project uses edition 2024; CI is pinned to +1.88 to keep clippy lints stable across releases — see +`Cargo.toml`'s `rust-version` field). ## Project conventions @@ -101,6 +103,70 @@ Network-touching enrichers should have a unit test for the network- failure path (fake fetcher returns `Err`) — the best-effort contract matters and silently breaking it would be an easy regression. +### Test conventions (v0.9.5+) + +Tests that mutate `SOURCE_DATE_EPOCH` (directly or indirectly via +`bomdrift::clock::*`) MUST acquire `clock::test_env_lock()` to serialize +across the crate's parallel test threads. Without the lock, two tests +running in parallel can read each other's mutated env var and +intermittently fail in ways that look format-deterministic but aren't. + +```rust +#[test] +fn baseline_expiry_relative_to_source_date_epoch() { + let _lock = bomdrift::clock::test_env_lock(); + // SAFETY: serialized by _lock above. + unsafe { std::env::set_var("SOURCE_DATE_EPOCH", "1735689600") }; // 2025-01-01 + // ... test body ... +} +``` + +The lock is a `std::sync::Mutex<()>` — it is not re-entrant, so acquire it once +per test; a panic while the guard is held will poison it. 
If +you see "PoisonError" in CI but not locally, a previous test panicked +while holding the lock — fix the panicking test, not the poison handling. + +### Adding a new enricher + +The shortest viable PR shape, mirroring how `enrich::epss` was added in +v0.8 and `enrich::registry` in v0.9: + +1. **`src/enrich/.rs`** — pure `enrich(cs: &ChangeSet, ...) -> + Vec<Finding>` with a fail-soft fetcher boundary. Mirror the + shape of `src/enrich/osv.rs`. +2. **Wire into `Enrichment`** — add a field to the + `bomdrift::enrich::Enrichment` struct in `src/enrich/mod.rs`; have + `lib.rs::run_diff` populate it. +3. **Add a `--no-` flag** to `src/cli.rs::DiffArgs`, plumb + through the `[diff] no_` config key. +4. **Renderers** — add a section to `render::markdown`, + `render::term`, `render::json`. For SARIF, add a stable rule ID + (`bomdrift.`), a `partialFingerprints.primaryHash/v1` + identity tuple, and a fingerprint-stability test. +5. **`--debug-calibration` row** — emit one + `|||` line per finding considered. +6. **Docs** — add `docs/src/enrichers/.md` and link it from + `docs/src/SUMMARY.md` and `docs/src/enrichers/overview.md`. +7. **CHANGELOG** — `## [Unreleased]` entry under `### Added`. + +### Adding a new finding kind + +When a new finding kind is purely a rendering layer (e.g., a new +synthetic ID for VEX export or a new SARIF rule for an existing +enricher), the recipe is shorter: + +1. **Synthetic-id grammar** — extend + `bomdrift::vex::SyntheticFindingKind` and the + `parse_synthetic_id` parser. Round-trip must be exact. +2. **SARIF rule** — add the rule descriptor to + `render::sarif::ALL_RULES` so it appears in `tool.driver.rules` + even with zero results, then a `partialFingerprints` identity + tuple for the new rule. +3. **Markdown / terminal / JSON sections** — mirror the existing + per-finding sections. +4. **Determinism test** — round-trip the rendered SARIF / VEX through + the parser and assert byte-for-byte equality with the input. 
+ ## Documentation When you add a CLI flag / action input / enricher, update: diff --git a/Cargo.lock b/Cargo.lock index f26dbf6..0368c65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,9 +123,10 @@ dependencies = [ [[package]] name = "bomdrift" -version = "0.9.5" +version = "0.9.6" dependencies = [ "anyhow", + "base64", "clap", "criterion", "directories", diff --git a/Cargo.toml b/Cargo.toml index 3415214..70603bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bomdrift" -version = "0.9.5" +version = "0.9.6" edition = "2024" rust-version = "1.88" description = "SBOM diff with supply-chain risk signals (CVEs, typosquats, maintainer-age)." @@ -35,6 +35,7 @@ time = { version = "0.3", default-features = false, features = ["serde", "parsin sha2 = { version = "0.10", default-features = false } # Exact-pinned: SPDX list updates can shift LicenseId.is_gnu() / is_osi_approved membership and silently change license-policy semantics. Bump deliberately. spdx = { version = "=0.10.9", default-features = false } +base64 = { version = "0.22", default-features = false, features = ["std"] } [dev-dependencies] criterion = { version = "0.5", default-features = false, features = ["html_reports"] } diff --git a/README.md b/README.md index ead7d2d..6a61644 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # bomdrift -> **SBOM diff with supply-chain risk signals.** Flags new CVEs, typosquats, multi-major version jumps, and young-maintainer signals on added or upgraded dependencies — posted as a GitHub PR comment. +> **SBOM diff with supply-chain risk signals.** Flags new CVEs (with EPSS + CISA KEV signal), typosquats across 8 ecosystems, multi-major version jumps, young-maintainer takeovers, recently-published / deprecated / maintainer-set-changed registry signals, and license-policy violations on every changed dependency — posted as a comment on GitHub, GitLab, Bitbucket, or Azure DevOps PRs. 
[![CI](https://github.com/Metbcy/bomdrift/actions/workflows/ci.yml/badge.svg)](https://github.com/Metbcy/bomdrift/actions/workflows/ci.yml) [![Release](https://img.shields.io/github/v/release/Metbcy/bomdrift?sort=semver&display_name=tag)](https://github.com/Metbcy/bomdrift/releases/latest) @@ -24,7 +24,7 @@ jobs: That's it. `Metbcy/bomdrift@v1` runs Syft against your project at the PR base + head, diffs the SBOMs, and posts a single PR comment that updates on every push. See it live on [#1](https://github.com/Metbcy/bomdrift/pull/1) — bomdrift dogfoods itself on its own PRs. -**Quick links:** [Why?](#why-bomdrift) · [vs Socket / Snyk / Trivy](#how-it-compares) · [Action reference](https://metbcy.github.io/bomdrift/github-action.html) · [CLI reference](https://metbcy.github.io/bomdrift/cli-reference.html) · [Suppress findings](https://metbcy.github.io/bomdrift/baseline.html#in-comment-suppression-v05) · [Release signing](#release-signing) · [Examples](./examples/) +**Quick links:** [Why?](#why-bomdrift) · [vs Socket / Snyk / Trivy / OSV-Scanner / Grype](#how-it-compares) · [Action reference](https://metbcy.github.io/bomdrift/github-action.html) · [CLI reference](https://metbcy.github.io/bomdrift/cli-reference.html) · [License policy](https://metbcy.github.io/bomdrift/license-policy.html) · [VEX](https://metbcy.github.io/bomdrift/vex.html) · [SARIF](https://metbcy.github.io/bomdrift/sarif.html) · [OCI attestation](https://metbcy.github.io/bomdrift/attestation.html) · [Plugins](https://metbcy.github.io/bomdrift/plugins.html) · [GitLab](https://metbcy.github.io/bomdrift/gitlab-ci.html) · [Bitbucket](https://metbcy.github.io/bomdrift/bitbucket.html) · [Azure DevOps](https://metbcy.github.io/bomdrift/azure-devops.html) · [Suppress findings](https://metbcy.github.io/bomdrift/baseline.html#in-comment-suppression-v05) · [Release signing](#release-signing) · [Examples](./examples/) ## Why bomdrift @@ -43,20 +43,33 @@ Recent incidents bomdrift would have surfaced: ## How it 
compares -| | bomdrift | Socket | Snyk | Trivy | -|------------------------------------------|:---:|:---:|:---:|:---:| -| **Diff-focused** (what *changed*, not what *is*) | yes | yes | partial | no | -| **Open source, no hosted dashboard required** | yes | no | no | yes | -| **Maintainer-age signal (xz pattern)** | yes | partial | no | no | -| **Cosign-signed releases (Sigstore + GitHub OIDC)** | yes | n/a | n/a | no | -| **Single self-contained binary, no Docker** | yes | no | no | yes | -| **In-comment suppression (`/bomdrift suppress`)** | yes | partial | yes | no | -| **No telemetry / no account / no signup** | yes | no | no | yes | -| **SARIF v2.1.0 to GitHub Code Scanning** | yes | no | yes | yes | -| **Eight ecosystems for typosquat detection** | yes | yes | no | no | -| **Apache-2.0** | yes | proprietary | proprietary | yes | - -bomdrift fills a specific gap: a free, OSS-first, single-binary tool for the *diff-time* question. It's not a replacement for Snyk's scan-everything posture or Socket's SaaS UX — it's the right answer when you want supply-chain risk signals on PRs without paying for a vendor or running a dashboard. +The dimensions adopters actually filter on. Sourced from +[`files/competitor-research-v0.7-v0.9.md`](./files/competitor-research-v0.7-v0.9.md); +correct as of v0.9.6. 
+ +| | bomdrift | Socket | Snyk | Trivy | OSV-Scanner | Grype | +|------------------------------------------|:---:|:---:|:---:|:---:|:---:|:---:| +| **Diff-focused** (what *changed*, not what *is*) | yes | yes | partial | no | no | no | +| **Open source, no hosted dashboard required** | yes | no | no | yes | yes | yes | +| **Maintainer-age signal (xz pattern)** | yes | partial | no | no | no | no | +| **Multi-SCM PR comments** (GitHub / GitLab / Bitbucket / Azure DevOps) | yes (all four, v0.9.5+) | GitHub mainly | GitHub + GitLab | no | no | no | +| **In-comment suppression** (`/bomdrift suppress`) | yes (all four SCMs) | partial | yes | no | no | no | +| **License policy with SPDX expression evaluation + per-exception allow/deny** | yes (v0.9.5) | no | partial | no | no | no | +| **VEX consume + emit** (OpenVEX 0.2.0 + CycloneDX VEX 1.6) | yes (v0.9) | no | partial | partial (consume) | no | no | +| **OCI attestation verification** (`cosign verify-attestation`) | yes (v0.9.6) | no | no | partial | no | no | +| **External-process plugin system** (custom rules) | yes (v0.9.6) | no | partial | no | no | no | +| **SARIF v2.1.0 → GitHub Code Scanning** | yes (v0.8) | no | yes | yes | yes | yes | +| **Eight-ecosystem typosquat detection** (npm/PyPI/Cargo/Maven/Go/Gem/NuGet/Composer) | yes | yes | no | no | no | no | +| **EPSS + CISA KEV signals** | yes (v0.8) | partial | yes | no | partial | no | +| **Cosign-signed releases (Sigstore + GitHub OIDC)** | yes | n/a | n/a | no | yes | yes | +| **Byte-deterministic output** (SOURCE_DATE_EPOCH-honored) | yes | n/a | no | no | no | no | +| **Single self-contained binary, no Docker** | yes | no | no | yes | yes | yes | +| **No telemetry / no account / no signup** | yes | no | no | yes | yes | yes | +| **Auto-fix PR generation** | **no** (pair with Renovate / Dependabot) | no | yes | no | no | no | +| **Reachability / call-graph analysis** | **no** (pair with Endor / Snyk Reachability) | partial | yes | no | no | no | +| 
**License** | Apache-2.0 | proprietary | proprietary | Apache-2.0 | Apache-2.0 | Apache-2.0 | + +bomdrift fills a specific gap: a free, OSS-first, single-binary tool for the *diff-time* question. It's not a replacement for Snyk's scan-everything posture or Socket's SaaS UX — it's the right answer when you want supply-chain risk signals on PRs without paying for a vendor or running a dashboard. For reachability and tarball-behavior analysis, pair bomdrift with the tools called out in the [Pair with…](#pair-with) table. ## Detailed install @@ -81,7 +94,9 @@ jobs: # verify-signatures: true (set false on trusted mirrors) ``` -Pin to `@v1` for the latest v0.x; pin to `@v0.9.5` for reproducible builds. Run `bomdrift init` if you want a checked-in `.bomdrift.toml` policy and both workflows scaffolded locally. See the [Action reference](https://metbcy.github.io/bomdrift/github-action.html) for every input. +Pin to `@v1` for the latest v0.x; pin to `@v0.9.6` for reproducible builds. Run `bomdrift init` if you want a checked-in `.bomdrift.toml` policy and both workflows scaffolded locally. See the [Action reference](https://metbcy.github.io/bomdrift/github-action.html) for every input — including `upload-to-code-scanning`, `verify-signatures`, `comment-size-limit`, and the `before-sbom`/`after-sbom` escape hatch. + +**Other forges:** GitLab CI, Bitbucket Pipelines, and Azure DevOps Pipelines all have ready-to-copy templates under [`examples/`](./examples/) and dedicated docs chapters: [GitLab CI](https://metbcy.github.io/bomdrift/gitlab-ci.html), [Bitbucket](https://metbcy.github.io/bomdrift/bitbucket.html), [Azure DevOps](https://metbcy.github.io/bomdrift/azure-devops.html). Comment-driven `/bomdrift suppress` works on all four SCMs via the Cloudflare Worker bridges added in v0.9.5. 
#### Optional: in-comment suppression (v0.5+) @@ -112,7 +127,7 @@ Comment `/bomdrift suppress GHSA-xxxx` on any PR; the sub-action appends to `.bo Pre-built binaries cover Linux x86_64 + aarch64, macOS aarch64, and Windows x86_64. Each archive is cosign-signed via Sigstore + GitHub OIDC. ```bash -VERSION=v0.9.5 +VERSION=v0.9.6 TARGET=x86_64-unknown-linux-gnu curl -sSL -o bomdrift.tar.gz \ "https://github.com/Metbcy/bomdrift/releases/download/${VERSION}/bomdrift-${VERSION}-${TARGET}.tar.gz" @@ -128,7 +143,7 @@ Verify the archive's signature before you trust the binary — see [Release sign ### From source ```bash -cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.5 bomdrift +cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.6 bomdrift ``` Requires Rust 1.85+ (the project uses edition 2024). @@ -209,20 +224,54 @@ With network access, an additional Vulnerabilities section lists each advisory I ## Features -- Diff **CycloneDX 1.5/1.6**, **SPDX 2.3**, and **Syft** JSON SBOMs against each other (any combination), via a unified component model. -- For added & upgraded packages, enrich with **OSV.dev CVE data** through `/v1/querybatch`, then a per-advisory `/v1/vulns/{id}` follow-up to populate **severity** (Critical / High / Medium / Low). -- 24h on-disk **OSV severity cache** (`/bomdrift/osv/`) so reruns within a working day don't re-fetch — opt out with `--no-osv-cache`. -- Flag possible **typosquats** across **npm**, **PyPI**, **Cargo**, **Maven**, **Go**, **RubyGems**, **NuGet**, and **Composer** via Jaro-Winkler similarity (Levenshtein for Maven artifactIds), with a suffix-containment boost rule that catches the `plain-crypto-js` to `crypto-js` pattern that pure JW alone misses. Refreshable from each ecosystem's canonical upstream via `bomdrift refresh-typosquat`. -- Flag deps whose **top GitHub maintainer joined the project recently** (the xz-style takeover signal). 
Honors `GITHUB_TOKEN`, rate-limit-aware, skipped when the repo has > 50 contributors. -- Flag **multi-major version jumps** (≥ 2 majors) in a single diff — often correlates with takeover swaps and namespace reuse. -- **Output formats**: terminal (colored, TTY-aware), Markdown (PR comment, with collapsible sections + severity sort), **JSON**, and **SARIF v2.1.0** for GitHub Code Scanning ingestion. -- **`--fail-on`** thresholds (`cve` / `critical-cve` / `typosquat` / `license-change` / `any`) and diff budgets (`--max-added`, `--max-removed`, `--max-version-changed`) exit code 2 on trip while still emitting the comment body, so the PR comment posts even when the workflow step fails. -- **`.bomdrift.toml` + `bomdrift init`** let repos keep policy in version control instead of repeating inputs in workflow YAML. -- **`/bomdrift suppress `** in-comment suppression (v0.5+) via a companion sub-action. -- **`--baseline `** suppresses findings already captured in a previously stored `bomdrift diff --output json` snapshot. -- **`--summary-only`**, **`--findings-only`**, and automatic comment-size fallback (default 60 KB) keep big SBOM diffs under GitHub's 65,536-char comment-body cap. -- Ships as a **single Rust binary** (~3.4 MB, stripped + LTO) **and** a composite GitHub Action — no Docker. -- Releases are **cosign-signed** keyless via Sigstore + GitHub OIDC — eat-your-own-supply-chain-dogfood. +### SBOM ingest + +- Diff **CycloneDX 1.5/1.6**, **SPDX 2.3**, and **Syft JSON** against each other (any combination), via a unified component model. +- Optional **`--before-attestation` / `--after-attestation`**: fetch the SBOM from an OCI registry as a `cosign verify-attestation`-verified artifact instead of a local file (v0.9.6). See [OCI attestation](https://metbcy.github.io/bomdrift/attestation.html). + +### Risk-signal enrichers + +- **OSV.dev CVE lookup** via `/v1/querybatch` + per-advisory `/v1/vulns/{id}` for severity (Critical / High / Medium / Low). 
On-disk severity cache, configurable TTL via `--cache-ttl-hours` (v0.9.6). +- **EPSS** (FIRST.org Exploit Prediction Scoring System) per CVE, with `--fail-on-epss <0.0–1.0>` threshold gating (v0.8). +- **CISA KEV** known-exploited flag per advisory, with `--fail-on kev` gating (v0.8). +- **Typosquat detection** across **npm**, **PyPI**, **Cargo**, **Maven**, **Go**, **RubyGems**, **NuGet**, and **Composer**. Jaro-Winkler + suffix-containment boost (Levenshtein for Maven artifactIds, last-path-segment match for Go, package-portion match for Composer). Threshold tunable via `--typosquat-similarity-threshold` (v0.9.6). Refreshable via `bomdrift refresh-typosquat`. +- **Maintainer-age signal** — top GitHub contributor's first commit younger than `--young-maintainer-days` (default 90; tunable v0.9.6). The xz / Jia Tan pattern. Honors `GITHUB_TOKEN`, skipped on repos with > 50 contributors. +- **Multi-major version jumps** (≥ 2 majors) — pure compute, correlates with takeover swaps and namespace reuse. +- **Registry-metadata enrichers (npm / PyPI / crates.io)** — recently-published, deprecated, maintainer-set-changed (npm-only) (v0.9). Threshold via `--recently-published-days`, opt-out via `--no-registry`. +- **License policy** — `--allow-licenses` / `--deny-licenses` with SPDX expression evaluation (v0.9), plus per-exception `--allow-exception` / `--deny-exception` for `WITH`-clause granularity (v0.9.5). + +### Suppression + +- **`--baseline `** — JSON snapshot suppression with conservative per-purl-and-version match keys. +- **`/bomdrift suppress [reason: …]`** in-comment workflow on **all four SCMs**: GitHub (v0.5), GitLab (v0.9 via Cloudflare Worker), Bitbucket Cloud (v0.9.5), Azure DevOps (v0.9.5). +- **Time-boxed suppressions** with `expires` + `reason` fields per baseline entry (v0.8). Expired entries warn and surface; never silently keep suppressing. 
+- **VEX consume / emit** — OpenVEX 0.2.0 + CycloneDX VEX 1.6 on input (`--vex `, repeatable); OpenVEX 0.2.0 on output (`--emit-vex `) (v0.9). See [VEX](https://metbcy.github.io/bomdrift/vex.html). + +### Output + +- Terminal (TTY-aware ANSI), Markdown (PR comment, severity-sorted), JSON, and **SARIF v2.1.0** with stable rule IDs + `partialFingerprints.primaryHash/v1` for Code Scanning ingestion (v0.8). See [SARIF](https://metbcy.github.io/bomdrift/sarif.html). +- `--output-file ` writes to a file instead of stdout (v0.8) — useful for `--output sarif` in YAML pipelines where `>` redirection is fragile. +- **Byte-deterministic** — identical inputs produce byte-identical output, honoring `SOURCE_DATE_EPOCH`. PR-comment upserts patch in place rather than accumulating duplicates. + +### Failure thresholds + +- `--fail-on` (`cve` / `critical-cve` / `typosquat` / `license-change` / `any` / `kev`) and `--fail-on-epss `. Diff budgets (`--max-added`, `--max-removed`, `--max-version-changed`). All emit the comment body before the exit-2 trip so reviewers see findings even on failed runs. + +### Forge integration + +- `--platform ` controls comment-footer shape; auto-detects from `GITLAB_CI` / `BITBUCKET_BUILD_NUMBER` / `TF_BUILD` env vars. +- Composite GitHub Action with `upload-to-code-scanning`, `verify-signatures`, `comment-size-limit` inputs. +- Per-SCM Cloudflare Worker bridges under `examples//comment-bridge/` (v0.9 / v0.9.5). + +### Extensibility + +- **External-process plugin system** via `--plugin ` (repeatable). JSON over stdin/stdout, fail-soft. See [Plugins](https://metbcy.github.io/bomdrift/plugins.html) and the worked example at [`examples/plugins/banned-packages/`](./examples/plugins/banned-packages/) (v0.9.6). + +### Packaging + +- Single Rust binary (~3.4 MB stripped + LTO) **and** a composite GitHub Action — no Docker. +- Releases are **cosign-signed** keyless via Sigstore + GitHub OIDC. 
+- `.bomdrift.toml` + `bomdrift init` keep policy in version control rather than repeating inputs in YAML. ## Release signing @@ -230,7 +279,7 @@ Every release archive is signed with cosign keyless via Sigstore (GitHub OIDC). ```bash # Replace VERSION + TARGET with your downloaded archive's pair -VERSION=v0.9.5 +VERSION=v0.9.6 TARGET=x86_64-unknown-linux-gnu ARCHIVE=bomdrift-${VERSION}-${TARGET}.tar.gz @@ -260,14 +309,26 @@ PRs welcome. The `good first issue` label tracks focused asks for new contributo bomdrift's design constraints (OSS-first, single-binary, no telemetry, change-focused) put a number of capabilities deliberately out of scope. We don't ship them, but we recommend pairing bomdrift -with tools that do. +with tools that do. See [STATUS.md](./STATUS.md) and the +[roadmap](https://metbcy.github.io/bomdrift/roadmap.html) for the +canonical, version-controlled list. - **SBOM generation.** Use [Syft](https://github.com/anchore/syft) — it's already great. bomdrift only consumes SBOMs (and as of v0.5 invokes Syft itself inside the Action so consumers don't have to). +- **Replacing your SCA scanner.** OSV-scanner, Grype, Trivy all + have richer vulnerability databases for *full-tree* scans. + bomdrift's CVE enrichment is **change-focused**: only on what's + new in this diff. - **Dependency-tree visualization.** [`cargo tree`](https://doc.rust-lang.org/cargo/commands/cargo-tree.html), [`pnpm why`](https://pnpm.io/cli/why), and friends do this well. +- **Per-language deep parsing** (resolving lockfile edge cases beyond + what Syft already handles). bomdrift consumes whatever the + upstream SBOM generator produces. +- **Web UI / dashboard.** bomdrift output is markdown / SARIF / JSON + for ingestion by tooling you already have (PR comments, Code + Scanning, your own scripts). No daemon, no hosted UI. - **Reachability / call-graph analysis.** "Is this CVE reachable from my code's entry points?" requires AST + call-graph infrastructure orthogonal to SBOM diffing. 
*Pair with Endor Labs @@ -277,10 +338,6 @@ with tools that do. *Pair with [Socket](https://socket.dev/).* - **Auto-fix PR generation.** bomdrift surfaces findings; it doesn't open follow-up PRs. *Pair with Renovate or Dependabot.* -- **Continuous monitoring / always-on agent.** bomdrift is a - one-shot CLI invoked from CI. There's no daemon, no telemetry, no - scheduled background polling. *Run bomdrift in a scheduled CI - workflow if you want periodic re-checks.* - **Container / OCI image scanning.** SBOM + image-layer scanning is Trivy / Grype's lane. Use them; bomdrift focuses on application-dependency drift between two SBOMs. @@ -290,13 +347,13 @@ with tools that do. dashboards inevitably require telemetry, which violates bomdrift's no-telemetry tenet. *Pair with Endor / Snyk if your org needs centralized risk reporting.* +- **Continuous monitoring / always-on agent.** bomdrift is a + one-shot CLI invoked from CI. There's no daemon, no telemetry, no + scheduled background polling. *Run bomdrift in a scheduled CI + workflow if you want periodic re-checks.* - **Closed-source advisory databases.** bomdrift uses OSV.dev (the open advisory aggregator). Closed proprietary feeds aren't consumed in the OSS distribution. -- **Replacing your SCA scanner.** OSV-scanner, Grype, Trivy all - have richer vulnerability databases for *full-tree* scans. - bomdrift's CVE enrichment is **change-focused**: only on what's - new in this diff. ### Pair with… diff --git a/STATUS.md b/STATUS.md index 2067025..91333b9 100644 --- a/STATUS.md +++ b/STATUS.md @@ -1,29 +1,59 @@ # Project status -bomdrift is usable today as a local CLI and GitHub Action. The v0.5 line -focuses on making the Action copy-pasteable for first-time users while -keeping the project OSS-first: no hosted dashboard, no account, no telemetry. 
+bomdrift is usable today as a local CLI and as a composite GitHub Action, +with first-class templates + comment-driven suppression bridges for GitLab +CI, Bitbucket Pipelines, and Azure DevOps Pipelines. The v0.9.6 line ships +the last items off the public roadmap (calibration knobs, OCI attestation, +a plugin system) while keeping the project OSS-first: no hosted dashboard, +no account, no telemetry. + +## What's new in v0.9.6 + +Four feature themes for skim-readers; full notes live in +[CHANGELOG.md](./CHANGELOG.md): + +1. **Cache-TTL unification.** The four duplicated `CACHE_TTL_SECS` + constants (OSV, EPSS, KEV, registry) collapse into one shared + `enrich::cache::CACHE_TTL_SECS`. No behavior change at the default, + but a single source of truth for the calibration knob below. +2. **Calibration knobs.** Three previously hardcoded thresholds become + user-tunable: `--typosquat-similarity-threshold` (default 0.92), + `--young-maintainer-days` (default 90), `--cache-ttl-hours` + (default 24). Matching `[diff]` config keys, all CLI-overridable. +3. **OCI attestation verification.** New `--before-attestation` / + `--after-attestation` flags fetch the SBOM from an OCI registry as + a `cosign verify-attestation`-verified artifact. Required pair: + `--cosign-identity` (regex) + `--cosign-issuer` (URL). + `--require-attestation` refuses to fall back to local files. See + [docs/src/attestation.md](docs/src/attestation.md). +4. **External-process plugin system.** New `--plugin ` + (repeatable) lets organizations layer custom rules on top of + bomdrift's bundled enrichers. JSON over stdin/stdout; fail-soft. + Worked example at [`examples/plugins/banned-packages/`](examples/plugins/banned-packages/); + protocol reference at [docs/src/plugins.md](docs/src/plugins.md). 
## Current support | Area | Status | |---|---| -| GitHub.com pull requests | Supported through `Metbcy/bomdrift@v1` | -| Local CLI | Supported on Linux x86_64/aarch64, macOS aarch64, Windows x86_64 | -| SBOM formats | CycloneDX JSON, SPDX JSON, Syft JSON | -| In-comment suppression (GitHub) | Supported through `Metbcy/bomdrift/comment-suppress@v1` | -| GitHub Code Scanning (SARIF upload) | Supported (v0.8+) — set `upload-to-code-scanning: 'true'` | -| EPSS exploit-prediction scoring | Supported (v0.8+) — auto, opt-out via `--no-epss` | -| CISA KEV (known-exploited) flagging | Supported (v0.8+) — auto, opt-out via `--no-kev` | -| License allow/deny policy | Supported (v0.8+) — `[license]` block / CLI flags | -| Suppression expiry (`expires` + `reason`) | Supported (v0.8+) — time-boxed risk acceptance | -| GitLab CI merge requests | Supported through the `examples/gitlab-ci/` template (v0.7+); comment-driven suppression supported via Cloudflare Worker bridge (v0.9+) | +| GitHub.com pull requests | Supported through `Metbcy/bomdrift@v1` — see [github-action.md](docs/src/github-action.md) | +| Local CLI | Supported on Linux x86_64 + aarch64, macOS aarch64, Windows x86_64 — see [quickstart.md](docs/src/quickstart.md) | +| SBOM formats | CycloneDX 1.5 / 1.6 JSON, SPDX 2.3 JSON, Syft JSON | +| In-comment suppression (GitHub) | Supported through `Metbcy/bomdrift/comment-suppress@v1` — see [baseline.md](docs/src/baseline.md#in-comment-suppression-v05) | +| GitHub Code Scanning (SARIF upload) | Supported (v0.8+) — set `upload-to-code-scanning: 'true'`; see [sarif.md](docs/src/sarif.md) | +| EPSS exploit-prediction scoring | Supported (v0.8+) — auto, opt-out via `--no-epss`; see [enrichers/epss.md](docs/src/enrichers/epss.md) | +| CISA KEV (known-exploited) flagging | Supported (v0.8+) — auto, opt-out via `--no-kev`; see [enrichers/kev.md](docs/src/enrichers/kev.md) | +| License allow/deny policy | Supported (v0.8+, full SPDX expression evaluation v0.9, per-exception 
`WITH`-clause granularity v0.9.5) — see [license-policy.md](docs/src/license-policy.md) | +| Suppression expiry (`expires` + `reason`) | Supported (v0.8+) — time-boxed risk acceptance; see [baseline.md](docs/src/baseline.md#time-boxed-suppressions-expires--reason) | +| GitLab CI merge requests | Supported through `examples/gitlab-ci/` (v0.7+); comment-driven `/bomdrift suppress` via Cloudflare Worker bridge (v0.9+); see [gitlab-ci.md](docs/src/gitlab-ci.md) | +| Bitbucket Cloud Pipelines | Supported (v0.9+) — `examples/bitbucket-pipelines/`; comment-driven suppression via Worker bridge (v0.9.5+); see [bitbucket.md](docs/src/bitbucket.md) | +| Azure DevOps Pipelines | Supported (v0.9+) — `examples/azure-devops/`; comment-driven suppression via Worker bridge (v0.9.5+); see [azure-devops.md](docs/src/azure-devops.md) | +| VEX consume / emit | Supported (v0.9+) — OpenVEX 0.2.0 + CycloneDX VEX 1.6; see [vex.md](docs/src/vex.md) | +| Registry-metadata enrichers (npm/PyPI/crates.io) | Supported (v0.9+) — recently-published, deprecated, maintainer-set-changed; see [enrichers/registry.md](docs/src/enrichers/registry.md) | +| Calibration knobs (similarity / young-maintainer / cache TTL) | Supported (v0.9.6+) — see [cli-reference.md](docs/src/cli-reference.md#calibration) | +| OCI attestation verification | Supported (v0.9.6+) — via `cosign verify-attestation` shell-out; see [attestation.md](docs/src/attestation.md) | +| Custom rules / plugin system | Supported (v0.9.6+) — external-process plugins via `--plugin `; see [plugins.md](docs/src/plugins.md) | | GitHub Enterprise / self-hosted runners | Expected to work, not broadly tested yet | -| Bitbucket Pipelines | Supported (v0.9+) — `examples/bitbucket-pipelines/`; comment-driven suppression via Cloudflare Worker bridge (v0.9.5+) | -| Azure DevOps Pipelines | Supported (v0.9+) — `examples/azure-devops/`; comment-driven suppression via Cloudflare Worker bridge (v0.9.5+) | -| VEX consume / emit | Supported (v0.9+) — OpenVEX 
0.2.0 + CycloneDX VEX 1.6 | -| SPDX expression evaluation | Supported (v0.9+) — full `Expression::evaluate` via `spdx` crate | -| Registry-metadata enrichers (npm/PyPI/crates.io) | Supported (v0.9+) — recently-published, deprecated, maintainer-set-changed | | Hosted dashboard / SaaS | Not planned | ## Out-of-scope by design @@ -37,13 +67,16 @@ for the rationale. | Out-of-scope | Pair with | |---|---| +| SBOM generation | Syft (bomdrift bundles this in the Action) | | Reachability / call-graph analysis | Endor Labs, Snyk Reachability | | Tarball / behavior analysis | Socket | | Auto-fix PR generation | Renovate, Dependabot | | Container / OCI image scanning | Trivy, Grype | | SAST / secrets scanning | GitHub Advanced Security, Semgrep, gitleaks | | Risk-score dashboards (cross-repo) | Endor, Snyk | +| Web UI / hosted dashboard | n/a — out of scope | | Continuous monitoring / always-on agent | Run bomdrift in scheduled CI | +| Per-language deep parsing beyond Syft | Use a richer SBOM generator upstream | | Closed-source advisory feeds | bomdrift uses OSV.dev only | ## Known limitations diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 660c411..74f4a44 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -11,13 +11,10 @@ - [Azure DevOps Pipelines](./azure-devops.md) - [CLI reference](./cli-reference.md) -# Output +# Output formats -- [Output formats](./output-formats.md) +- [Output formats overview](./output-formats.md) - [SARIF + Code Scanning](./sarif.md) -- [VEX](./vex.md) -- [License policy](./license-policy.md) -- [Baseline & suppression](./baseline.md) # Enrichers @@ -30,10 +27,19 @@ - [Maintainer age signal](./enrichers/maintainer-age.md) - [Registry metadata (npm/PyPI/crates.io)](./enrichers/registry.md) -# Operations +# Suppressions +- [Baseline & suppression](./baseline.md) +- [VEX](./vex.md) +- [License policy](./license-policy.md) + +# Advanced + +- [OCI attestation](./attestation.md) +- [Plugins](./plugins.md) - [Release 
signing](./release-signing.md) - [Architecture](./architecture.md) +- [Contributing](./contributing.md) # Development @@ -43,4 +49,4 @@ # Project - [Roadmap](./roadmap.md) -- [Contributing](./contributing.md) + diff --git a/docs/src/architecture.md b/docs/src/architecture.md index 82ef763..218938e 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -9,33 +9,42 @@ the upsert contract. ```text src/ -├── main.rs — clap entry point; dispatches to lib::run -├── lib.rs — top-level wiring: load_sbom -> diff -> enrich -> render -├── cli.rs — clap derive types: DiffArgs, RefreshArgs, FailOn, etc. -├── model/ — unified component / SBOM types -│ ├── component.rs — Component, Ecosystem, Hash, Relationship -│ └── sbom.rs — Sbom, SbomFormat -├── parse/ — format-specific parsers -│ ├── cyclonedx.rs — CDX 1.5/1.6 JSON -│ ├── spdx.rs — SPDX 2.3 JSON -│ └── syft.rs — Syft JSON -├── diff/ — pair-by-version ChangeSet computation -│ ├── mod.rs — diff(), ChangeSet -│ └── key.rs — ComponentKey (purl-without-version | (eco, name)) -├── enrich/ — risk-signal enrichers -│ ├── osv.rs — OSV.dev /v1/querybatch + /v1/vulns/{id} -│ ├── cache.rs — on-disk OSV severity cache (24h TTL) -│ ├── typosquat.rs — Jaro-Winkler + suffix boost (npm/PyPI/Cargo); Levenshtein (Maven) +├── main.rs — clap entry point; dispatches to lib::run +├── lib.rs — top-level wiring: load_sbom -> diff -> enrich -> render +├── cli.rs — clap derive types: DiffArgs, RefreshArgs, FailOn, etc. 
+├── config.rs — `.bomdrift.toml` policy (de)serialization + merge +├── clock.rs — single source of truth for "now" (honors SOURCE_DATE_EPOCH) +├── attestation.rs — `cosign verify-attestation` shell-out (v0.9.6) +├── plugin.rs — external-process plugin loader (v0.9.6) +├── vex.rs — VEX consume (OpenVEX 0.2.0, CycloneDX VEX 1.6) + emit (OpenVEX) +├── baseline.rs — `--baseline` snapshot suppression + `expires`/`reason`/`vex_status` +├── refresh.rs — `bomdrift refresh-typosquat` subcommand +├── model/ — unified component / SBOM types +│ ├── component.rs — Component, Ecosystem, Hash, Relationship +│ └── sbom.rs — Sbom, SbomFormat +├── parse/ — format-specific parsers +│ ├── cyclonedx.rs — CDX 1.5/1.6 JSON +│ ├── spdx.rs — SPDX 2.3 JSON +│ └── syft.rs — Syft JSON +├── diff/ — pair-by-version ChangeSet computation +│ ├── mod.rs — diff(), ChangeSet +│ └── key.rs — ComponentKey (purl-without-version | (eco, name)) +├── enrich/ — risk-signal enrichers +│ ├── osv.rs — OSV.dev /v1/querybatch + /v1/vulns/{id} +│ ├── epss.rs — FIRST.org EPSS per-CVE scores (v0.8) +│ ├── kev.rs — CISA KEV catalog (v0.8) +│ ├── registry.rs — npm / PyPI / crates.io metadata (v0.9) +│ ├── license.rs — SPDX expression evaluation + allow/deny + per-exception (v0.8 / v0.9 / v0.9.5) +│ ├── typosquat.rs — Jaro-Winkler + suffix boost / Levenshtein / last-segment / package-portion │ ├── version_jump.rs — major-delta >= 2 heuristic -│ ├── maintainer.rs — GitHub REST contributor age -│ └── mod.rs — Enrichment graph aggregating findings -├── baseline.rs — load + apply --baseline JSON snapshots -├── refresh.rs — bomdrift refresh-typosquat subcommand -└── render/ — output formatters - ├── markdown.rs — GFM PR-comment body - ├── term.rs — TTY-aware ANSI - ├── json.rs — pretty-printed serde graph - └── sarif.rs — SARIF v2.1.0 with stable rule IDs +│ ├── maintainer.rs — GitHub REST contributor-age (the xz pattern) +│ ├── cache.rs — single source of truth for CACHE_TTL_SECS (v0.9.6 unified) +│ └── mod.rs — 
Enrichment graph aggregating findings +└── render/ — output formatters + ├── markdown.rs — GFM PR-comment body + ├── term.rs — TTY-aware ANSI + ├── json.rs — pretty-printed serde graph + └── sarif.rs — SARIF v2.1.0 with stable rule IDs + partialFingerprints ``` ## The pipeline @@ -143,6 +152,50 @@ Result: identical inputs render to byte-identical output every time, which is what `peter-evans/create-or-update-comment` relies on for the upsert behavior in the action. +## Best-effort enricher contract + +Every enricher — network (OSV / EPSS / KEV / GitHub / registries), +shell-out (cosign attestation), or external process (plugins) — honors +the same fail-soft contract: + +1. **Per-request timeout** so a misbehaving upstream can't hang a CI job. +2. **Errors warn once to stderr** (deduped by key) and the diff renders + without that source's findings. +3. **Per-component caching within a single run** so monorepo subpackages + sharing a parent project don't multiply HTTP requests. +4. **Best-effort never blocks the diff render.** Exit code stays 0 from + the enricher itself; the only way an enricher influences exit code is + indirectly via `--fail-on` thresholds tripping on findings it + produced. + +`src/enrich/osv.rs` is the canonical pattern; new enrichers MUST mirror +its `Result>`-where-`Err`-is-warned-not-propagated shape. +The `attestation.rs` and `plugin.rs` modules apply the same contract to +non-network shell-outs: a missing `cosign` binary, a plugin timeout, or +a malformed plugin response all warn and continue. + +## Byte-determinism contract + +Identical inputs MUST render to byte-identical outputs across every +format. This is what `peter-evans/create-or-update-comment` relies on +to upsert a PR comment in place rather than accumulating duplicates, +and what makes SARIF / VEX / JSON safe to commit to git. + +Concretely: + +- All `HashMap`s emitted into output are sorted by key first. 
+- All `Vec`s populated from `cs.added` / `version_changed` iteration + inherit the diff core's BTreeMap-derived order. +- Every "now" reference goes through `clock::now()`, which honors + `SOURCE_DATE_EPOCH` for reproducible-build contexts and for tests. +- VEX `@id` UUIDs and CycloneDX VEX `bom-ref` strings are deterministic + hashes of the finding tuple, never random. + +Tests that mutate `SOURCE_DATE_EPOCH` MUST acquire `clock::test_env_lock()` +to serialize across the crate's parallel test threads — a v0.9.5 +discovery during the `release/v0.9.5` cleanup. See +[Contributing](./contributing.md#test-conventions-v095) for the recipe. + ## Why no async / tokio? bomdrift is intentionally **synchronous**. The single-binary CLI runs @@ -158,39 +211,55 @@ unique CVEs, at the cost of: The OSV `/v1/querybatch` endpoint already batches (1000 queries per request), so the parallelism we'd want is mostly already there. The N+1 stage-2 `/v1/vulns/{id}` calls are gated by the on-disk severity -cache, which makes reruns within 24h essentially free. +cache, which makes reruns within the configured TTL essentially free. + +Plugin processes (v0.9.6+) are also invoked synchronously: at most +one external child at a time, with a per-component timeout. Parallel +plugin execution would re-introduce the tokio dependency cost without +solving a measured bottleneck. -## Why no chrono / no semver? +## Why no chrono / no semver / no octocrab? Same reasoning. We need: -- **One** ISO-8601 timestamp shape (the canonical `YYYY-MM-DDTHH:MM:SSZ` - GitHub always emits). Hand-rolled parser is ~25 LOC. -- **The major version** of a SemVer string. Hand-rolled extractor is ~5 LOC. -Both pulls would add transitive weight for no functional gain. The -constraint is documented at the top of each file (`enrich/maintainer.rs`, -`enrich/version_jump.rs`) so future contributors don't reflexively -reach for the popular crate. 
+- **One** ISO-8601 timestamp shape (the canonical `YYYY-MM-DDTHH:MM:SSZ` + GitHub always emits). Hand-rolled parser is ~25 LOC, lives in + `clock.rs`. +- **The major version** of a SemVer string. Hand-rolled extractor is + ~5 LOC in `enrich/version_jump.rs`. +- **GitHub REST**: a small set of endpoints (contributors, commits) + hand-rolled atop `ureq`. `octocrab` would pull in tokio. + +All three pulls would add transitive weight for no functional gain. +The constraint is documented at the top of each affected file so future +contributors don't reflexively reach for the popular crate. + +## Approved dependencies + +As of v0.9.6: + +| Crate | Purpose | Notes | +|---|---|---| +| `clap` | CLI parsing | derive feature only | +| `serde`, `serde_json` | (de)serialization | parse + render | +| `anyhow`, `thiserror` | error types | | +| `ureq` | HTTP | sync, rustls — no tokio | +| `strsim` | typosquat scoring | Jaro-Winkler + Levenshtein | +| `owo-colors`, `supports-color` | terminal renderer | | +| `directories` | XDG paths | | +| `toml` | `.bomdrift.toml` parsing | | +| `time = "0.3.47"` | timestamp formatting | minimal feature set | +| `sha2 = "0.10"` | partialFingerprint hashes (SARIF), VEX `@id` | | +| `spdx = "=0.10.9"` | exact-pinned SPDX expression evaluation | License-policy semantics shift on minor list updates; pin exactly | +| `base64 = "0.22"` | OCI attestation payload decoding (v0.9.6) | | + +Forbidden by policy: `tokio`, `chrono`, `semver`, `octocrab`, +`async-trait`, anything pulling rustls + ring + tokio transitively +beyond what `ureq` already brings. ## Binary size budget - **Target**: ≤ 5 MB stripped + LTO on Linux x86_64. -- **Current** (v0.3.0): ~3.4 MB. -- **Audit**: `cargo bloat --release --crates -n 20` periodically - to confirm no unexpected dep-tree growth. 
- -The dep tree as of v0.3: - -```text -clap (CLI) -serde + serde_json (parse/render) -anyhow + thiserror (errors) -ureq + rustls + ring (HTTP) -strsim (typosquat) -owo-colors + supports-color (terminal) -directories (XDG paths) -``` - -No tokio, no chrono, no octocrab, no semver, no async-trait. The -constraint is load-bearing: keep the binary small enough that cosign -verification + extraction stay sub-second on cold runners. +- **Current** (v0.9.6): ~3.4 MB. +- **Audit**: `cargo bloat --release --crates -n 20` periodically to + confirm no unexpected dep-tree growth. diff --git a/docs/src/attestation.md b/docs/src/attestation.md new file mode 100644 index 0000000..62b063a --- /dev/null +++ b/docs/src/attestation.md @@ -0,0 +1,230 @@ +# OCI artifact attestation + +bomdrift can verify that the SBOMs it diffs were signed by your +build system before any drift signal is computed. This closes the +"who produced this SBOM?" gap: you already trust the binary you +shipped through SLSA-style signing — the SBOM that describes that +binary's supply chain deserves the same scrutiny. + +Shipped in v0.9.6. The verification path is **opt-in** per flag; +existing file-based diffs (`bomdrift diff before.json after.json`) +are unaffected unless you explicitly pass attestation flags. + +## Overview + +An OCI attestation is a signed in-toto envelope, stored next to a +container image in an OCI registry, that asserts a claim about that +image. bomdrift consumes attestations whose **predicate type** is +`cyclonedx`: the predicate body is a CycloneDX SBOM, which bomdrift +then diffs against another (also-attested) SBOM. + +bomdrift does **not** ship a Sigstore client. It shells out to +[`cosign`](https://github.com/sigstore/cosign), which handles: + +- in-toto envelope signature verification, +- certificate-chain validation against Fulcio, +- transparency-log inclusion proof (Rekor), +- certificate-identity matching against your supplied regex/issuer. 
+ +bomdrift trusts cosign's verdict. If cosign exits 0, bomdrift parses +the verified predicate and feeds it to the diff core. If cosign +exits non-zero, bomdrift surfaces the cosign stderr verbatim and +exits 1. + +### Threat model gap NOT addressed + +bomdrift does not implement Sigstore protocol verification itself. +You are trusting cosign's implementation, the cosign binary on +`PATH`, and whichever Sigstore instance cosign is configured against +(public-good by default; see [Self-managed Sigstore](#self-managed-sigstore-instances)). + +## Prerequisites + +1. **Install cosign.** Follow + [the cosign installation guide](https://docs.sigstore.dev/cosign/system_config/installation/). v0.9.6 + was developed and tested against `cosign 2.x`. Pin to a specific + cosign version in your CI image so signature-verification + semantics don't drift across runs. +2. **Push your SBOMs as cyclonedx attestations** on the same OCI + reference as the binary they describe (see next section). + +## Generating attestations + +The canonical guide is [the sigstore docs](https://docs.sigstore.dev/cosign/signing/other_types/); +this section is a sketch. + +```bash +# Produce the SBOM however you do today (Syft, etc.). +syft ghcr.io/myorg/myapp@sha256:abc... -o cyclonedx-json > sbom.cdx.json + +# Sign it as an attestation against the same digest. +cosign attest \ + --predicate sbom.cdx.json \ + --type cyclonedx \ + ghcr.io/myorg/myapp@sha256:abc... +``` + +The `--type cyclonedx` flag is the predicate-type matcher bomdrift +filters on. Other predicate types (SPDX, SLSA provenance, custom) +are ignored — see [What's NOT in v0.9.6](#whats-not-in-v096). + +## Verifying with bomdrift + +Pass an OCI reference instead of a local file path via the +attestation flags: + +```bash +bomdrift diff \ + --before-attestation oci://ghcr.io/myorg/myapp@sha256:abc... \ + --after-attestation oci://ghcr.io/myorg/myapp@sha256:def...
\ --cosign-identity '^https://github.com/myorg/.+@refs/tags/v.+$' \ + --cosign-issuer https://token.actions.githubusercontent.com +``` + +### `--before-attestation ` + +OCI reference (with `oci://` scheme) of the "before" image whose +attached cyclonedx attestation is the "before" SBOM. Mutually +exclusive with the positional `<before>` argument; pass one or the +other. + +### `--after-attestation ` + +Same as above, for the "after" SBOM. + +### `--cosign-identity ` + +Required when any `--*-attestation` flag is set. RE2-syntax regex +that the certificate's identity (its Subject Alternative Name) must +match. For GitHub Actions OIDC, this is typically the workflow URL +plus a refs constraint, e.g. +`^https://github.com/myorg/myapp/.github/workflows/release\.yml@refs/tags/v.+$`. + +bomdrift passes this to cosign as `--certificate-identity-regexp`. + +### `--cosign-issuer ` + +Required when any `--*-attestation` flag is set. The OIDC issuer +that minted the signing certificate. For GitHub Actions, this is +`https://token.actions.githubusercontent.com`. + +bomdrift passes this to cosign as `--certificate-oidc-issuer`. + +## `--require-attestation` + +Hard-mode flag. When set: + +- Both `--before-attestation` and `--after-attestation` must be + provided. +- Positional `<before>` and `<after>` file arguments are **rejected** + (clap conflict). +- Any cosign verification failure exits 1; there is no fallback to + unverified file inputs. + +Use this on the production-CI gate that blocks releases. In dev +loops where you sometimes diff a local file against a published +attestation, leave `--require-attestation` off and let the operator +mix file inputs with attestation inputs. + +## What bomdrift trusts + +The trust boundaries, made explicit: + +- bomdrift trusts **cosign** to verify the in-toto envelope's + signature, certificate chain, and Rekor inclusion proof. +- bomdrift trusts **cosign** to enforce the certificate identity + regex and OIDC issuer match.
+- bomdrift does **not** independently re-verify the Sigstore + transparency log. That is `cosign verify-attestation`'s job. +- bomdrift assumes the predicate-type filter (`--type=cyclonedx`) + is honored by cosign. It is, but the assumption is documented + here so future cosign behavior changes are visible to auditors. +- bomdrift parses the verified predicate as **CycloneDX JSON**. + Anything cosign hands back that doesn't parse as CycloneDX exits + bomdrift with a parse error. + +## Self-managed Sigstore instances + +If you run your own Sigstore stack (private Fulcio + Rekor), cosign +honors the standard Sigstore env vars: + +| Variable | Purpose | +|---|---| +| `COSIGN_REKOR_URL` / `SIGSTORE_REKOR_URL` | Override the public-good Rekor instance. | +| `COSIGN_FULCIO_URL` / `SIGSTORE_FULCIO_URL` | Override Fulcio. | +| `COSIGN_OIDC_ISSUER` | Override the default OIDC issuer probed during signing. | +| `SIGSTORE_ROOT_FILE` | Pin a custom Sigstore TUF root for verification. | + +bomdrift inherits the parent process environment when shelling out +to cosign, so exporting these before invoking `bomdrift diff` is +sufficient. No bomdrift-side flags are needed. + +```bash +export SIGSTORE_REKOR_URL=https://rekor.internal.example.com +bomdrift diff --before-attestation ... --after-attestation ... ... +``` + +This path works in principle but is **not specifically tested in +v0.9.6**; please file an issue with the Sigstore stack you're +running if anything misbehaves. + +## Troubleshooting + +### `executable file not found in $PATH: cosign` + +bomdrift couldn't find cosign on `PATH`. Install per +[Prerequisites](#prerequisites), or set `PATH` so the cosign binary +is reachable from the bomdrift process. + +### `Error: no matching signatures` + +The cosign verification rejected every attached signature. Most +common cause: `--cosign-identity` regex doesn't match the actual +certificate SAN. 
Debug with cosign directly first: + +```bash +cosign verify-attestation \ + --type cyclonedx \ + --certificate-identity-regexp '<your-identity-regex>' \ + --certificate-oidc-issuer '<your-issuer-url>' \ + ghcr.io/myorg/myapp@sha256:abc... +``` + +If cosign rejects the attestation here too, you've isolated the +problem outside bomdrift, and cosign's own output is usually more revealing. + +### `predicate type mismatch` / `no attestations of the requested type` + +The OCI reference has attestations, but none of type `cyclonedx`. +bomdrift only consumes CycloneDX SBOM attestations in v0.9.6 — see +[What's NOT in v0.9.6](#whats-not-in-v096). + +### `Error: parsing CycloneDX: ...` + +cosign verified the envelope but bomdrift couldn't parse the +predicate body as CycloneDX. Inspect the raw predicate by running +the cosign command above with `-o json` and looking at +`payload.predicate`. + +## What's NOT in v0.9.6 + +- **SPDX SBOM attestations.** Only CycloneDX. SPDX-attestation + support is a future ask; file an issue if you need it. The + predicate parser is the only piece that needs to grow. +- **Direct Rekor verification.** Deferred to cosign. bomdrift will + not grow a Sigstore client implementation. +- **Air-gapped Sigstore.** The env-var path described above works + in principle (cosign supports it) but isn't part of bomdrift's + v0.9.6 test matrix. Treat it as best-effort. +- **In-process attestation (no shell-out).** Pulling in a + full-fat Sigstore Rust SDK contradicts the OSS-first / + small-dep-tree design constraint. Revisit once a minimal, + audited Rust Sigstore client exists. + +## Related + +- [Plugins](./plugins.md) — for verifying additional org-specific + signals on attested SBOMs. +- [Output formats](./output-formats.md) — verified diffs render + identically to file-based diffs. +- [Roadmap](./roadmap.md) — for the broader v0.9.6 dispositions.
diff --git a/docs/src/baseline.md b/docs/src/baseline.md index 48156bd..fc4282d 100644 --- a/docs/src/baseline.md +++ b/docs/src/baseline.md @@ -313,6 +313,49 @@ the `typosquat` array with `version_jump`, key by the after-version's `purl`. Update the entry on the next jump. +## Schema reference + +The unified `BaselineEntry` shape (introduced in v0.9.5; v0.5 string +entries continue to parse as the back-compat case): + +| Field | Type | Required | Introduced | Description | +|---|---|---|---|---| +| `id` | string | yes (when not the bare-string form) | v0.5 | Advisory identifier — `GHSA-…`, `CVE-…`, `MAL-…`, or `OSV-…`. | +| `purl` | string | no | v0.5 | Restrict the suppression to a specific component (otherwise wildcards across all components). May be versionless (`pkg:npm/foo`) or version-pinned (`pkg:npm/foo@1.2.3`). | +| `expires` | string `YYYY-MM-DD` | no | v0.8 | Strict-format expiry date. After this date the entry surfaces a warning and stops suppressing. Malformed dates fail loudly — no silent never-expiring entries. | +| `reason` | string | no | v0.8 | Free-form rationale; surfaces in the expiry warning and as the OpenVEX `statement_text` in `--emit-vex` output. | +| `vex_status` | string | no | v0.9 | One of OpenVEX's vocabulary: `not_affected`, `affected`, `fixed`, `under_investigation`. Drives `--emit-vex` output. Defaults to `under_investigation` so `--emit-vex` doesn't fabricate `not_affected` claims. | +| `vex_justification` | string | no | v0.9 | OpenVEX justification when `vex_status = not_affected`. E.g., `vulnerable_code_not_in_execute_path`, `component_not_present`. | + +Cross-link: `vex_status` and `vex_justification` are passthrough to the +[VEX emit format](./vex.md#emitting-vex). The +[License policy](./license-policy.md#suppression) +chapter covers using baseline entries to suppress `LicenseViolation` +findings (the same `id` / `purl` / `reason` schema applies; license +violations key by a synthetic ID `bomdrift.license-violation:`). 
+ +### Two valid shapes per entry + +The `suppressed_advisories` array accepts either form per entry: + +```json +{ + "suppressed_advisories": [ + "GHSA-old-school", + { + "id": "GHSA-evil-1234", + "purl": "pkg:npm/foo", + "expires": "2026-12-31", + "reason": "Awaiting upstream patch (issue #42)", + "vex_status": "under_investigation" + } + ] +} +``` + +Bare strings remain in the file for v0.5 compatibility; `bomdrift +baseline add --reason …` always emits the object form. + ## Time-boxed suppressions (`expires` + `reason`) v0.8 adds two optional fields on each `suppressed_advisories` entry: diff --git a/docs/src/cli-reference.md b/docs/src/cli-reference.md index 92e48b4..d3f42e7 100644 --- a/docs/src/cli-reference.md +++ b/docs/src/cli-reference.md @@ -2,79 +2,101 @@ This page documents every `bomdrift` subcommand and flag. The authoritative help text is `bomdrift --help` / `bomdrift <subcommand> --help`; this page -groups the same information by behavior so it's easier to look up. +groups the same information by behavior so it's easier to look up. Each +flag carries an *introduced-in* annotation so future readers can reason +about which version a feature first shipped in. ## Subcommands ```text bomdrift diff [OPTIONS] bomdrift init [--config-only] [--force] -bomdrift baseline add [--path ] +bomdrift baseline add [] [--path ] [--expires ] [--reason ] [--from-comment ] bomdrift refresh-typosquat [--ecosystem ] ``` +| Subcommand | Purpose | +|---|---| +| [`diff`](#bomdrift-diff) | Diff two SBOMs and emit findings. The everything-flag. | +| [`init`](#bomdrift-init) | Scaffold `.bomdrift.toml` + GitHub workflows. | +| [`baseline add`](#bomdrift-baseline-add) | Append an advisory ID to a baseline file. | +| [`refresh-typosquat`](#bomdrift-refresh-typosquat) | Re-fetch the bundled top-package lists. | + ## `bomdrift diff` Diff two SBOMs and surface supply-chain risk signals on changed components.
### Positional arguments -- `<before>` — path to the "before" SBOM (CycloneDX 1.5/1.6, SPDX 2.3, or Syft JSON). -- `<after>` — path to the "after" SBOM. +- `<before>` — path to the "before" SBOM (CycloneDX 1.5/1.6, SPDX 2.3, or + Syft JSON). Optional when `--before-attestation` is set instead. +- `<after>` — path to the "after" SBOM. Optional when + `--after-attestation` is set instead. -### Output flags +### Output formats #### `--output ` +*Introduced in v0.1.* Output format. One of: -- `terminal` — ANSI-colored tree-style output for human consumption. The - default when stdout is a TTY. +- `terminal` — ANSI-colored tree-style output. Default when stdout is a TTY. - `markdown` — GitHub-Flavored Markdown ready for PR-comment posting. - The default when stdout is piped/redirected. -- `json` — pretty-printed `{"changes": ..., "enrichment": ...}` graph - for downstream tooling. + Default when stdout is piped/redirected. +- `json` — pretty-printed `{"changes": ..., "enrichment": ...}` graph. - `sarif` — SARIF v2.1.0 for GitHub Code Scanning ingestion. -#### `--format ` +Config key: `[diff] output`. -Force input format detection. One of `auto` (default), `cdx`, `spdx`, `syft`. +#### `--output-file ` +*Introduced in v0.8.* -`auto` looks at the JSON top-level fields to dispatch (`bomFormat` for -CycloneDX, `spdxVersion` for SPDX, `schema` for Syft). Force-pinning is -useful when an SBOM lacks the canonical magic markers. +Write the chosen `--output` format to this path instead of stdout. Useful +for SARIF (`--output sarif --output-file bomdrift.sarif`), where quoting +a shell `>` redirection inside YAML CI templates is fragile. + +#### `--format ` +*Introduced in v0.1.* + +Force input format detection. `auto` (default) / `cdx` / `spdx` / `syft`. +`auto` looks at the JSON top-level fields to dispatch. #### `--summary-only` +*Introduced in v0.3.* Markdown-only. -Markdown-only. Emits just the summary table + a footer pointing at the -full output.
Used by the action's comment-size fallback when the full -diff exceeds GitHub's 65,536-char comment-body cap. +Emits just the summary table + a footer pointing at the full output. Used +by the action's comment-size fallback when the full diff exceeds GitHub's +65,536-char comment-body cap. #### `--findings-only` +*Introduced in v0.6.* Markdown-only. + +Keeps the summary table and risk-bearing sections (vulnerabilities, +typosquats, version jumps, young maintainers, license changes, +registry-metadata findings) but omits raw Added / Removed / Version +changed detail tables. Useful when a PR intentionally updates a large +lockfile and reviewers only want the actionable findings inline. -Markdown-only. Keeps the summary table and risk-bearing sections -(vulnerabilities, typosquats, version jumps, young maintainers, license -changes) but omits raw Added / Removed / Version changed detail tables. -This is useful when a PR intentionally updates a large lockfile and -reviewers only want the actionable findings inline. +#### `--include-file-components` +*Introduced in v0.5.* -The counts still appear in the summary table, so churn is visible even -when the long per-dependency rows are hidden. +Keep `Ecosystem::Other("file")` pseudo-components emitted by Syft's +directory cataloger. Off by default — those produce phantom Added/Removed +pairs that drown real package changes. ### Repo policy config #### `--config ` +*Introduced in v0.6.* -Load defaults from a `.bomdrift.toml` policy file. When omitted, -`bomdrift diff` auto-loads `.bomdrift.toml` from the current working -directory if it exists; missing default config is ignored. An explicit -`--config` path must exist and parse. +Load defaults from a `.bomdrift.toml` policy file. When omitted, an +existing `.bomdrift.toml` in the current working directory is loaded +automatically; missing default config is ignored. An explicit `--config` +path must exist and parse. -CLI flags override config values for one-off runs. 
Positive booleans in -config, such as `findings_only = true`, turn the behavior on; v0.6 does -not add parallel `--no-*` flags to turn those booleans off from the CLI. +CLI flags override config values for one-off runs. -Example: +Example `.bomdrift.toml`: ```toml [diff] @@ -83,16 +105,193 @@ baseline = ".bomdrift/baseline.json" findings_only = true max_added = 25 max_version_changed = 10 + +# Calibration knobs (v0.9.6+) +typosquat_similarity_threshold = 0.92 +young_maintainer_days = 90 +cache_ttl_hours = 24 + +[license] +allow = ["Apache-2.0", "MIT", "BSD-*"] +deny = ["GPL-*", "AGPL-*"] +allow_exceptions = ["LLVM-exception", "Classpath-exception-2.0"] ``` -Supported `[diff]` keys map to the CLI flags: `output`, `format`, -`no_osv`, `no_osv_cache`, `baseline`, `no_maintainer_age`, `fail_on`, -`summary_only`, `findings_only`, `include_file_components`, `repo_url`, -`platform`, `max_added`, `max_removed`, and `max_version_changed`. +### Suppression + +#### `--baseline ` +*Introduced in v0.5.* + +Path to a JSON snapshot whose findings should be suppressed from this +run's output (and from the `--fail-on` trip evaluation). Match keys are +conservative — a finding at a different version than baseline still +surfaces. See [Baseline & suppression](./baseline.md) for the schema and +match-key semantics. + +#### `--vex ` +*Introduced in v0.9.* Repeatable. + +Path(s) to VEX (Vulnerability Exploitability eXchange) files to consume. +Each file is auto-detected as either OpenVEX 0.2.0 or CycloneDX VEX 1.6. +Statements with status `not_affected` / `fixed` suppress matching +findings; `under_investigation` annotates without suppressing; +`affected` annotates as a no-op badge. See [VEX](./vex.md) for the +finding-id matching rules including the synthetic-id convention. + +#### `--emit-vex ` +*Introduced in v0.9.* + +Emit a single OpenVEX 0.2.0 doc covering every finding in the +post-baseline diff. 
Baseline-suppressed entries inherit their +`vex_status` from the baseline entry (defaulting to +`under_investigation`); un-suppressed findings emit as `affected`. + +#### `--vex-author ` +*Introduced in v0.9.* + +VEX `author` for `--emit-vex`. Falls back to `repo_url`, then to +`"bomdrift"`. + +#### `--vex-default-justification ` +*Introduced in v0.9.* + +Default OpenVEX `justification` written into emitted statements when +the source baseline entry doesn't supply one. Defaults to +`"vulnerable_code_not_in_execute_path"`. + +### Enrichment toggles + +Each of these disables one enricher entirely (no network, no cache +lookup). All default to **on**. + +| Flag | Disables | Introduced | +|---|---|---| +| `--no-osv` | OSV.dev CVE lookup | v0.1 | +| `--no-osv-cache` | The 24h on-disk OSV severity cache only — keeps OSV enabled | v0.3 | +| `--no-maintainer-age` | GitHub-REST maintainer-age enricher | v0.2 | +| `--no-epss` | FIRST.org EPSS enricher | v0.8 | +| `--no-kev` | CISA KEV enricher | v0.8 | +| `--no-registry` | Registry-metadata enrichers (npm/PyPI/crates.io) | v0.9 | + +#### `--recently-published-days ` +*Introduced in v0.9.* + +Recently-published threshold in days for the registry enricher +(default `14`). Set to `0` to disable that specific kind without +disabling the other registry checks. + +### Calibration + +bomdrift's heuristic enrichers ship with conservative defaults that work +for most repos. When the defaults aren't right at scale, every threshold +is tunable either through `[diff]` keys in `.bomdrift.toml` or via the +matching CLI flag. CLI flags override config values for one-off runs. + +#### `--typosquat-similarity-threshold ` +*Introduced in v0.9.6.* + +Type: float in `[0.0, 1.0]`. Default: `0.92`. +Config key: `[diff] typosquat_similarity_threshold`. + +Minimum normalized edit-distance similarity between a candidate package +name and a top-list entry before bomdrift flags it as a possible +typosquat. 
Higher values = stricter (fewer false positives, more false +negatives). Lowering to `0.85` catches softer near-misses; raising to +`0.95` only catches one- or two-character swaps on short names. + +#### `--young-maintainer-days ` +*Introduced in v0.9.6.* + +Type: positive integer (days). Default: `90`. +Config key: `[diff] young_maintainer_days`. + +A package's top contributor whose oldest commit is newer than this many +days is flagged as a young-maintainer signal. Defaults to a quarter; +raise to `180` for stricter ecosystems, lower to `30` for tighter +signals. + +#### `--cache-ttl-hours ` +*Introduced in v0.9.6.* + +Type: positive integer (hours). Default: `24`. +Config key: `[diff] cache_ttl_hours`. + +Time-to-live for the OSV / EPSS / KEV / registry-metadata caches under +`/bomdrift/`. The same TTL applies to all four caches +(v0.9.6 unified the previously duplicated constants). Lower to `1` for +fast-changing security feeds in long-running self-hosted runners; raise +to `168` (one week) when running offline. + +### License policy + +#### `--allow-licenses ` / `--deny-licenses ` +*Introduced in v0.8.* Comma-separated, repeatable. + +SPDX license identifiers (or `*`-suffix globs) permitted / forbidden by +policy. Deny wins when a license matches both. CLI flag takes precedence +over `[license] allow` / `deny` in `.bomdrift.toml` (override, not +merge). v0.9 adds full SPDX expression evaluation via the `spdx` crate +so compound expressions like `(MIT OR GPL-3.0)` evaluate correctly. + +#### `--allow-exception ` / `--deny-exception ` +*Introduced in v0.9.5.* Comma-separated, repeatable. + +SPDX exception identifiers (e.g. `LLVM-exception`, +`Classpath-exception-2.0`) permitted / forbidden as the right-hand side +of a `WITH` clause. When set, `Apache-2.0 WITH ` violates policy +even if `Apache-2.0` is on the base allow list. Empty lists preserve +v0.9 behavior (exception treated as informational). 
+ +#### `--allow-ambiguous-licenses` +*Introduced in v0.8.* + +When set, compound SPDX expressions like `(MIT OR GPL-3.0)` are +permitted. Off by default — fail-closed. + +See [License policy](./license-policy.md) for the full evaluation +semantics. + +### Failure thresholds + +#### `--fail-on ` +*Introduced in v0.2; expanded across v0.4 / v0.8 / v0.9.* + +Exit with code 2 when findings of the configured threshold surface. One of: + +- `none` — never trips (default). +- `cve` — trips on any CVE / GHSA / MAL advisory finding. +- `critical-cve` — trips when at least one finding has `severity >= High` + per the OSV-fetched severity. (Naming kept for back-compat — covers + the HIGH-or-CRITICAL bucket; HIGH alone is the common + actively-exploited case.) +- `typosquat` — trips on any typosquat finding. +- `license-change` — trips on same-version license changes. +- `kev` — trips on any advisory in the CISA KEV catalog (v0.8+). +- `recently-published` / `deprecated` — registry-metadata finding gates (v0.9+). +- `any` — trips on any finding. + +The PR-comment body is written to stdout **before** exit-2 — the +action's `tee` + `PIPESTATUS` wrapper relies on this so the comment +posts even when the workflow step fails. + +#### `--fail-on-epss ` +*Introduced in v0.8.* + +Trip exit-2 when any advisory's EPSS score is >= this threshold +(`0.0` – `1.0`). Recommended starting point: `0.5` (top decile of +actively-exploited CVEs). + +#### Diff budgets + +`--max-added `, `--max-removed `, and `--max-version-changed ` +fail the run with exit code 2 when a diff exceeds the configured +dependency-churn budget. Introduced in v0.4. The rendered body is still +written before exit, just like `--fail-on`. -### Forge / CI integration +### Forge integration #### `--platform ` +*Introduced in v0.7; expanded in v0.9 (Bitbucket / Azure DevOps).* `github` (default), `gitlab`, `bitbucket`, or `azure-devops`. 
Drives the rendered markdown comment's footer: @@ -101,86 +300,111 @@ the rendered markdown comment's footer: comment-driven flow (requires the [comment-suppress sub-action](./baseline.md#in-comment-suppression-v05)). - `gitlab` — `/-/issues/new?issuable_template=false-positive` URL - shape, points reviewers at `bomdrift baseline add ` (with an - optional advanced webhook bridge for in-comment suppression — see - [GitLab CI](./gitlab-ci.md)). -- `bitbucket` — `/issues/new` URL shape, `bomdrift baseline add ` - manual suppression flow. + shape; manual `bomdrift baseline add ` flow or the optional + Cloudflare Worker bridge for in-comment suppression. See + [GitLab CI](./gitlab-ci.md). +- `bitbucket` — `/issues/new` URL shape; comment-bridge in v0.9.5+. + See [Bitbucket](./bitbucket.md). - `azure-devops` — `/_workitems/create?templateName=false-positive` - URL shape, `bomdrift baseline add ` manual suppression flow. + URL shape; comment-bridge in v0.9.5+. See + [Azure DevOps](./azure-devops.md). When the flag is omitted, bomdrift auto-detects from CI environment variables in this order: `GITLAB_CI=true` → GitLab, `BITBUCKET_BUILD_NUMBER` → Bitbucket, `TF_BUILD` → Azure DevOps, -otherwise GitHub. The explicit flag always wins. Also configurable -via `[diff] platform = ""` in `.bomdrift.toml`. +otherwise GitHub. The explicit flag always wins. Also configurable via +`[diff] platform = ""` in `.bomdrift.toml`. Set in lockstep with `--repo-url` (or `BOMDRIFT_REPO_URL`, or — on GitLab CI — `CI_PROJECT_URL`). Without a URL the footer is omitted entirely; the platform flag controls only the footer's *shape*. -See [GitLab CI](./gitlab-ci.md) for the full template. +#### `--repo-url ` +*Introduced in v0.5.* -### Enrichment flags +Repository URL used to render the markdown comment's +action-affordance footer. Falls back to `BOMDRIFT_REPO_URL` env var. -#### `--no-osv` +### Attestation (OCI-fetched SBOMs) -Skip OSV.dev CVE enrichment entirely. Use for offline runs and tests. 
-Equivalent to `--fail-on=cve` not tripping (no vulns to trip on). +*All flags in this section introduced in v0.9.6.* See +[OCI attestation](./attestation.md) for end-to-end usage. -#### `--no-osv-cache` +#### `--before-attestation ` -Bypass the on-disk OSV severity cache at `/bomdrift/osv/`. -Use for paranoid reruns where you want fresh fetches even within the -24h TTL window. The cache is purely an optimization — `--no-osv-cache` -always works. +Fetch the "before" SBOM as a `cosign verify-attestation`-verified +attestation attached to an OCI artifact instead of reading a local +file. Mutually exclusive with the positional `before` argument. +Requires `--cosign-identity` and `--cosign-issuer`. -#### `--no-maintainer-age` +#### `--after-attestation ` -Skip the maintainer-age enricher (no GitHub API calls). Use for offline -runs and tests; required when `GITHUB_TOKEN` is unset and the -unauthenticated rate limit (60/hr) is too low for the diff being -analyzed. +Same, for the "after" side. Mutually exclusive with the positional +`after` argument. -### Failure thresholds +#### `--cosign-identity ` -#### `--fail-on ` +Regex passed to `cosign verify-attestation +--certificate-identity-regexp`. Required when either +`--before-attestation` or `--after-attestation` is set. Example: +`https://github.com/owner/.+`. -Exit with code 2 when findings of the configured threshold surface. One of: +#### `--cosign-issuer ` -- `none` — never trips (default). -- `cve` — trips on any CVE / GHSA / MAL advisory finding. -- `critical-cve` — trips when at least one finding has `severity >= High` - per the OSV-fetched severity. The "critical" name covers the - HIGH-or-CRITICAL bucket; CRITICAL alone is rare in GHSA tagging, and - many actively-exploited advisories ship as HIGH. -- `typosquat` — trips on any typosquat finding (always `severity = none`, - but the threshold lets you gate on the structural signal). -- `license-change` — trips on same-version license changes. 
-- `any` — trips on any finding (CVE, typosquat, version-jump, - maintainer-age) OR any license-changed-without-version-bump. +URL passed to `cosign verify-attestation --certificate-oidc-issuer`. +Required alongside `--cosign-identity`. Example: +`https://token.actions.githubusercontent.com`. -The PR-comment body is written to stdout **before** exit-2 — the action's -`tee` + `PIPESTATUS` wrapper relies on this so the comment posts even -when the workflow step fails. +#### `--require-attestation` -#### Diff budgets +Refuse to fall back to local-file SBOMs: both sides MUST come from a +verified OCI attestation. Both `--before-attestation` and +`--after-attestation` must therefore be set. -`--max-added `, `--max-removed `, and -`--max-version-changed ` fail the run with exit code 2 when a diff -exceeds the configured dependency-churn budget. The rendered body is -still written before exit, just like `--fail-on`, so GitHub Actions can -post the PR comment and then block the merge. +### Plugins -#### `--baseline ` +#### `--plugin ` +*Introduced in v0.9.6.* Repeatable. + +Path to a plugin manifest TOML. Each plugin is an external executable +invoked once per added / version-changed component with JSON over +stdin/stdout. Plugin failures (timeout, non-zero exit, malformed JSON) +drop their findings without failing the diff. See [Plugins](./plugins.md) +for the protocol reference and a worked example. + +### Diagnostics + +#### `--debug-calibration` +*Introduced in v0.7.* + +Off by default. When set, `bomdrift diff` writes one row to stderr per +finding it considers, with the schema: + +``` +kind|key|score|threshold +``` + +`kind` is one of `typosquat`, `version-jump`, `maintainer-age`, `cve`, +`recently-published`, `deprecated`, `maintainer-set-changed`. `key` is +a stable identifier (the package purl, advisory ID, etc.). `score` and +`threshold` are the numeric inputs to the gating decision. + +The flag is purely diagnostic — it doesn't change which findings get +rendered.
Pipe to a file: -Path to a previously captured `bomdrift diff --output json` snapshot. -Findings present in the baseline are suppressed from the rendered output -and from the `--fail-on` trip-evaluation. Match keys are conservative — -a finding at a different version than baseline still surfaces. See -[Baseline & suppression](./baseline.md) for full match-key semantics. +```bash +bomdrift diff old.cdx.json new.cdx.json --debug-calibration 2> calibration.tsv +``` + +#### `--debug-calibration-format ` +*Introduced in v0.8.* + +`pipe` (default, back-compat with v0.7) emits `kind|key|score|threshold` +per line; `jsonl` emits one JSON object per line for downstream tooling +that doesn't want to maintain a custom CSV-ish parser. ## `bomdrift init` +*Introduced in v0.6.* Scaffold a copy-paste adoption setup in the current repository: @@ -188,7 +412,7 @@ Scaffold a copy-paste adoption setup in the current repository: bomdrift init ``` -This writes: +Writes: - `.bomdrift.toml` - `.github/workflows/sbom-diff.yml` @@ -200,65 +424,58 @@ Flags: - `--force` — overwrite existing generated files. Without `--force`, existing files are preserved and the command fails loudly. -## `bomdrift refresh-typosquat` - -Refresh the bundled typosquat top-package lists from upstream sources. - -### Flags +## `bomdrift baseline add` +*Introduced in v0.5; `--expires`/`--reason` v0.8; `--from-comment` v0.9.* -#### `--ecosystem ` +Append an advisory ID to a baseline file's `suppressed_advisories` list. +The file is created if missing; existing fields are preserved. Idempotent +(re-adding an existing ID is a no-op). -Which ecosystem's list to refresh. One of: - -- `all` — refresh every ecosystem with a wired-up fetcher (default). - Expands to all eight supported ecosystems as of v0.4. -- `npm` — top-1000 from the anvaka/npmrank gist. -- `pypi` — top-200 from hugovk/top-pypi-packages. -- `cargo` — top-200 from the crates.io API (paginated, polite 1 req/s). 
-- `nuget` — top-200 from the nuget.org v3 search API - (`orderby=totalDownloads&take=200`). No pagination at this list size. -- `maven` / `go` / `gem` / `composer` — accepted but no-op. Each - ecosystem lacks a stable public popularity feed; the curated - `data/-top*.txt` snapshots shipped in the binary remain the - source of truth. Refreshing those means editing the file and - rebuilding. +```bash +bomdrift baseline add GHSA-xxxx-yyyy-zzzz +bomdrift baseline add CVE-2026-12345 --path custom/baseline.json +bomdrift baseline add GHSA-evil-1234 \ + --expires 2026-12-31 \ + --reason "Awaiting upstream patch (issue #42)" +``` -Refreshed lists are written to `/bomdrift/typosquat/.txt` -via temp-file + atomic rename. The typosquat enricher prefers cache files -over the embedded snapshot when present and parseable. +### Flags -## Calibration +- `` — advisory identifier (GHSA/CVE/MAL/OSV). Optional when + `--from-comment` is supplied. +- `--path ` — baseline file path. Default + `.bomdrift/baseline.json`. +- `--expires ` — strict-format expiry; bomdrift refuses + malformed dates (no silent never-expiring entries). +- `--reason ` — free-form rationale; surfaces in VEX exports + + expiry warnings. +- `--from-comment ` — parse a `/bomdrift suppress [reason: ...]` + directive from a forge-issued comment body. Used by the GitLab / + Bitbucket / Azure DevOps comment-bridge Workers. Exits non-zero on + no-match so a webhook never silently no-ops. -#### `--debug-calibration` +## `bomdrift refresh-typosquat` +*Introduced in v0.4.* -Off by default. When set, `bomdrift diff` writes one -pipe-delimited line to stderr per finding it considers, with the -schema: +Refresh the bundled typosquat top-package lists from upstream sources. -``` -kind|key|score|threshold +```bash +bomdrift refresh-typosquat # all wired-up ecosystems +bomdrift refresh-typosquat --ecosystem pypi # one specific list ``` -`kind` is one of `typosquat`, `version-jump`, `maintainer-age`, or -`cve`. 
`key` is a stable identifier (the package purl, advisory ID, -etc.). `score` and `threshold` are the numeric inputs to the -gating decision — for `cve` the score column carries the severity -bucket label rather than a numeric CVSS score (bomdrift doesn't -parse CVSS numerically). - -Pipe-delimited because purls contain commas. The flag is purely -diagnostic — it doesn't change which findings get rendered. Pipe -to a file: +#### `--ecosystem ` -```bash -bomdrift diff old.cdx.json new.cdx.json --debug-calibration 2> calibration.tsv -``` +Which ecosystem's list to refresh. One of `all` (default), `npm`, +`pypi`, `cargo`, `nuget`, `maven`, `go`, `gem`, `composer`. The first +four fetch from canonical upstream feeds; the remaining four are +curated `data/-top*.txt` snapshots and `--ecosystem ` for +those emits a notice rather than fetching. -If you collect a calibration sample across many PRs and have a -hunch on a better default for `SIMILARITY_THRESHOLD` / -`YOUNG_MAINTAINER_DAYS`, please share on issue -[#5](https://github.com/Metbcy/bomdrift/issues/5) — there is no -telemetry; you own the file. +Refreshed lists are written to +`/bomdrift/typosquat/.txt` via temp-file + atomic +rename. The typosquat enricher prefers cache files over the embedded +snapshot when present and parseable. ## Exit codes @@ -267,16 +484,20 @@ telemetry; you own the file. | 0 | Success. | | 1 | bomdrift internal error (parse failure, network mishap not gated by best-effort path, etc.). | | 2 | `--fail-on` threshold or diff budget tripped. The body is still on stdout — the action posts it before propagating the exit code. | -| (clap 2) | Usage error from clap (unknown flag, missing required argument). Distinguishable from exit-2 from `--fail-on` by stderr containing `error: ...` rather than the v0.2 caveat warning. | +| (clap 2) | Usage error from clap (unknown flag, missing required argument). 
Distinguishable from `--fail-on`-driven exit 2 by stderr containing `error: ...` rather than the rendered body. | ## Environment variables -| Variable | Purpose | -|---|---| -| `GITHUB_TOKEN` | Bumps the GitHub REST rate limit from 60/hr unauth to 5000/hr authenticated, used by the maintainer-age enricher. | -| `BOMDRIFT_REPO_URL` | Fallback for `--repo-url` when the flag isn't passed. Used to render the comment footer's "Report this finding" / "Suppress" links. | -| `GITLAB_CI` | When `true`, auto-selects `--platform gitlab` (unless overridden). | -| `CI_PROJECT_URL` | On GitLab CI, used as a final fallback for `--repo-url` after `BOMDRIFT_REPO_URL`. | -| `XDG_CACHE_HOME` | Cache root for the OSV severity cache and the refreshed typosquat lists. Defaults to `~/.cache` on Linux. | -| `NO_COLOR` | Honored by the terminal renderer; falls back to plain output. | -| `CLICOLOR_FORCE` | Honored by the terminal renderer; forces ANSI even on a non-TTY. | +| Variable | Purpose | Introduced | +|---|---|---| +| `GITHUB_TOKEN` | Bumps the GitHub REST rate limit from 60/hr unauth to 5000/hr authenticated, used by the maintainer-age enricher. | v0.2 | +| `BOMDRIFT_REPO_URL` | Fallback for `--repo-url` when the flag isn't passed. | v0.5 | +| `GITLAB_CI` | When `true`, auto-selects `--platform gitlab` (unless overridden). | v0.7 | +| `BITBUCKET_BUILD_NUMBER` | When set, auto-selects `--platform bitbucket`. | v0.9 | +| `TF_BUILD` | When set, auto-selects `--platform azure-devops`. | v0.9 | +| `CI_PROJECT_URL` | On GitLab CI, used as a final fallback for `--repo-url` after `BOMDRIFT_REPO_URL`. | v0.7 | +| `XDG_CACHE_HOME` | Cache root for OSV / EPSS / KEV / registry caches and refreshed typosquat lists. Defaults to `~/.cache` on Linux. | v0.1 | +| `SOURCE_DATE_EPOCH` | When set, used as "now" for byte-deterministic output (baseline-expiry comparisons, VEX timestamps, etc.). | v0.9 | +| `NO_COLOR` | Honored by the terminal renderer. 
| v0.1 | +| `CLICOLOR_FORCE` | Forces ANSI even on a non-TTY. | v0.1 | +| `BOMDRIFT_DEBUG` | When `1`, enables verbose stderr notes from best-effort enrichers. | v0.8 | diff --git a/docs/src/enrichers/epss.md b/docs/src/enrichers/epss.md index 3e644b2..6f8a97c 100644 --- a/docs/src/enrichers/epss.md +++ b/docs/src/enrichers/epss.md @@ -29,6 +29,12 @@ bomdrift diff before.json after.json --fail-on-epss 0.5 Exits 2 when any advisory has score ≥ 0.5. 0.5 is roughly the top decile of actively-exploited CVEs; tune for your team's risk appetite. +## Calibration + +- `--cache-ttl-hours ` (v0.9.6+) — overrides the default 24h disk + cache TTL for the EPSS scores cache. +- `--fail-on-epss ` — threshold gate; see [Threshold gating](#threshold-gating). + ## Disabling ```bash diff --git a/docs/src/enrichers/kev.md b/docs/src/enrichers/kev.md index 5e65de9..f628151 100644 --- a/docs/src/enrichers/kev.md +++ b/docs/src/enrichers/kev.md @@ -25,6 +25,15 @@ bomdrift diff before.json after.json --fail-on kev Exits 2 when any advisory has its KEV flag set. `--fail-on any` also includes KEV. +## Calibration + +### `--cache-ttl-hours ` (v0.9.6+) + +The 24h TTL for the catalog file is now configurable via the unified +cache-TTL knob. Lower for faster CISA-update propagation in +long-running self-hosted runners; raise when running offline or against +archived SBOMs. + ## Disabling ```bash diff --git a/docs/src/enrichers/maintainer-age.md b/docs/src/enrichers/maintainer-age.md index 0425f5b..a0e9f2f 100644 --- a/docs/src/enrichers/maintainer-age.md +++ b/docs/src/enrichers/maintainer-age.md @@ -18,15 +18,16 @@ their first PR less than 90 days ago" at the moment a new dep is added. ## Threshold -**90 days.** Intentionally aggressive: most legitimate new packages will -trip this on initial introduction. That's fine — a human reviewer can -dismiss "the package is brand-new and the author is its only maintainer" -trivially. +**90 days** by default. 
Intentionally aggressive: most legitimate new +packages will trip this on initial introduction. That's fine — a human +reviewer can dismiss "the package is brand-new and the author is its +only maintainer" trivially. The expensive miss is the **silent takeover** of an existing package by a recently-arrived contributor, which is what the 90-day window -captures. Tune later if the false-positive rate is unworkable in -practice. +captures. Tune for your environment via `--young-maintainer-days ` +or `[diff] young_maintainer_days = ` (v0.9.6+); see +[Calibration](#calibration) below. ## How it works @@ -43,9 +44,10 @@ For each `cs.added` component with a GitHub `source_url`: "first commit by author" pagination trick is slow on prolific contributors (last page can be page 50+) but is correct without needing the GraphQL API. -5. **Compare against the SBOM-after timestamp** (or `Utc::now()` when - the SBOM lacks a metadata timestamp). Flag when the first commit - is younger than `YOUNG_MAINTAINER_DAYS = 90`. +5. **Compare against the SBOM-after timestamp** (or `clock::now()` when + the SBOM lacks a metadata timestamp). Flag when the first commit is + younger than `YOUNG_MAINTAINER_DAYS` (default 90; tunable via + `--young-maintainer-days ` in v0.9.6+). ## Skipped cases @@ -73,9 +75,31 @@ requests. transitive crates. Hand-rolled `ureq` GETs + a 25-line ISO-8601 parser keep the bomdrift binary under our 5 MB target. -## `--no-maintainer-age` +## Calibration -Skip the entire enricher (no GitHub API calls). Required for: +`--young-maintainer-days ` (CLI; v0.9.6+) or `[diff] +young_maintainer_days = ` in `.bomdrift.toml` overrides the 90-day +default. Must be `>= 1`. + +A maintainer is flagged when their first commit is *younger* than the +threshold, so a larger value flags more packages and a smaller value +flags fewer. Recommended ranges: + +- `30`–`60` for ecosystems with high contributor churn where the default + surfaces too many legitimate first-time-author packages. +- `90` (default) for general-purpose use; the calibration target for + the xz pattern. +- `180` for paranoid security-sensitive monorepos that want a wider + window at the cost of more findings to triage.
+ +The threshold also appears in `--debug-calibration` rows so collected +samples can guide tuning: + +``` +maintainer-age|||90 +``` + +## Disabling + +`--no-maintainer-age` skips the entire enricher (no GitHub API calls). +Required for: - Offline runs and tests. - CI environments where `GITHUB_TOKEN` is unset and the @@ -92,21 +116,24 @@ bomdrift diff before.json after.json --no-maintainer-age Always informational. The maintainer-age signal **never** trips `--fail-on critical-cve`; it surfaces only under `--fail-on any`. The intent is for human review, not gating: many legitimate packages have -brand-new authors, and the 90-day threshold is calibrated to surface -the xz-style pattern, not to fail the build automatically. - -## Calibration roadmap - -The 90-day threshold is the v0.3 default. Future calibration possibilities: - -- **Tunable threshold flag** — `--maintainer-age-days `. Trivial to - add; gated on someone wanting it (issue welcome). -- **Multi-signal fusion** — combine maintainer-age with "package has - zero downloads" or "package was published yesterday" to narrow the - false-positive rate. -- **GraphQL pagination** — replace the REST `last-page` trick with a - GraphQL query that returns the first commit's date directly. Cuts - one round-trip per component but adds a token requirement (the - GraphQL endpoint always wants auth). +brand-new authors, and the threshold is calibrated to surface the +xz-style pattern, not to fail the build automatically. + +## Calibration roadmap (v0.9.6+ status) + +Past calibration backlog and how each item resolved: + +- **Tunable threshold flag** — *shipped in v0.9.6* as + `--young-maintainer-days `. See [Calibration](#calibration) above. +- **Multi-signal fusion** — combine maintainer-age with the registry + enricher's "recently-published" or "maintainer-set-changed" findings + to narrow the false-positive rate. 
The signals all surface in the + same diff today; explicit fusion in a single composite finding is a + v1.0 follow-up. +- **GraphQL pagination** — *decided not to pursue.* Adds a token + requirement (the GraphQL endpoint always wants auth) for one + saved round-trip per component. The `last-page` REST trick is + documented as the canonical approach; see the module doc-comment in + `src/enrich/maintainer.rs` for the rationale. See [Roadmap](../roadmap.md) for the current backlog. diff --git a/docs/src/enrichers/overview.md b/docs/src/enrichers/overview.md index 8151376..ddde47b 100644 --- a/docs/src/enrichers/overview.md +++ b/docs/src/enrichers/overview.md @@ -9,12 +9,17 @@ without that enricher's findings. ## Shipping enrichers -| Enricher | Source | Network? | Default | Opt-out flag | -|---|---|---|---|---| -| [OSV.dev CVE lookup](./osv-cve.md) | OSV.dev `/v1/querybatch` + `/v1/vulns/{id}` | yes | on | `--no-osv` | -| [Typosquat](./typosquat.md) | Embedded top-N lists, optional XDG cache | no | on | (none — pure compute) | -| [Multi-major version jump](./version-jump.md) | The diff itself | no | on | (none — pure compute) | -| [Maintainer age](./maintainer-age.md) | GitHub REST `/repos/.../contributors` + `/commits` | yes | on | `--no-maintainer-age` | +| Enricher | Source | Network? 
| Default | Opt-out flag | Calibration | +|---|---|---|---|---|---| +| [OSV.dev CVE lookup](./osv-cve.md) | OSV.dev `/v1/querybatch` + `/v1/vulns/{id}` | yes | on | `--no-osv` | `--cache-ttl-hours` (v0.9.6) | +| [EPSS](./epss.md) | FIRST.org `/api/v1/epss` | yes | on | `--no-epss` | `--cache-ttl-hours`; `--fail-on-epss <0.0–1.0>` | +| [CISA KEV](./kev.md) | CISA known-exploited catalog | yes | on | `--no-kev` | `--cache-ttl-hours`; `--fail-on kev` | +| [Typosquat](./typosquat.md) | Embedded top-N lists, optional XDG cache | no | on | (none — pure compute) | `--typosquat-similarity-threshold` (v0.9.6) | +| [Multi-major version jump](./version-jump.md) | The diff itself | no | on | (none — pure compute) | (hard-coded `MIN_MAJOR_DELTA = 2` — see chapter for rationale) | +| [Maintainer age](./maintainer-age.md) | GitHub REST `/repos/.../contributors` + `/commits` | yes | on | `--no-maintainer-age` | `--young-maintainer-days` (v0.9.6) | +| [Registry metadata](./registry.md) | npm / PyPI / crates.io public APIs | yes | on (v0.9+) | `--no-registry` | `--recently-published-days`; `--cache-ttl-hours` | +| [License policy](../license-policy.md) | SBOM `licenses` field + SPDX expression eval | no | on | (configured by allow/deny lists) | `--allow-licenses`, `--deny-licenses`, `--allow-exception`, `--deny-exception` | +| [Plugins](../plugins.md) | External-process plugins (v0.9.6+) | varies | off (opt-in) | (don't pass `--plugin`) | (per-plugin manifest) | ## Best-effort contract @@ -47,17 +52,31 @@ upsert PR comments in place: identical inputs render to byte-identical output, so the comment body is patched only when the diff genuinely changes. -## Why these four signals? +## Why these signals? -The four enrichers were chosen because each maps to a real, recent, +The enricher set was chosen because each maps to a real, recent, high-impact incident class: -- **OSV.dev CVE lookup**: published advisories. +- **OSV.dev CVE lookup**: published advisories, the broadest signal. 
+- **EPSS**: probability of exploitation in next 30 days; dampens + false-urgency on Critical-CVSS-but-low-exploitation advisories. +- **CISA KEV**: known-exploited; the highest-confidence "act now" filter. - **Typosquat**: malicious packages mimicking popular ones (the `plain-crypto-js` axios dropper, the PyPI campaigns 2024–2026). - **Multi-major version jump**: takeover swaps, namespace reuse. - **Maintainer age**: long-game social-engineering campaigns (xz / Jia Tan). +- **Registry metadata**: recently-published, deprecated, + maintainer-set-changed — the npm Shai-Hulud-style worm precursors. +- **License policy**: not a malicious-code signal but a policy gate that + the same diff-time reviewer is best positioned to enforce. -Future enrichers will live alongside these in the same module structure; -see [Roadmap](../roadmap.md) for what's planned. +For organizations with environment-specific rules outside this list, the +v0.9.6 [Plugins](../plugins.md) protocol lets you layer custom +enrichers on top without forking bomdrift. + +## See also + +- [CLI reference — Enrichment toggles](../cli-reference.md#enrichment-toggles) +- [CLI reference — Calibration](../cli-reference.md#calibration) +- [Architecture — Best-effort enricher contract](../architecture.md#best-effort-enricher-contract) diff --git a/docs/src/enrichers/registry.md b/docs/src/enrichers/registry.md index 2963ff9..2ed082a 100644 --- a/docs/src/enrichers/registry.md +++ b/docs/src/enrichers/registry.md @@ -34,13 +34,27 @@ A registry timeout, parse error, or unsupported ecosystem returns `Ok` with no findings. Diff rendering NEVER blocks on registry responses. -## Flags +## Calibration -- `--no-registry` — skip all three checks (alias to disabling the - `[diff] no_registry = true` config key). - `--recently-published-days ` — override the default 14-day threshold. Set `--recently-published-days 0` to disable that check while keeping deprecation / maintainer-set-changed. 
+- `--cache-ttl-hours ` (v0.9.6+) — overrides the default 24h disk + cache TTL for the per-ecosystem registry caches. + +## Disabling + +```bash +bomdrift diff before.json after.json --no-registry +``` + +Disables all three checks at once. Equivalent to `[diff] no_registry = +true` in `.bomdrift.toml`. + +## Flags + +- `--no-registry` — skip all three checks. +- `--recently-published-days ` — see [Calibration](#calibration). - `--fail-on recently-published`, `--fail-on deprecated` — exit-2 thresholds. diff --git a/docs/src/enrichers/typosquat.md b/docs/src/enrichers/typosquat.md index 141c939..66ca673 100644 --- a/docs/src/enrichers/typosquat.md +++ b/docs/src/enrichers/typosquat.md @@ -172,6 +172,37 @@ of truth; refreshing those means editing `data/-top*.txt` and rebuilding bomdrift. PRs adding names to the curated lists are welcome. +## Calibration + +### `--typosquat-similarity-threshold ` (v0.9.6+) + +Default `0.92`, range `[0.0, 1.0]`. Configurable via CLI flag or +`[diff] typosquat_similarity_threshold = ` in `.bomdrift.toml`. + +The threshold applies to the JW + suffix-boost path (npm, PyPI, Cargo, +RubyGems, NuGet, Go, Composer). The Maven Levenshtein-≤-2 path is +hardcoded — Levenshtein distance and JW similarity aren't directly +comparable, so a single threshold flag would either over- or +under-suppress on Maven. + +Recommended ranges: + +- `0.95` — very strict; only catches near-perfect matches. Good for + tightening down false positives in monorepos with many internally + forked dependencies. +- `0.92` (default) — calibrated against the top-1000-of-each-ecosystem + test corpus to produce zero false positives there. +- `0.85` — lenient; catches softer near-misses at the cost of more + false positives. Useful for paranoid security review of brand-new + PyPI / npm packages. 
+ +The threshold also appears in `--debug-calibration` rows so collected +samples can guide tuning: + +``` +typosquat|||0.92 +``` + ## False-positive management The structural rules + thresholds aim for "no false positives on the @@ -183,8 +214,17 @@ wild: 2. Open a PR. Tightening the rule (rather than special-casing the package name) is preferred — drives a cleaner heuristic. -If you discover a false negative (a real typosquat that *should* fire -but doesn't): +## Disabling + +Pure compute, no network. There is no `--no-typosquat` flag — disabling +the typosquat enricher would defeat its primary purpose. To suppress +*specific* false-positive findings, hand-curate a per-component baseline +entry; see [Baseline & suppression — Worked example](../baseline.md#worked-example-triaging-a-false-positive). + +To gate exit code on typosquat findings, use `--fail-on typosquat`. + +## See also -1. Same — add a regression test that fires, then tweak the algorithm - until it does. +- [CLI reference — `--typosquat-similarity-threshold`](../cli-reference.md#--typosquat-similarity-threshold-float) +- [`bomdrift refresh-typosquat`](../cli-reference.md#bomdrift-refresh-typosquat) +- [Baseline — false-positive triage](../baseline.md#worked-example-triaging-a-false-positive) diff --git a/docs/src/enrichers/version-jump.md b/docs/src/enrichers/version-jump.md index 911efeb..6b65b60 100644 --- a/docs/src/enrichers/version-jump.md +++ b/docs/src/enrichers/version-jump.md @@ -43,18 +43,22 @@ and pulling the dep would add transitive weight for no functional gain. - leading-zero numbers (`01.2.3`) — ambiguous and almost always a sign of a non-SemVer scheme; safer to skip than misinterpret. -## Threshold - -`MIN_MAJOR_DELTA = 2` is the minimum delta to flag. Hardcoded; not -exposed as a CLI flag for two reasons: - -1. The signal's whole point is "the standard SemVer signal of a - single-major bump is already well understood." 
Letting users - configure it down to 1 just duplicates the SemVer-bump signal - reviewers already see. -2. Letting users configure it up (3, 4, ...) would silence legitimate - xz-pattern signals. The 90-day maintainer-age threshold already - serves the "tune for false-positive rate" knob. +## Calibration + +The `MIN_MAJOR_DELTA = 2` threshold is intentionally hardcoded. Letting +users configure it down to 1 just duplicates the standard SemVer-bump +signal reviewers already see; letting users configure it up (3, 4, …) +silences legitimate xz-pattern signals. The +[`--young-maintainer-days`](../cli-reference.md#--young-maintainer-days-n) +threshold already serves the "tune for false-positive rate" knob. + +## Disabling + +There is no `--no-version-jump` flag — pure compute, zero cost. If you +need to gate exit code only on version-jump findings, use `--fail-on +any`. To suppress a specific bump as a known-acceptable, write a +per-component baseline entry — see +[Baseline — When the bump is the false positive](../baseline.md#when-the-bump-is-the-false-positive). ## Examples diff --git a/docs/src/github-action.md b/docs/src/github-action.md index a9d971a..6bf6b02 100644 --- a/docs/src/github-action.md +++ b/docs/src/github-action.md @@ -59,6 +59,22 @@ documents that path; both flows continue to be supported in v1. | `max-version-changed` | no | `` (empty) | Exit 2 when more than this many dependencies change version. | | `baseline` | no | `` (empty) | Path to a previously captured `bomdrift diff --output json` snapshot. Findings present in the baseline are suppressed from the rendered output and the `--fail-on` trip evaluation. See [Baseline & suppression](./baseline.md) for match-key semantics. | | `github-token` | no | `${{ github.token }}` | Token used to post PR comments. | +| `upload-to-code-scanning` | no | `false` | When `true` AND `output: sarif`, upload the rendered SARIF artifact to GitHub Code Scanning via `github/codeql-action/upload-sarif@v3`. 
Requires the calling workflow to grant `permissions.security-events: write`. Off by default for back-compat — v0.7 callers see no behavior change. See [SARIF + Code Scanning](./sarif.md). (v0.8+) | + +## Inputs not exposed by the action + +The composite action surfaces a small, opinionated subset of the CLI. +For features without a matching action input — VEX consume / emit +(`--vex`, `--emit-vex`), license policy +(`--allow-licenses`/`--deny-licenses`/`--allow-exception`/`--deny-exception`), +calibration knobs (`--typosquat-similarity-threshold`, +`--young-maintainer-days`, `--cache-ttl-hours`), the failure thresholds +(`--fail-on-epss`, `--fail-on kev`), OCI attestation +(`--before-attestation`, `--cosign-identity`, …), or plugins +(`--plugin`) — drive them through a checked-in `.bomdrift.toml` (loaded +via the `config:` input) or run the binary directly with +`actions/setup-rust` + `cargo install`. The CLI flag set in +[CLI reference](./cli-reference.md) is the authoritative full surface. ## Outputs @@ -201,6 +217,28 @@ want the diff in the step summary: The `output: sarif` produces SARIF v2.1.0 with stable rule IDs (see [Output formats](./output-formats.md#sarif-v210)). +### Comment-driven suppression bridges (other forges) + +The `comment-suppress` companion sub-action is GitHub-only — it relies +on the `issue_comment` workflow event. 
For GitLab, Bitbucket Cloud, +and Azure DevOps, bomdrift ships parallel **Cloudflare Worker bridges** +that listen on each forge's webhook, validate the trigger, and dispatch +the equivalent `bomdrift baseline add --from-comment ""` run on +the underlying CI: + +- [`examples/gitlab-ci/comment-bridge/`](https://github.com/Metbcy/bomdrift/tree/main/examples/gitlab-ci/comment-bridge) (v0.9+) +- [`examples/bitbucket-pipelines/comment-bridge/`](https://github.com/Metbcy/bomdrift/tree/main/examples/bitbucket-pipelines/comment-bridge) (v0.9.5+) +- [`examples/azure-devops/comment-bridge/`](https://github.com/Metbcy/bomdrift/tree/main/examples/azure-devops/comment-bridge) (v0.9.5+) + +Each bridge enforces the same five guards: webhook secret / +HMAC verification, event-type filter, repo / project allowlist, +commenter-permission check, and a PR-context guard. The +`/bomdrift suppress [reason: …]` grammar is identical across all +four SCMs and shares a single regex (`scripts/parse-suppress-comment.sh`) +so behavior cannot drift. See the per-forge chapters +[GitLab CI](./gitlab-ci.md) · [Bitbucket](./bitbucket.md) · +[Azure DevOps](./azure-devops.md) for setup. + ## Action permissions `pull-requests: write` is required when `comment-on-pr: true` (the diff --git a/docs/src/gitlab-ci.md b/docs/src/gitlab-ci.md index 9708f73..303b88d 100644 --- a/docs/src/gitlab-ci.md +++ b/docs/src/gitlab-ci.md @@ -127,17 +127,17 @@ from the MR's pipeline view with a `BOMDRIFT_SUPPRESS_ID` variable. On trigger it runs `bomdrift baseline add` and pushes the result back to the MR branch using `BOMDRIFT_PUSH_TOKEN`. -### What's NOT in v0.7 (deferred to v0.8) - -In-comment `/bomdrift suppress ` flow on GitLab. GitLab's note -webhook fires on every comment on every MR with no command-prefix -filter, so wiring it safely (rate-limit, fork-MR safety, command -parsing, double-trigger debouncing) is materially harder than on -GitHub. 
v0.7 ships the manual-job path because it covers the same -user need (one click per accepted finding) without standing up a -webhook handler. v0.8 will track the comment-driven flow under a -follow-up issue once we see real adoption data on the v0.7 manual -path. +### Comment-driven suppression on GitLab (v0.9+) + +In-comment `/bomdrift suppress ` is supported on GitLab as of v0.9 +via the [Cloudflare Worker bridge](https://github.com/Metbcy/bomdrift/tree/main/examples/gitlab-ci/comment-bridge). +GitLab's note webhook fires on every comment on every MR with no +command-prefix filter, so the bridge enforces five guards (webhook +secret, event-type filter, repo allowlist, commenter-permission check, +PR-context guard) before invoking `bomdrift baseline add --from-comment +""` against the underlying CI. The grammar is identical to the +GitHub `comment-suppress` sub-action; both share the +`scripts/parse-suppress-comment.sh` regex so behavior cannot drift. ## Self-Managed GitLab diff --git a/docs/src/introduction.md b/docs/src/introduction.md index ab7108b..2da8168 100644 --- a/docs/src/introduction.md +++ b/docs/src/introduction.md @@ -1,9 +1,13 @@ # Introduction -**bomdrift** is a CLI and GitHub Action that diffs two SBOMs and surfaces -supply-chain risk signals on every changed dependency — flags **new CVEs**, -**typosquats**, **multi-major version jumps**, and **young maintainers** on -added or upgraded packages, ready to drop into a PR comment. +**bomdrift** is a CLI and multi-SCM action that diffs two SBOMs and surfaces +supply-chain risk signals on every changed dependency — flags **new CVEs** +(with EPSS + CISA KEV scoring), **typosquats** across eight ecosystems, +**multi-major version jumps**, **young-maintainer takeovers**, +**registry-metadata signals** (recently-published, deprecated, +maintainer-set-changed), and **license-policy violations** — ready to drop +into a PR comment on **GitHub**, **GitLab**, **Bitbucket**, or +**Azure DevOps**. 
## What problem does it solve? @@ -31,7 +35,8 @@ compromised release: 2. **Typosquat** — `plain-crypto-js` scores 0.95 against the legitimate `crypto-js` via the suffix-containment boost rule. 3. **Vulnerabilities** — OSV.dev returns the published advisory IDs - (`MAL-2026-2306`, `GHSA-3p68-rc4w-qgx5`, etc.) on both versions. + (`MAL-2026-2306`, `GHSA-3p68-rc4w-qgx5`, etc.) on both versions, with + EPSS / KEV badges where applicable. See [`examples/axios-incident/`](https://github.com/Metbcy/bomdrift/tree/main/examples/axios-incident) for the SBOM pair and the rendered output. @@ -40,40 +45,76 @@ for the SBOM pair and the rendered output. 700+ packages compromised by a self-replicating worm. Diff-time review of newly added transitive deps and version bumps was the only pre-merge -defense. bomdrift's "added components + CVE enrichment" pair surfaces this -class of attack at PR time. +defense. bomdrift's "added components + CVE enrichment + recently- +published registry signal" combination surfaces this class of attack at +PR time. ### xz-utils backdoor (CVE-2024-3094, Mar 2024) A 2.6-year social-engineering campaign culminating in a backdoor shipped in xz 5.6.0/5.6.1. The "Jia Tan" maintainer's first commit was recent relative to the release — exactly the maintainer-age heuristic bomdrift -implements via the GitHub REST API. +implements via the GitHub REST API. The threshold is tunable via +`--young-maintainer-days` (default 90; v0.9.6+). ### Sustained PyPI typosquat campaigns (2024–2026) Hundreds of malicious packages disguised by single-character substitutions. Jaro-Winkler similarity against top-N catalogs catches these reliably; see the [Typosquat detection](./enrichers/typosquat.md) chapter for the full -algorithm. +algorithm and the `--typosquat-similarity-threshold` knob (v0.9.6+). ## Design ethos - **Small dep tree, no Docker, single binary.** ~3.4 MB stripped + LTO. 
-- **Best-effort enrichers.** Network failures warn and continue rendering - — a PR review is still useful without CVE / maintainer-age data, and - the offline change-shape signals always work. + No tokio, no chrono, no semver crate, no octocrab — the constraint is + load-bearing. +- **Best-effort enrichers.** Network failures (OSV, EPSS, KEV, GitHub, + registries), plugin failures, and attestation-verify failures all + warn-and-continue. A PR review is still useful without one signal, + and the offline change-shape signals always work. - **Byte-deterministic output.** Identical inputs render to byte-identical - Markdown / JSON / SARIF every time, so PR-comment upserts via - `peter-evans/create-or-update-comment` patch in place instead of - accumulating duplicate comments. + Markdown / JSON / SARIF / VEX every time, honoring `SOURCE_DATE_EPOCH`, + so PR-comment upserts via `peter-evans/create-or-update-comment` + patch in place instead of accumulating duplicate comments. - **Cosign-signed releases.** Every archive carries a Sigstore signature via GitHub OIDC. Action defaults to verifying signatures; opt-out via - `verify-signatures: false` for trusted mirrors. + `verify-signatures: false` for trusted mirrors. As of v0.9.6, the same + cosign machinery can verify the **input SBOMs** themselves via + `--before-attestation` / `--after-attestation`. +- **OSS-first, no telemetry, no account.** Apache-2.0; no daemon, no + hosted UI, no signup. ## Where to next? +### Getting started - New here? Start with the [Quickstart](./quickstart.md). - Wiring up the GitHub Action? See [GitHub Action](./github-action.md). +- On another forge? See [GitLab CI](./gitlab-ci.md), + [Bitbucket Pipelines](./bitbucket.md), or + [Azure DevOps Pipelines](./azure-devops.md). - Looking up a specific flag? See [CLI reference](./cli-reference.md). -- Curious how a specific signal works? See the [Enrichers overview](./enrichers/overview.md). 
+ +### Suppressing findings +- [Baseline & suppression](./baseline.md) — JSON snapshots, in-comment + `/bomdrift suppress`, `expires` + `reason`. +- [License policy](./license-policy.md) — SPDX expression evaluation + with allow/deny + per-exception granularity. +- [VEX](./vex.md) — OpenVEX 0.2.0 + CycloneDX VEX 1.6 consume / emit. + +### Output +- [Output formats](./output-formats.md) — markdown / terminal / JSON. +- [SARIF + Code Scanning](./sarif.md) — stable rule IDs, fingerprints, + Code Scanning ingestion. + +### Per-signal deep dives +- [Enrichers overview](./enrichers/overview.md) — the contract every + enricher honors, plus pointers into each chapter. + +### Advanced +- [OCI attestation](./attestation.md) — fetch SBOMs as + `cosign verify-attestation`-verified OCI artifacts (v0.9.6+). +- [Plugins](./plugins.md) — external-process plugin protocol for + custom rules (v0.9.6+). +- [Architecture](./architecture.md) — module map, pipeline, + determinism contract. diff --git a/docs/src/plugins.md b/docs/src/plugins.md new file mode 100644 index 0000000..c30629e --- /dev/null +++ b/docs/src/plugins.md @@ -0,0 +1,303 @@ +# Plugins + +bomdrift's enricher set is intentionally curated — typosquats, +maintainer age, registry metadata, OSV/EPSS/KEV. Org-specific signals +(banned packages, license-tier policies, internal package allowlists) +don't belong in the binary, but they need a first-class extension +point. v0.9.6 ships that extension point as **external-process +plugins**. + +## Overview + +A plugin is an executable on the filesystem (any language, any shape +of dependencies) that reads a JSON envelope from stdin and writes a +JSON envelope to stdout. bomdrift invokes it once per matching +component during a diff. Findings the plugin emits are merged into +bomdrift's output across every render path: terminal, markdown, JSON, +SARIF. + +Plugins are **not** a sandbox. They run as your CI user with the +same filesystem and network access bomdrift itself has. 
Treat plugin +source the same way you'd treat any external CI script. + +## Why external-process and not WASM + +The original v0.4 sketch on the roadmap floated WASM. v0.9.6 +deliberately picks shell-out instead: + +- **Smaller dep tree.** No wasmtime / wasmer pulled into the + bomdrift binary. The dep-tree audit is a real OSS-first + constraint. +- **Any language.** Plugins can be written in Bash, Python, Go, Rust, whatever. + WASM would force a per-language toolchain. +- **Sandboxing is the user's environment.** CI runners already + isolate per-job. Adding WASM-level sandboxing inside an already + isolated container is duplicate effort for marginal value. +- **Failure isolation is cheap.** A child-process crash can't take + bomdrift down; we already get that for free from the OS. + +WASM may be revisited in v1.0+ if a clear need materializes (in-browser +diffing, multi-tenant CI without per-job isolation). For now, the +shell-out model wins on simplicity and dep cost. + +## Manifest format + +A plugin manifest is a TOML file pointed at by `--plugin `. +The flag is **repeatable** — bomdrift loads each manifest in +declaration order and runs all matching plugins per component. + +```toml +[plugin] +name = "my-plugin" +description = "What this plugin checks for" +exec = "./run.sh" +timeout_ms = 5000 +invoke_on = ["added", "version-changed"] +``` + +### Fields + +| Field | Type | Required | Default | Notes | +|---------------|-------------|----------|---------|-------| +| `name` | string | yes | — | Unique within a single bomdrift run. Used in error messages and SARIF `partialFingerprints`. | +| `description` | string | no | — | Free-form. Surfaced when bomdrift logs plugin failures. | +| `exec` | string | yes | — | Path to the executable, **resolved relative to the manifest directory**. Use `./` prefix to make this explicit. Absolute paths are accepted. | +| `timeout_ms` | integer | no | `5000` | Wall-clock timeout per invocation. 
After expiry the process is killed and the invocation's findings are dropped. | +| `invoke_on` | string list | yes | — | Subset of `["added", "version-changed"]`. Future versions may add `removed`, `license-changed`, `maintainer-changed`. Unknown values are rejected at load time. | + +`exec` must be marked executable on disk. bomdrift does **not** +auto-`chmod +x`; this would mask permission bugs. + +## Protocol — stdin/stdout JSON shape + +bomdrift writes one JSON object on the plugin's stdin, closes +stdin, and reads exactly one JSON object from stdout (parsing the +**last** complete JSON object on stdout — earlier output is treated +as plugin log noise and discarded silently, but plugins shouldn't +rely on this). The plugin should write its findings JSON and exit +promptly. + +### Stdin + +```json +{ + "component": { + "purl": "pkg:npm/foo@1.2.3", + "name": "foo", + "version": "1.2.3", + "licenses": ["MIT"] + }, + "event": "added", + "before": null +} +``` + +- `component` — the **after** component. Always present. +- `event` — `"added"` or `"version-changed"`. Matches the manifest's + `invoke_on` filter. +- `before` — `null` for `added`, the **before** component (same + shape as `component`) for `version-changed`. + +Unknown fields may appear in future bomdrift versions. Plugins +**must ignore unknown fields** on stdin and not assume the input +shape is closed. + +### Stdout + +Exactly one JSON object, ideally on a single line (newline-terminated +is fine; multi-line pretty-printed JSON is also accepted as long as +it's a single value): + +```json +{ + "findings": [ + { + "kind": "your-finding-tag", + "message": "human-readable description", + "severity": "info", + "rule_id": "stable.id.for.this.kind" + } + ] +} +``` + +| Field | Type | Required | Notes | +|-------------|--------|----------|-------| +| `kind` | string | yes | Free-text tag. Surfaced in the markdown/terminal renderers as the finding category. Keep it short and stable. 
| +| `message` | string | yes | One-line human-readable description. | +| `severity` | string | yes | One of `"info"`, `"warning"`, `"error"`. Maps to SARIF `level` as `note` / `warning` / `error`. | +| `rule_id` | string | yes | Stable identifier for this **class** of finding. Used in SARIF `partialFingerprints`; should be the same across runs for the same logical finding so dedup works. | + +An empty findings array is the no-match path: + +```json +{"findings": []} +``` + +### SARIF mapping + +All plugin findings render under a single SARIF rule: +`bomdrift.plugin`. The plugin's `rule_id` is threaded into the +SARIF result's `partialFingerprints` so that GitHub Code Scanning +and similar consumers can dedup runs of the same finding. + +## Failure semantics + +Plugins are **best-effort**. Their failures never fail the bomdrift +diff: + +| Failure mode | bomdrift response | +|--------------------------------------|------------------------------------------------------------| +| Plugin exits non-zero | Drop findings from this invocation. Log warning if `BOMDRIFT_DEBUG=1`. | +| Wall-clock timeout (`timeout_ms`) | Kill the process. Drop findings. Log warning if `BOMDRIFT_DEBUG=1`. | +| Stdout is not parseable JSON | Drop findings. Log warning if `BOMDRIFT_DEBUG=1`. | +| Stdout JSON is missing `findings` | Drop findings. Log warning if `BOMDRIFT_DEBUG=1`. | +| `findings[i].severity` is unknown | Drop that finding. Other findings in the same invocation pass through. | +| Plugin exec is missing on disk | Manifest load fails fast (before any diff work). Exit 1. | + +The contract: the rest of the bomdrift report still renders. A bad +plugin is a noisy plugin, not a broken pipeline. Run with +`BOMDRIFT_DEBUG=1` while authoring a plugin to see why findings are +being dropped. + +### Windows note + +On Windows, `Command::kill()` has known quirks where killed +processes may leave orphan grandchildren. 
bomdrift kills the direct +child cleanly; if your plugin spawns sub-processes, ensure it +forwards the timeout signal itself. Plugin timeouts on Windows are +**best-effort** in v0.9.6. + +## Worked example: `banned-packages` + +The reference implementation lives in +[`examples/plugins/banned-packages/`](https://github.com/Metbcy/bomdrift/tree/main/examples/plugins/banned-packages): + +``` +examples/plugins/banned-packages/ +├── README.md # how to adapt for your org +├── plugin.toml # the manifest below +├── check-banned.sh # bash + jq implementation +└── banned.txt # purl prefixes to flag +``` + +`plugin.toml`: + +```toml +[plugin] +name = "banned-packages" +description = "Flag dependencies on the org-maintained banned-packages list" +exec = "./check-banned.sh" +timeout_ms = 5000 +invoke_on = ["added", "version-changed"] +``` + +Invocation: + +```bash +bomdrift diff before.cdx.json after.cdx.json \ + --plugin examples/plugins/banned-packages/plugin.toml +``` + +See the example's [README](https://github.com/Metbcy/bomdrift/blob/main/examples/plugins/banned-packages/README.md) +for adaptation guidance, performance characteristics, and security +notes. + +## Performance + +bomdrift invokes plugins **sequentially**, once per matching +component. With `N` Added/VersionChanged components and `P` +plugins, you'll see `N × P` invocations. Implications: + +- **Process-startup cost matters.** A bash plugin that forks `jq` + ten times costs ~30 ms of fork + interpreter warmup per call. + At `N = 200, P = 3` that's ~18 s of pure startup overhead. + Compile to a static Go/Rust binary if hot-path performance + matters. +- **Tune `timeout_ms`.** The default (`5000`) is generous for + pure-CPU plugins; a plugin that hits a network endpoint per + component might need `30000`. A plugin that's intermittently + slow ruins your CI cycle time — consider sampling inside the + plugin (return early for components that don't match its + scope). 
+- **No parallelism in v0.9.6.** Concurrent plugin execution is on + the table for v1.0 if a meaningful workload demands it. File an + issue with timing data if you hit this. + +## Security + +bomdrift does **not** sandbox plugins: + +- Plugins run as the bomdrift parent's user. +- Plugins inherit the parent's environment (including secret-bearing + env vars like `GITHUB_TOKEN`, `NPM_TOKEN`, etc.). +- Plugins inherit the parent's filesystem and network access. +- Plugins can spawn arbitrary sub-processes. + +Treat plugin source like any external CI script: + +- **Vet what you ship.** Read the plugin source, including any + binary dependencies it pulls in. +- **Pin to a commit / tag.** Don't `curl ... | bash` an + always-latest plugin executable. +- **Minimize the env.** If a plugin doesn't need a secret, don't + let it inherit one. `env -i bomdrift diff ...` strips the + environment; manually re-export only what bomdrift itself needs. +- **Mirror internally.** For high-trust pipelines, vendor the + plugin into your own repo or internal artifact store rather + than pulling from a public registry on every CI run. + +## Stability promise + +The plugin protocol's stdin/stdout JSON shape is **best-effort +stable in v0.9.6**: + +- We may **add** fields to the stdin envelope in a future minor + release. Plugins must ignore unknown fields. +- We will **not remove or rename** documented stdin or stdout + fields without a major version bump. +- The stdout `findings` schema is the public contract; treat + `kind`, `message`, `severity`, `rule_id` as semver-stable. +- The TOML manifest schema may grow new optional fields; existing + fields stay. + +If the protocol needs a breaking change for v1.0, a deprecation +window with a `protocol_version` envelope field will land at least +one minor release before the break. 
+ +## CI integration + +A typical GitHub Actions job that wires in a plugin: + +```yaml +jobs: + bomdrift: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # Make sure jq is available if your plugin needs it. + - run: sudo apt-get install -y jq + + - uses: Metbcy/bomdrift@v1 + with: + before-sbom: before.cdx.json + after-sbom: after.cdx.json + extra-args: --plugin examples/plugins/banned-packages/plugin.toml +``` + +For multiple plugins, repeat `--plugin` in `extra-args`: + +```yaml +extra-args: >- + --plugin .bomdrift/plugins/banned-packages/plugin.toml + --plugin .bomdrift/plugins/license-tier/plugin.toml +``` + +## Related + +- [`examples/plugins/banned-packages/`](https://github.com/Metbcy/bomdrift/tree/main/examples/plugins/banned-packages) — worked reference. +- [SARIF + Code Scanning](./sarif.md) — how `bomdrift.plugin` + findings appear in Code Scanning. +- [Roadmap](./roadmap.md) — design rationale for shipping plugins + in v0.9.6. diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md index d4a60ed..8051d6d 100644 --- a/docs/src/quickstart.md +++ b/docs/src/quickstart.md @@ -25,7 +25,7 @@ jobs: ``` The `@v1` mutable tag tracks the latest v0.x release. Pin to a specific -version (`@v0.9.5`) if you prefer reproducible builds. See +version (`@v0.9.6`) if you prefer reproducible builds. See [GitHub Action](./github-action.md) for every input. If you prefer a checked-in policy file, install the binary and run @@ -39,7 +39,7 @@ Pre-built binaries cover Linux x86_64 + aarch64, macOS aarch64, and Windows x86_64. Each archive is cosign-signed via Sigstore + GitHub OIDC. 
```bash -VERSION=v0.9.5 +VERSION=v0.9.6 TARGET=x86_64-unknown-linux-gnu curl -sSL -o bomdrift.tar.gz \ "https://github.com/Metbcy/bomdrift/releases/download/${VERSION}/bomdrift-${VERSION}-${TARGET}.tar.gz" @@ -48,6 +48,10 @@ tar -xzf bomdrift.tar.gz # Diff two SBOMs ./bomdrift-${VERSION}-${TARGET}/bomdrift diff before.json after.json + +# Emit SARIF to a file (no fragile YAML > redirection) +./bomdrift-${VERSION}-${TARGET}/bomdrift diff before.json after.json \ + --output sarif --output-file bomdrift.sarif ``` To verify the archive's signature before you trust the binary, see @@ -56,7 +60,7 @@ To verify the archive's signature before you trust the binary, see ## From source ```bash -cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.5 bomdrift +cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.9.6 bomdrift ``` Requires Rust 1.85+ (the project uses edition 2024). @@ -75,10 +79,22 @@ After cloning + `cargo build --release`: The output is GitHub-Flavored Markdown ready for PR-comment posting. -## Next steps - -- [GitHub Action](./github-action.md) — every input, common patterns. -- [CLI reference](./cli-reference.md) — every flag. -- [Output formats](./output-formats.md) — markdown / terminal / JSON / SARIF. -- [Baseline & suppression](./baseline.md) — adopt bomdrift on a project - with pre-existing findings without drowning the first PR. +## What's next? + +- **Wire it up:** [GitHub Action](./github-action.md) · + [GitLab CI](./gitlab-ci.md) · [Bitbucket](./bitbucket.md) · + [Azure DevOps](./azure-devops.md). +- **Reference:** [CLI reference](./cli-reference.md) (every flag, with + introduced-in annotations) · [Output formats](./output-formats.md) · + [SARIF + Code Scanning](./sarif.md). +- **Suppress noise:** [Baseline & suppression](./baseline.md) lets a + team adopt bomdrift on a project with pre-existing findings without + drowning the first PR. 
+- **License gating:** [License policy](./license-policy.md) — SPDX + expression evaluation with allow/deny + per-exception granularity. +- **VEX:** [VEX](./vex.md) — record exploitability decisions in + OpenVEX 0.2.0 / CycloneDX VEX 1.6, suppress on subsequent diffs. +- **Advanced (v0.9.6+):** [OCI attestation](./attestation.md) · + [Plugins](./plugins.md) for custom rules. +- **Internals:** [Architecture](./architecture.md) · + [Contributing](./contributing.md). diff --git a/docs/src/release-signing.md b/docs/src/release-signing.md index 55f11a6..d80cfc5 100644 --- a/docs/src/release-signing.md +++ b/docs/src/release-signing.md @@ -13,7 +13,7 @@ via Sigstore + GitHub OIDC. This means: ## Verifying a release manually ```bash -VERSION=v0.3.0 +VERSION=v0.9.6 TARGET=x86_64-unknown-linux-gnu ARCHIVE=bomdrift-${VERSION}-${TARGET}.tar.gz diff --git a/docs/src/roadmap.md b/docs/src/roadmap.md index d831c5f..1dda8f7 100644 --- a/docs/src/roadmap.md +++ b/docs/src/roadmap.md @@ -3,6 +3,26 @@ What's planned, what's deliberately out of scope, and what the acceptance criteria for new contributions look like. +## Shipped (v0.9.6 — finish the roadmap) + +- **OCI artifact attestation verification** — `--before-attestation`, + `--after-attestation`, `--cosign-identity`, `--cosign-issuer`, and + `--require-attestation`. bomdrift shells out to + `cosign verify-attestation --type=cyclonedx` and consumes the + verified CycloneDX SBOM payload. See + [Attestation](./attestation.md). +- **Custom rules / plugin system** — external-process plugins via + repeatable `--plugin `. JSON over stdin/stdout, + best-effort failures, new `bomdrift.plugin` SARIF rule. See + [Plugins](./plugins.md). +- **Calibration knobs** — `--typosquat-similarity-threshold`, + `--young-maintainer-days`, `--cache-ttl-hours` flags plus matching + `[diff]` config keys. Every previously hardcoded threshold is now + configurable. 
+- **Cache-TTL unification** — internal refactor consolidating the + four duplicated `CACHE_TTL_SECS` constants behind a single + `cache::effective_ttl_secs()` helper. No user-visible change. + ## Shipped (v0.9.5 — polish + multi-SCM parity) - **Per-exception SPDX allow/deny** via `[license] allow_exceptions` / @@ -59,31 +79,44 @@ acceptance criteria for new contributions look like. - OSV CVE aliases threaded through `VulnRef`. - `--debug-calibration-format jsonl` and `--output-file `. +## Investigated and decided + +- **GraphQL maintainer-age** — investigated again for v0.9.6 and + rejected. GitHub's GraphQL `history()` connection doesn't expose + ascending-date ordering, so finding the oldest contributor commit + still requires walking the cursor backward from the most recent + commit. REST's + `GET /repos/{o}/{r}/commits?author=X&per_page=1` plus `Link`-header + parsing for the last page lets bomdrift fetch a single author's + oldest commit in two requests. **Decided: REST stays.** Closing + this one off the roadmap permanently — re-open only if GitHub adds + ASC ordering to the GraphQL history connection. + +## Calibration + +The calibration thresholds below are configurable via `.bomdrift.toml` and +CLI flags. Tune `[diff] typosquat_similarity_threshold`, +`young_maintainer_days`, `recently_published_days`, `cache_ttl_hours`. +The version-jump `MIN_MAJOR_DELTA` stays intentionally hardcoded. See [CLI reference](./cli-reference.md) for flag forms. + +## Blocked on upstream + +- **PyPI / crates.io maintainer-set-changed.** The npm enricher + (shipped v0.9) compares maintainer sets for VersionChanged + components by reading `registry.npmjs.org`'s per-version + `maintainers[]` array. PyPI's + `https://pypi.org/pypi//json` returns repository-level + maintainers but no per-version history. Crates.io's + `https://crates.io/api/v1/crates/` returns repository-level + `crate.owners` but no per-version `published_by` history. If + either ecosystem ships a per-version maintainer endpoint, bomdrift + adds the enricher in a future minor release. 
+ ## Future candidates (not committed) -- **PyPI / crates.io maintainer-set-changed** — blocked on - per-version maintainer data in upstream APIs. -- **VEX vocabulary beyond OpenVEX's 8 justifications** — bomdrift - uses the spec's enum verbatim. If a richer vocab emerges we'll - follow. -- **GraphQL maintainer-age** — was investigated for v0.4 and - deferred. Cursor-pagination cost still steers us toward REST. -- **Custom rules / plugin system** — let consumers add - organization-specific enrichers. Probably WASM-based. -- **OCI artifact attestation** — verify SBOMs are signed by the - build system before diffing. -- **Reachability** — explicit non-goal; pair with Endor / Snyk for - call-graph analysis (see Non-goals below). - -### Calibration backlog - -Tunable thresholds where the default may not be the right answer -at scale: - -- Typosquat `SIMILARITY_THRESHOLD` (currently 0.92). -- Maintainer-age `YOUNG_MAINTAINER_DAYS` (currently 90). -- Registry `MIN_PUBLISHED_AGE_DAYS` (currently 14). -- OSV / EPSS / KEV / Registry cache TTL (currently 24h). +_Empty as of v0.9.6._ The previously listed candidates have all been +shipped, marked non-goal, decided against, or moved to "Blocked on +upstream" above. New post-v0.9.6 ideas land here as they emerge. ## Non-goals @@ -105,6 +138,20 @@ change-focused**: only on what's *new* in this diff. If you want changed in this PR's deps that I should worry about?", that's bomdrift's question. +### Reachability / call-graph analysis + +Determining whether the vulnerable function in a flagged advisory is +actually invoked from your application's entry points is a +fundamentally different analysis than diff-level supply-chain risk. +It requires whole-program call-graph construction, language-specific +runtime modeling (dynamic dispatch, reflection, eval), and an +ever-growing per-CVE vulnerable-symbol database. 
The vendors who do +this well — Endor Labs, Snyk Reachability — invest at a scale OSS +bomdrift can't match, and the per-CVE symbol curation is the moat, +not the call-graph engine itself. **Pair bomdrift with Endor or Snyk +for reachability**; bomdrift answers "what changed", they answer +"does the change reach prod code". + ### Dependency-tree visualization [`cargo tree`](https://doc.rust-lang.org/cargo/commands/cargo-tree.html), diff --git a/docs/src/vex.md b/docs/src/vex.md index 234c199..f35077c 100644 --- a/docs/src/vex.md +++ b/docs/src/vex.md @@ -120,6 +120,21 @@ vex_default_justification = "vulnerable_code_not_in_execute_path" `vex_author` falls back to `repo_url` when unset; falls back to `"bomdrift"` when both are missing. +## Justification vocabulary + +bomdrift uses the OpenVEX 0.2.0 spec's standard justification values +verbatim: `component_not_present`, `vulnerable_code_not_present`, +`vulnerable_code_not_in_execute_path`, +`vulnerable_code_cannot_be_controlled_by_adversary`, and +`inline_mitigations_already_exist` — the complete set the spec +defines (`under_investigation` is a status, not a justification). +Richer justification vocabularies (per-organization tags, +custom-reason strings, tool-specific extensions) are out of scope — +authoring against a single canonical enum keeps `--emit-vex` output +interoperable with any OpenVEX consumer. If the OpenVEX spec evolves +to add new justifications, bomdrift follows the spec; non-spec +justifications won't be invented here. + ## Worked rotation example 1. Run a diff that surfaces `GHSA-evil` on `pkg:npm/foo@1.0.0`.
diff --git a/examples/plugins/banned-packages/README.md b/examples/plugins/banned-packages/README.md new file mode 100644 index 0000000..4af5d5d --- /dev/null +++ b/examples/plugins/banned-packages/README.md @@ -0,0 +1,86 @@ +# `banned-packages` — worked plugin example + +A reference [bomdrift plugin](../../../docs/src/plugins.md) that flags +any Added or VersionChanged dependency whose purl matches a +prefix in a maintained denylist file. + +## What it does + +For each component bomdrift sees as **added** or **version-changed** +during a diff, the plugin: + +1. Reads bomdrift's JSON envelope on stdin. +2. Extracts `component.purl`. +3. Checks each non-comment line of `banned.txt` as a **purl prefix**. +4. For every match, emits a `banned-package` finding with severity + `error`. + +A versionless prefix like `pkg:npm/event-stream` flags every version +of the package; a versioned prefix like `pkg:npm/coa@2.0.3` only +flags that exact release. + +## Files + +| File | Purpose | +|-------------------|------------------------------------------------------| +| `plugin.toml` | Manifest bomdrift loads with `--plugin`. | +| `check-banned.sh` | The plugin executable. Bash + `jq`. | +| `banned.txt` | Sample denylist with `#` comments. **Replace this.** | + +## Adapting for your org + +1. Replace the contents of `banned.txt` with your curated list. One + purl prefix per line; `#` comments and blank lines ignored. +2. (Optional) Modify `check-banned.sh` to source the list from a URL + (`curl ... | sponge banned.txt` in CI) or to honor a different + `severity` per-entry. +3. Vendor or copy the directory into your repo and reference it from + your bomdrift workflow. 
+ +## Wiring into a bomdrift run + +```bash +bomdrift diff before.cdx.json after.cdx.json \ + --plugin path/to/banned-packages/plugin.toml +``` + +A matching ban shows up in every output format: terminal, markdown +PR comment, JSON, and SARIF (under the `bomdrift.plugin` rule, with +`partialFingerprints` set from the finding's `rule_id`). + +### GitHub Actions example + +```yaml +- uses: Metbcy/bomdrift@v1 + with: + extra-args: --plugin examples/plugins/banned-packages/plugin.toml +``` + +## Performance + +The plugin is invoked **once per Added or VersionChanged component**. +With N changed components and M lines in `banned.txt`, the cost is +O(N×M) prefix comparisons. Bash + jq is fine for `M < 1000` and +`N < 500`; for larger denylists, rewrite the executable in a faster +language (Go, Rust, Python) — the plugin protocol is identical. + +If your denylist is fetched from a network source, raise +`timeout_ms` in `plugin.toml` accordingly. + +## Security + +bomdrift does **not** sandbox plugins. `check-banned.sh` runs as +your CI user with whatever filesystem and network credentials that +user has. Vet plugin source (including this example) the same way +you'd vet any external script: read it, pin a commit, mirror it +internally if you need supply-chain isolation. + +## Smoke test + +```bash +echo '{"component":{"purl":"pkg:npm/event-stream@4.0.0","name":"event-stream","version":"4.0.0"},"event":"added","before":null}' \ + | ./check-banned.sh +# → {"findings":[{"kind":"banned-package", ... "rule_id":"banned-packages.pkg.npm.event.stream"}]} +``` + +A purl that matches no prefix returns `{"findings":[]}`. diff --git a/examples/plugins/banned-packages/banned.txt b/examples/plugins/banned-packages/banned.txt new file mode 100644 index 0000000..7ef8ace --- /dev/null +++ b/examples/plugins/banned-packages/banned.txt @@ -0,0 +1,23 @@ +# banned-packages list — one purl prefix per line, '#' comments allowed. 
+# Prefix match: an entry "pkg:npm/foo" matches every version of foo, +# while "pkg:npm/foo@1.2.3" only matches that exact version. +# +# Entries are illustrative only; replace with your org's curated list. + +# event-stream supply-chain compromise (2018; flatmap-stream backdoor). +pkg:npm/event-stream + +# coa was hijacked in November 2021 with a credential-stealer payload. +pkg:npm/coa@2.0.3 + +# colors-noise typosquat of `colors` (illustrative). +pkg:npm/colors-noise + +# Example PyPI typosquat used in published research; replace with current data. +pkg:pypi/typosquatted-pkg + +# ctx PyPI hijack (May 2022) shipped a credential exfiltrator. +pkg:pypi/ctx@0.2.2 + +# rustdecimal — the May 2022 crates.io typosquat of `rust_decimal`. +pkg:cargo/rustdecimal diff --git a/examples/plugins/banned-packages/check-banned.sh b/examples/plugins/banned-packages/check-banned.sh new file mode 100755 index 0000000..63d044c --- /dev/null +++ b/examples/plugins/banned-packages/check-banned.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# check-banned.sh — bomdrift plugin that flags components whose purl +# matches a prefix in banned.txt. Speaks bomdrift's plugin protocol: +# reads one JSON envelope on stdin, writes one JSON envelope to stdout. +# +# Stdin shape : {"component": {...}, "event": "added"|"version-changed", "before": null|{...}} +# Stdout shape: {"findings": [{"kind", "message", "severity", "rule_id"}, ...]} +# +# Exit non-zero only on internal error; matched bans are normal output. + +set -euo pipefail + +here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +banned_file="${BANNED_PACKAGES_FILE:-$here/banned.txt}" + +if !
command -v jq >/dev/null 2>&1; then + echo "check-banned.sh: jq is required but not on PATH" >&2 + exit 2 +fi + +input="$(cat)" +purl="$(printf '%s' "$input" | jq -r '.component.purl // empty')" + +if [[ -z "$purl" ]]; then + printf '{"findings":[]}\n' + exit 0 +fi + +findings_json='[]' +while IFS= read -r raw_line || [[ -n "$raw_line" ]]; do + line="${raw_line%%#*}" + line="${line#"${line%%[![:space:]]*}"}" + line="${line%"${line##*[![:space:]]}"}" + [[ -z "$line" ]] && continue + + if [[ "$purl" == "$line"* ]]; then + sanitized="$(printf '%s' "$line" | tr -c 'A-Za-z0-9' '.' | sed 's/^\.*//; s/\.*$//; s/\.\.*/./g')" + findings_json="$(jq -c \ + --arg msg "purl $purl matches banned prefix $line" \ + --arg rid "banned-packages.${sanitized}" \ + '. + [{kind:"banned-package", message:$msg, severity:"error", rule_id:$rid}]' \ + <<<"$findings_json")" + fi +done <"$banned_file" + +jq -nc --argjson f "$findings_json" '{findings:$f}' diff --git a/examples/plugins/banned-packages/plugin.toml b/examples/plugins/banned-packages/plugin.toml new file mode 100644 index 0000000..ed7119a --- /dev/null +++ b/examples/plugins/banned-packages/plugin.toml @@ -0,0 +1,6 @@ +[plugin] +name = "banned-packages" +description = "Flag dependencies on the org-maintained banned-packages list" +exec = "./check-banned.sh" +timeout_ms = 5000 +invoke_on = ["added", "version-changed"] diff --git a/src/attestation.rs b/src/attestation.rs new file mode 100644 index 0000000..9561e46 --- /dev/null +++ b/src/attestation.rs @@ -0,0 +1,342 @@ +//! OCI-attached SBOM attestation fetch + verify (Phase B, v0.9.6). +//! +//! Shells out to the user's locally-installed `cosign` binary to verify +//! a CycloneDX SBOM attestation attached to an OCI artifact and returns +//! the raw SBOM JSON ready for the standard parser pipeline. +//! +//! Cosign is treated as an *optional runtime dep*: a missing or failing +//! `cosign` is reported back to the caller with a clear error pointing +//! at the install docs. 
We deliberately do NOT pull in any sigstore +//! crates — the verify step demands a Fulcio CA bundle, transparency-log +//! checkpoint, and rekor witness validation that the cosign CLI already +//! ships, and reproducing it in-process is out of scope for v0.9.6. +//! +//! Wire format produced by `cosign verify-attestation`: +//! +//! ```json +//! { +//! "payloadType": "application/vnd.in-toto+json", +//! "payload": "", +//! "signatures": [{ "keyid": "...", "sig": "..." }] +//! } +//! ``` +//! +//! Decoded `payload` is an in-toto Statement whose `predicateType` is +//! `https://cyclonedx.org/bom` (or compatible) and whose `predicate` +//! field is the actual CycloneDX SBOM. We extract `predicate` and hand +//! it back as a serialized JSON string — that's what the parser layer +//! expects. + +use anyhow::{Context, Result, bail}; +use base64::Engine; + +const COSIGN_INSTALL_URL: &str = "https://docs.sigstore.dev/system_config/installation/"; + +/// Fetch and verify a CycloneDX SBOM attached as a cosign attestation +/// to an OCI artifact. Shells out to `cosign verify-attestation`. +/// +/// Errors include: +/// - cosign-not-on-PATH (clear message pointing at install docs); +/// - cosign exit non-zero (verification failure: cert mismatch, sig +/// invalid, no attestation found); +/// - malformed in-toto envelope output (cosign succeeded but stdout +/// wasn't the expected JSON shape). +pub fn fetch_verified_sbom(oci_ref: &str, identity_regexp: &str, issuer: &str) -> Result { + let output = std::process::Command::new("cosign") + .args([ + "verify-attestation", + "--type=cyclonedx", + "--certificate-identity-regexp", + identity_regexp, + "--certificate-oidc-issuer", + issuer, + oci_ref, + ]) + .output() + .map_err(|err| { + if err.kind() == std::io::ErrorKind::NotFound { + anyhow::anyhow!( + "cosign binary not on PATH; install per {COSIGN_INSTALL_URL} and retry. 
\ + underlying error: {err}" + ) + } else { + anyhow::Error::from(err) + .context(format!("invoking cosign verify-attestation for {oci_ref}")) + } + })?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!( + "cosign verify-attestation failed for {oci_ref}: exit {}\n{}", + output.status, + stderr.trim() + ); + } + + let stdout = std::str::from_utf8(&output.stdout) + .with_context(|| format!("cosign stdout was not utf-8 for {oci_ref}"))?; + + extract_sbom_from_envelope(stdout) + .with_context(|| format!("parsing cosign attestation envelope for {oci_ref}")) +} + +/// Decode an in-toto DSSE envelope (the JSON shape cosign emits to +/// stdout) and pull out the embedded CycloneDX SBOM as serialized JSON. +/// +/// The envelope's `payload` field is base64-encoded JSON. Within that +/// JSON, `predicate` is the SBOM object — that's what the parser needs. +/// +/// Cosign sometimes emits MULTIPLE envelopes back-to-back (one per +/// signature) separated by newlines. Take the first parseable one and +/// return its predicate; subsequent envelopes are ignored because they +/// carry the same predicate by construction. +pub fn extract_sbom_from_envelope(stdout: &str) -> Result { + let trimmed = stdout.trim(); + if trimmed.is_empty() { + bail!("cosign produced empty stdout; expected a DSSE envelope JSON"); + } + + // Cosign may print one envelope per line, or a single pretty-printed + // object. Try whole-buffer parse first; fall back to per-line. + let envelope: serde_json::Value = match serde_json::from_str(trimmed) { + Ok(v) => v, + Err(_) => { + let mut found: Option = None; + for line in trimmed.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + if let Ok(v) = serde_json::from_str::(line) { + found = Some(v); + break; + } + } + found.ok_or_else(|| { + anyhow::anyhow!( + "no parseable JSON object in cosign stdout (got {} bytes)", + trimmed.len() + ) + })? 
+ } + }; + + let payload_b64 = envelope + .get("payload") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("DSSE envelope missing string `payload` field"))?; + + let payload_bytes = base64::engine::general_purpose::STANDARD + .decode(payload_b64) + .context("decoding base64 `payload` field")?; + let statement: serde_json::Value = + serde_json::from_slice(&payload_bytes).context("parsing in-toto Statement payload")?; + + let predicate = statement + .get("predicate") + .ok_or_else(|| anyhow::anyhow!("in-toto Statement missing `predicate` field"))?; + + serde_json::to_string(predicate).context("re-serializing CycloneDX predicate") +} + +#[cfg(test)] +mod tests { + use super::*; + use base64::engine::general_purpose::STANDARD as B64; + + /// Build a synthetic DSSE envelope whose payload encodes an in-toto + /// Statement whose predicate is the given CycloneDX-shaped JSON. + fn make_envelope(predicate: &serde_json::Value) -> String { + let stmt = serde_json::json!({ + "_type": "https://in-toto.io/Statement/v0.1", + "predicateType": "https://cyclonedx.org/bom", + "subject": [{"name": "test", "digest": {"sha256": "00".repeat(32)}}], + "predicate": predicate, + }); + let payload = B64.encode(serde_json::to_vec(&stmt).unwrap()); + let env = serde_json::json!({ + "payloadType": "application/vnd.in-toto+json", + "payload": payload, + "signatures": [{"keyid": "kid-1", "sig": "fake"}], + }); + serde_json::to_string(&env).unwrap() + } + + #[test] + fn extracts_predicate_from_well_formed_envelope() { + let predicate = serde_json::json!({ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "components": [], + }); + let envelope = make_envelope(&predicate); + let sbom_json = extract_sbom_from_envelope(&envelope).expect("parses"); + let parsed: serde_json::Value = serde_json::from_str(&sbom_json).unwrap(); + assert_eq!(parsed["bomFormat"], "CycloneDX"); + assert_eq!(parsed["specVersion"], "1.6"); + } + + #[test] + fn handles_per_line_envelope_emission() { + let 
predicate = serde_json::json!({"bomFormat": "CycloneDX", "specVersion": "1.6"}); + let env = make_envelope(&predicate); + // Cosign occasionally prefixes a status line like + // "Verification for --" before the JSON; reproduce that. + let combined = format!("Verification for example.com/img@sha256:abc --\n{env}\n"); + let sbom_json = extract_sbom_from_envelope(&combined).expect("parses"); + let parsed: serde_json::Value = serde_json::from_str(&sbom_json).unwrap(); + assert_eq!(parsed["bomFormat"], "CycloneDX"); + } + + #[test] + fn missing_payload_field_errors_clearly() { + let env = serde_json::json!({ + "payloadType": "application/vnd.in-toto+json", + "signatures": [], + }) + .to_string(); + let err = extract_sbom_from_envelope(&env).unwrap_err(); + assert!( + format!("{err:#}").contains("payload"), + "error must mention the missing field; got: {err:#}" + ); + } + + #[test] + fn empty_stdout_errors_clearly() { + let err = extract_sbom_from_envelope("").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("empty") || msg.contains("DSSE"), "got: {msg}"); + } + + #[test] + fn missing_predicate_in_statement_errors() { + // Hand-craft an envelope whose Statement has no predicate. 
+ let stmt = serde_json::json!({ + "_type": "https://in-toto.io/Statement/v0.1", + "predicateType": "https://cyclonedx.org/bom", + "subject": [], + }); + let payload = B64.encode(serde_json::to_vec(&stmt).unwrap()); + let env = serde_json::json!({ + "payloadType": "application/vnd.in-toto+json", + "payload": payload, + "signatures": [], + }) + .to_string(); + let err = extract_sbom_from_envelope(&env).unwrap_err(); + assert!(format!("{err:#}").contains("predicate")); + } + + #[test] + fn malformed_base64_payload_errors() { + let env = serde_json::json!({ + "payloadType": "application/vnd.in-toto+json", + "payload": "this is not base64!@#$", + "signatures": [], + }) + .to_string(); + let err = extract_sbom_from_envelope(&env).unwrap_err(); + assert!(format!("{err:#}").to_lowercase().contains("base64")); + } + + /// Integration: write a fake `cosign` script to a tempdir, prepend + /// it to PATH, call `fetch_verified_sbom`, assert the round-trip. + /// PATH mutation is serialized via `clock::test_env_lock()`. 
+ #[cfg(unix)] + #[test] + fn fetch_verified_sbom_invokes_cosign_on_path() { + use std::io::Write; + use std::os::unix::fs::PermissionsExt; + + let _guard = crate::clock::test_env_lock(); + + let dir = std::env::temp_dir().join(format!( + "bomdrift-attestation-fakecosign-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0) + )); + std::fs::create_dir_all(&dir).unwrap(); + + let predicate = serde_json::json!({ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "components": [], + }); + let envelope = make_envelope(&predicate); + + let script = dir.join("cosign"); + let body = format!("#!/bin/sh\ncat <<'EOF'\n{envelope}\nEOF\n"); + { + let mut f = std::fs::File::create(&script).unwrap(); + f.write_all(body.as_bytes()).unwrap(); + f.sync_all().unwrap(); + } + let mut perms = std::fs::metadata(&script).unwrap().permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&script, perms).unwrap(); + + let prev_path = std::env::var_os("PATH"); + let new_path = match &prev_path { + Some(p) => { + let mut v = std::ffi::OsString::from(&dir); + v.push(":"); + v.push(p); + v + } + None => std::ffi::OsString::from(&dir), + }; + // SAFETY: serialized via test_env_lock above. + unsafe { std::env::set_var("PATH", &new_path) }; + + let result = fetch_verified_sbom( + "example.com/img:tag", + "https://github.com/owner/.+", + "https://token.actions.githubusercontent.com", + ); + + // Restore PATH BEFORE asserting so a panic doesn't leave the + // test environment in a weird state for parallel tests. 
+ match prev_path { + Some(p) => unsafe { std::env::set_var("PATH", p) }, + None => unsafe { std::env::remove_var("PATH") }, + } + let _ = std::fs::remove_dir_all(&dir); + + let sbom = result.expect("fake cosign returns valid envelope"); + let parsed: serde_json::Value = serde_json::from_str(&sbom).unwrap(); + assert_eq!(parsed["bomFormat"], "CycloneDX"); + } + + #[cfg(unix)] + #[test] + fn fetch_verified_sbom_reports_missing_cosign() { + let _guard = crate::clock::test_env_lock(); + + let prev_path = std::env::var_os("PATH"); + // SAFETY: serialized via test_env_lock above. + unsafe { std::env::set_var("PATH", "/nonexistent-bomdrift-empty-path-12345") }; + + let result = fetch_verified_sbom( + "example.com/img:tag", + "https://example.com/.+", + "https://example.com", + ); + + match prev_path { + Some(p) => unsafe { std::env::set_var("PATH", p) }, + None => unsafe { std::env::remove_var("PATH") }, + } + + let err = result.expect_err("must surface clear error when cosign is missing"); + let msg = format!("{err:#}"); + assert!( + msg.contains("cosign") && msg.contains(COSIGN_INSTALL_URL), + "error must mention cosign + install URL; got: {msg}" + ); + } +} diff --git a/src/cli.rs b/src/cli.rs index a5e4133..94c67e0 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -210,9 +210,14 @@ impl From for markdown::Platform { #[derive(Args, Debug)] pub struct DiffArgs { /// Path to the "before" SBOM (CycloneDX, SPDX, or Syft JSON). - pub before: PathBuf, + /// Optional when `--before-attestation` is used to fetch the SBOM + /// from an OCI registry instead. + #[arg(required_unless_present = "before_attestation")] + pub before: Option, /// Path to the "after" SBOM (CycloneDX, SPDX, or Syft JSON). - pub after: PathBuf, + /// Optional when `--after-attestation` is used. + #[arg(required_unless_present = "after_attestation")] + pub after: Option, /// Path to a repo policy config file. When omitted, `.bomdrift.toml` is /// loaded if it exists in the current working directory. 
#[arg(long)] @@ -388,6 +393,58 @@ pub struct DiffArgs { /// the OpenVEX spec. #[arg(long)] pub vex_default_justification: Option, + /// Override the typosquat similarity threshold (default 0.92). + /// Range 0.0 - 1.0 inclusive. Lower values surface more findings + /// (and more false positives); higher values cut down to only + /// near-perfect matches. v0.9.6+. + #[arg(long, value_parser = parse_similarity_threshold)] + pub typosquat_similarity_threshold: Option, + /// Override the maintainer-age young-maintainer-days threshold + /// (default 90 days). Components whose top contributor's first + /// commit is younger than this trip a `YoungMaintainer` finding. + /// Must be >= 1. v0.9.6+. + #[arg(long, value_parser = clap::value_parser!(i64).range(1..))] + pub young_maintainer_days: Option, + /// Override the on-disk cache TTL in hours (default 24). Applies + /// uniformly to OSV / EPSS / KEV / Registry caches. Must be >= 1. + /// v0.9.6+. + #[arg(long, value_parser = clap::value_parser!(u64).range(1..))] + pub cache_ttl_hours: Option, + /// Fetch the "before" SBOM as a cosign-verified attestation + /// attached to an OCI artifact instead of reading a local file. + /// Mutually exclusive with the positional `before` argument. + /// Requires `--cosign-identity` and `--cosign-issuer`. v0.9.6+. + #[arg(long, conflicts_with = "before")] + pub before_attestation: Option, + /// Fetch the "after" SBOM as a cosign-verified attestation + /// attached to an OCI artifact. v0.9.6+. + #[arg(long, conflicts_with = "after")] + pub after_attestation: Option, + /// Regex passed to `cosign verify-attestation + /// --certificate-identity-regexp`. Required when either + /// `--before-attestation` or `--after-attestation` is set. + /// Example: `https://github.com/owner/.+`. v0.9.6+. + #[arg(long)] + pub cosign_identity: Option, + /// URL passed to `cosign verify-attestation + /// --certificate-oidc-issuer`. Required alongside + /// `--cosign-identity`. 
Example: + /// `https://token.actions.githubusercontent.com`. v0.9.6+. + #[arg(long)] + pub cosign_issuer: Option, + /// Refuse to fall back to local-file SBOMs: both sides MUST come + /// from a verified OCI attestation. Implies that + /// `--before-attestation` and `--after-attestation` are both set. + /// v0.9.6+. + #[arg(long)] + pub require_attestation: bool, + /// Path to a plugin manifest TOML. Repeatable. Each plugin is an + /// external executable invoked once per added / version-changed + /// component with JSON over stdin/stdout. Plugin failures (timeout, + /// non-zero exit, malformed JSON) drop their findings without + /// failing the diff. v0.9.6+. + #[arg(long, action = clap::ArgAction::Append)] + pub plugin: Vec, #[arg(long)] pub debug_calibration: bool, /// Format for `--debug-calibration` rows. `pipe` (default, back-compat @@ -486,3 +543,16 @@ impl InputFormat { } } } + +/// Clap value parser for `--typosquat-similarity-threshold`. Rejects +/// values outside the inclusive 0.0..=1.0 range with a clear message +/// (clap's built-in numeric range parser doesn't support `f64`). +fn parse_similarity_threshold(s: &str) -> Result { + let v: f64 = s + .parse() + .map_err(|_| format!("expected a float in 0.0..=1.0, got {s:?}"))?; + if !v.is_finite() || !(0.0..=1.0).contains(&v) { + return Err(format!("expected a float in 0.0..=1.0, got {v}")); + } + Ok(v) +} diff --git a/src/config.rs b/src/config.rs index 9b19f8b..c40ea14 100644 --- a/src/config.rs +++ b/src/config.rs @@ -73,6 +73,12 @@ pub struct DiffConfig { pub no_registry: Option, /// Override the default 14-day recently-published threshold. v0.9+. pub recently_published_days: Option, + /// Override the typosquat similarity threshold (default 0.92). v0.9.6+. + pub typosquat_similarity_threshold: Option, + /// Override the young-maintainer-days threshold (default 90). v0.9.6+. + pub young_maintainer_days: Option, + /// Override the on-disk cache TTL in hours (default 24). v0.9.6+. 
+ pub cache_ttl_hours: Option, } pub fn apply_diff_config(args: &mut DiffArgs) -> Result<()> { @@ -157,6 +163,15 @@ fn apply_loaded_diff_config(args: &mut DiffArgs, config: Config) { if args.recently_published_days.is_none() { args.recently_published_days = diff.recently_published_days; } + if args.typosquat_similarity_threshold.is_none() { + args.typosquat_similarity_threshold = diff.typosquat_similarity_threshold; + } + if args.young_maintainer_days.is_none() { + args.young_maintainer_days = diff.young_maintainer_days; + } + if args.cache_ttl_hours.is_none() { + args.cache_ttl_hours = diff.cache_ttl_hours; + } // [license] block: CLI flags override (not merge) when set. Mirrors // Dependency Review Action semantics so users moving between bomdrift @@ -204,8 +219,8 @@ mod tests { fn args() -> DiffArgs { DiffArgs { - before: "before.json".into(), - after: "after.json".into(), + before: Some("before.json".into()), + after: Some("after.json".into()), config: None, output: None, format: None, @@ -239,6 +254,15 @@ mod tests { vex_default_justification: None, no_registry: false, recently_published_days: None, + typosquat_similarity_threshold: None, + young_maintainer_days: None, + cache_ttl_hours: None, + before_attestation: None, + after_attestation: None, + cosign_identity: None, + cosign_issuer: None, + require_attestation: false, + plugin: Vec::new(), } } diff --git a/src/enrich/cache.rs b/src/enrich/cache.rs index 92a0936..f298f72 100644 --- a/src/enrich/cache.rs +++ b/src/enrich/cache.rs @@ -52,6 +52,18 @@ use crate::enrich::Severity; /// propagate within a day. pub const CACHE_TTL_SECS: u64 = 24 * 60 * 60; +/// Resolve the effective TTL in seconds. When `override_hours` is `Some` +/// (driven by `--cache-ttl-hours` / `[diff] cache_ttl_hours`), uses that +/// uniformly across OSV / EPSS / KEV / Registry caches; otherwise falls +/// back to the [`CACHE_TTL_SECS`] default. Single source of truth so the +/// override semantics stay identical across enrichers. 
+pub fn effective_ttl_secs(override_hours: Option) -> u64 { + match override_hours { + Some(h) if h > 0 => h.saturating_mul(3600), + _ => CACHE_TTL_SECS, + } +} + /// Subdirectory under the cache root where per-advisory entries live. const OSV_SUBDIR: &str = "osv"; @@ -72,6 +84,7 @@ struct CacheEntry { pub struct Cache { root: PathBuf, now_secs: fn() -> u64, + ttl_secs: u64, } impl Cache { @@ -79,10 +92,17 @@ impl Cache { /// `None` when the platform doesn't expose one (extremely rare; degraded /// to "always miss" so callers don't have to special-case). pub fn open() -> Option { + Self::open_with_ttl(None) + } + + /// Like [`Cache::open`] but lets the caller override the on-disk TTL + /// (driven by `--cache-ttl-hours`). `None` means use the default. + pub fn open_with_ttl(ttl_hours: Option) -> Option { let root = crate::refresh::default_cache_root().ok()?.join(OSV_SUBDIR); Some(Self { root, now_secs: default_now_secs, + ttl_secs: effective_ttl_secs(ttl_hours), }) } @@ -90,7 +110,11 @@ impl Cache { /// directory and pin the clock for deterministic TTL assertions. #[cfg(test)] pub fn with_root(root: PathBuf, now_secs: fn() -> u64) -> Self { - Self { root, now_secs } + Self { + root, + now_secs, + ttl_secs: CACHE_TTL_SECS, + } } /// Look up cached severity + aliases for `advisory_id`. Returns @@ -108,7 +132,7 @@ impl Cache { let body = std::fs::read(&path).ok()?; let entry: CacheEntry = serde_json::from_slice(&body).ok()?; let now = (self.now_secs)(); - if now.saturating_sub(entry.fetched_at) > CACHE_TTL_SECS { + if now.saturating_sub(entry.fetched_at) > self.ttl_secs { return None; } Some((entry.severity, entry.aliases)) @@ -175,7 +199,17 @@ fn sanitize(id: &str) -> String { /// `--no-osv-cache`: when `disabled` is true, return `None` so callers /// uniformly skip both reads and writes. 
pub fn open_unless_disabled(disabled: bool) -> Option { - if disabled { None } else { Cache::open() } + open_unless_disabled_with_ttl(disabled, None) +} + +/// Like [`open_unless_disabled`] but threads the `--cache-ttl-hours` +/// override through to [`Cache::open_with_ttl`]. +pub fn open_unless_disabled_with_ttl(disabled: bool, ttl_hours: Option) -> Option { + if disabled { + None + } else { + Cache::open_with_ttl(ttl_hours) + } } #[cfg(test)] @@ -314,4 +348,36 @@ mod tests { // ProjectDirs availability; just assert the function doesn't panic. let _ = open_unless_disabled(false); } + + #[test] + fn effective_ttl_secs_falls_back_to_default_when_none() { + assert_eq!(effective_ttl_secs(None), CACHE_TTL_SECS); + // 0 is a degenerate override; treat it as "use default" rather + // than "never cache" so a misread config doesn't disable the cache. + assert_eq!(effective_ttl_secs(Some(0)), CACHE_TTL_SECS); + } + + #[test] + fn effective_ttl_secs_converts_hours_to_seconds() { + assert_eq!(effective_ttl_secs(Some(1)), 3600); + assert_eq!(effective_ttl_secs(Some(48)), 48 * 3600); + } + + #[test] + fn cache_with_overridden_ttl_expires_independently_of_const() { + // Build a Cache whose TTL is 1 hour, then read past that window. + // Validates that the per-instance ttl_secs (not CACHE_TTL_SECS) + // gates expiration. 
+ let dir = tempdir_unique("override-ttl"); + let writer = Cache::with_root(dir.clone(), fixed_clock); + writer.put("GHSA-override", Severity::Low); + let mut reader = Cache::with_root(dir.clone(), || 1_700_000_000 + 3600 + 1); + reader.ttl_secs = effective_ttl_secs(Some(1)); + assert_eq!( + reader.get("GHSA-override"), + None, + "1h-TTL cache must miss after 1h+1s" + ); + let _ = std::fs::remove_dir_all(&dir); + } } diff --git a/src/enrich/epss.rs b/src/enrich/epss.rs index a10040d..15f8ea5 100644 --- a/src/enrich/epss.rs +++ b/src/enrich/epss.rs @@ -28,7 +28,6 @@ const MAX_BATCH: usize = 100; const SUBDIR: &str = "epss"; /// 24 hours — same TTL as the OSV cache so successive PR pushes within a /// work session hit cache. -const CACHE_TTL_SECS: u64 = 24 * 60 * 60; #[derive(Debug, Clone, Serialize, Deserialize)] struct CacheEntry { @@ -39,20 +38,32 @@ struct CacheEntry { /// Apply EPSS scores to every [`VulnRef`] in `e.vulns`. Updates in place; /// `--no-epss` callers should skip calling this entirely. Best-effort. pub fn enrich(e: &mut Enrichment) -> Result<()> { - enrich_with_url(e, EPSS_API_URL, DEFAULT_TIMEOUT) + enrich_with_ttl(e, None) } -fn enrich_with_url(e: &mut Enrichment, base_url: &str, timeout: Duration) -> Result<()> { +/// Like [`enrich`] but lets the caller override the on-disk cache TTL +/// (driven by `--cache-ttl-hours`). `None` means use the default. 
+pub fn enrich_with_ttl(e: &mut Enrichment, ttl_hours: Option) -> Result<()> { + enrich_with_url(e, EPSS_API_URL, DEFAULT_TIMEOUT, ttl_hours) +} + +fn enrich_with_url( + e: &mut Enrichment, + base_url: &str, + timeout: Duration, + ttl_hours: Option, +) -> Result<()> { let cves = collect_cves(e); if cves.is_empty() { return Ok(()); } + let ttl = crate::enrich::cache::effective_ttl_secs(ttl_hours); let mut scores: HashMap = HashMap::new(); let mut to_fetch: Vec = Vec::new(); let cache_root = cache_root(); for cve in &cves { if let Some(root) = &cache_root - && let Some(cached) = read_cache(root, cve) + && let Some(cached) = read_cache(root, cve, ttl) { if let Some(s) = cached { scores.insert(cve.clone(), s); @@ -166,12 +177,12 @@ fn cache_root() -> Option { .map(|p| p.join(SUBDIR)) } -fn read_cache(root: &std::path::Path, cve: &str) -> Option> { +fn read_cache(root: &std::path::Path, cve: &str, ttl_secs: u64) -> Option> { let path = root.join(format!("{}.json", sanitize(cve))); let body = std::fs::read(&path).ok()?; let entry: CacheEntry = serde_json::from_slice(&body).ok()?; let now = now_secs(); - if now.saturating_sub(entry.fetched_at) > CACHE_TTL_SECS { + if now.saturating_sub(entry.fetched_at) > ttl_secs { return None; } Some(entry.score) @@ -291,11 +302,11 @@ mod tests { )); std::fs::create_dir_all(&dir).unwrap(); write_cache(&dir, "CVE-2025-1", Some(0.5)); - let got = read_cache(&dir, "CVE-2025-1").unwrap(); + let got = read_cache(&dir, "CVE-2025-1", crate::enrich::cache::CACHE_TTL_SECS).unwrap(); assert_eq!(got, Some(0.5)); // Negative caching: no-score-found CVE. 
write_cache(&dir, "CVE-2025-2", None); - let got = read_cache(&dir, "CVE-2025-2").unwrap(); + let got = read_cache(&dir, "CVE-2025-2", crate::enrich::cache::CACHE_TTL_SECS).unwrap(); assert_eq!(got, None); let _ = std::fs::remove_dir_all(&dir); } diff --git a/src/enrich/kev.rs b/src/enrich/kev.rs index 3190929..12bc7ad 100644 --- a/src/enrich/kev.rs +++ b/src/enrich/kev.rs @@ -25,7 +25,6 @@ const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); const SUBDIR: &str = "kev"; const CACHE_FILE: &str = "catalog.json"; /// 24h — KEV publishes daily. -const CACHE_TTL_SECS: u64 = 24 * 60 * 60; #[derive(Deserialize, Debug)] struct KevFeed { @@ -41,14 +40,25 @@ struct KevEntry { /// Apply KEV flags to every [`VulnRef`] in `e.vulns`. `--no-kev` callers /// should skip calling this entirely. pub fn enrich(e: &mut Enrichment) -> Result<()> { - enrich_with_url(e, KEV_FEED_URL, DEFAULT_TIMEOUT) + enrich_with_ttl(e, None) } -fn enrich_with_url(e: &mut Enrichment, url: &str, timeout: Duration) -> Result<()> { +/// Like [`enrich`] but lets the caller override the on-disk cache TTL +/// (driven by `--cache-ttl-hours`). `None` means use the default. 
+pub fn enrich_with_ttl(e: &mut Enrichment, ttl_hours: Option) -> Result<()> { + enrich_with_url(e, KEV_FEED_URL, DEFAULT_TIMEOUT, ttl_hours) +} + +fn enrich_with_url( + e: &mut Enrichment, + url: &str, + timeout: Duration, + ttl_hours: Option, +) -> Result<()> { if e.vulns.is_empty() { return Ok(()); } - let kev_ids = match load_or_fetch(url, timeout) { + let kev_ids = match load_or_fetch(url, timeout, ttl_hours) { Ok(ids) => ids, Err(err) => { if std::env::var("BOMDRIFT_DEBUG").is_ok() { @@ -72,10 +82,11 @@ fn apply_kev(e: &mut Enrichment, kev: &HashSet) { } } -fn load_or_fetch(url: &str, timeout: Duration) -> Result> { +fn load_or_fetch(url: &str, timeout: Duration, ttl_hours: Option) -> Result> { let cache_path = cache_path(); + let ttl = crate::enrich::cache::effective_ttl_secs(ttl_hours); if let Some(path) = &cache_path - && let Some(ids) = read_cache(path) + && let Some(ids) = read_cache(path, ttl) { return Ok(ids); } @@ -107,12 +118,12 @@ fn cache_path() -> Option { .map(|p| p.join(SUBDIR).join(CACHE_FILE)) } -fn read_cache(path: &std::path::Path) -> Option> { +fn read_cache(path: &std::path::Path, ttl_secs: u64) -> Option> { let meta = std::fs::metadata(path).ok()?; let modified = meta.modified().ok()?; let now = SystemTime::now(); let age = now.duration_since(modified).ok()?; - if age.as_secs() > CACHE_TTL_SECS { + if age.as_secs() > ttl_secs { return None; } let body = std::fs::read(path).ok()?; diff --git a/src/enrich/maintainer.rs b/src/enrich/maintainer.rs index 132df56..1ded7c5 100644 --- a/src/enrich/maintainer.rs +++ b/src/enrich/maintainer.rs @@ -95,14 +95,16 @@ struct MaintainerInfo { } pub fn enrich(cs: &ChangeSet) -> Result> { - enrich_with(cs, GITHUB_API_BASE, DEFAULT_TIMEOUT) + enrich_with(cs, GITHUB_API_BASE, DEFAULT_TIMEOUT, None) } pub fn enrich_with( cs: &ChangeSet, base_url: &str, timeout: Duration, + young_maintainer_days: Option, ) -> Result> { + let threshold = young_maintainer_days.unwrap_or(YOUNG_MAINTAINER_DAYS); if 
cs.added.is_empty() { return Ok(Vec::new()); } @@ -148,7 +150,7 @@ pub fn enrich_with( }; if let Some((login, date, days)) = info.finding - && days < YOUNG_MAINTAINER_DAYS + && days < threshold { out.push(MaintainerAgeFinding { component: comp.clone(), @@ -543,7 +545,7 @@ mod tests { added: vec![comp_with_url("foo", None)], ..Default::default() }; - let out = enrich_with(&cs, "http://127.0.0.1:1", Duration::from_millis(50)) + let out = enrich_with(&cs, "http://127.0.0.1:1", Duration::from_millis(50), None) .expect("no source_url means no HTTP, must succeed"); assert!(out.is_empty()); } @@ -554,7 +556,7 @@ mod tests { added: vec![comp_with_url("foo", Some("https://gitlab.com/foo/bar"))], ..Default::default() }; - let out = enrich_with(&cs, "http://127.0.0.1:1", Duration::from_millis(50)) + let out = enrich_with(&cs, "http://127.0.0.1:1", Duration::from_millis(50), None) .expect("non-github means no HTTP, must succeed"); assert!(out.is_empty()); } diff --git a/src/enrich/mod.rs b/src/enrich/mod.rs index 3334da4..7728b38 100644 --- a/src/enrich/mod.rs +++ b/src/enrich/mod.rs @@ -82,6 +82,11 @@ pub struct Enrichment { /// the diff was filtered. v0.9+. #[serde(default, skip_serializing_if = "is_zero_usize")] pub vex_suppressed_count: usize, + /// Findings emitted by external `--plugin` processes (Phase C, v0.9.6). + /// One element per plugin-finding, already tagged with the plugin + /// that produced it. Renderers group by `plugin_name` for display. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub plugin_findings: Vec, } fn is_zero_usize(n: &usize) -> bool { @@ -105,6 +110,7 @@ impl Enrichment { || !self.recently_published.is_empty() || !self.deprecated.is_empty() || !self.maintainer_set_changed.is_empty() + || !self.plugin_findings.is_empty() } } diff --git a/src/enrich/osv.rs b/src/enrich/osv.rs index 542db3e..b2a1c0d 100644 --- a/src/enrich/osv.rs +++ b/src/enrich/osv.rs @@ -42,11 +42,21 @@ pub fn enrich(cs: &ChangeSet) -> Result { /// Like [`enrich`] but lets the caller opt out of the on-disk severity cache /// (`bomdrift diff --no-osv-cache`). pub fn enrich_cached(cs: &ChangeSet, no_cache: bool) -> Result { + enrich_cached_with_ttl(cs, no_cache, None) +} + +/// Like [`enrich_cached`] but lets the caller override the cache TTL +/// (driven by `--cache-ttl-hours`). `None` means use the default. +pub fn enrich_cached_with_ttl( + cs: &ChangeSet, + no_cache: bool, + cache_ttl_hours: Option, +) -> Result { let purls = candidate_purls(cs); if purls.is_empty() { return Ok(Enrichment::default()); } - let cache = crate::enrich::cache::open_unless_disabled(no_cache); + let cache = crate::enrich::cache::open_unless_disabled_with_ttl(no_cache, cache_ttl_hours); enrich_with( &purls, OSV_BATCH_URL, @@ -169,6 +179,7 @@ fn enrich_with( maintainer_set_changed: Vec::new(), vex_annotations: std::collections::HashMap::new(), vex_suppressed_count: 0, + plugin_findings: Vec::new(), }) } diff --git a/src/enrich/registry.rs b/src/enrich/registry.rs index 8aaeb9d..fe914e2 100644 --- a/src/enrich/registry.rs +++ b/src/enrich/registry.rs @@ -21,7 +21,6 @@ use crate::diff::ChangeSet; use crate::model::{Component, Ecosystem}; const SUBDIR: &str = "registry"; -const CACHE_TTL_SECS: u64 = 24 * 60 * 60; const DEFAULT_TIMEOUT: Duration = Duration::from_secs(15); /// Default "recently published" age threshold (days). Components with @@ -80,18 +79,30 @@ struct CacheEntry { } /// Run the registry enrichers. 
`recently_published_days` overrides -/// [`MIN_PUBLISHED_AGE_DAYS`] when `Some`. -pub fn enrich(cs: &ChangeSet, recently_published_days: Option) -> RegistryFindings { - enrich_with(cs, recently_published_days, DEFAULT_TIMEOUT) +/// [`MIN_PUBLISHED_AGE_DAYS`] when `Some`. `cache_ttl_hours` overrides +/// the default 24h on-disk cache TTL when `Some`. +pub fn enrich( + cs: &ChangeSet, + recently_published_days: Option, + cache_ttl_hours: Option, +) -> RegistryFindings { + enrich_with( + cs, + recently_published_days, + cache_ttl_hours, + DEFAULT_TIMEOUT, + ) } fn enrich_with( cs: &ChangeSet, recently_published_days: Option, + cache_ttl_hours: Option, timeout: Duration, ) -> RegistryFindings { let mut out = RegistryFindings::default(); let threshold = recently_published_days.unwrap_or(MIN_PUBLISHED_AGE_DAYS); + let ttl_secs = crate::enrich::cache::effective_ttl_secs(cache_ttl_hours); let agent = ureq::AgentBuilder::new().timeout(timeout).build(); let cache_root = cache_root(); @@ -99,7 +110,7 @@ fn enrich_with( let Some(eco) = supported_ecosystem(c) else { continue; }; - let Some(entry) = lookup(&agent, cache_root.as_ref(), eco, &c.name) else { + let Some(entry) = lookup(&agent, cache_root.as_ref(), eco, &c.name, ttl_secs) else { continue; }; // Recently-published check: prefer per-version date if known, @@ -132,7 +143,13 @@ fn enrich_with( let Some(RegEco::Npm) = supported_ecosystem(after) else { continue; }; - let Some(entry) = lookup(&agent, cache_root.as_ref(), RegEco::Npm, &after.name) else { + let Some(entry) = lookup( + &agent, + cache_root.as_ref(), + RegEco::Npm, + &after.name, + ttl_secs, + ) else { continue; }; let bef = entry @@ -201,9 +218,10 @@ fn lookup( cache_root: Option<&PathBuf>, eco: RegEco, name: &str, + ttl_secs: u64, ) -> Option { if let Some(root) = cache_root - && let Some(cached) = read_cache(root, eco, name) + && let Some(cached) = read_cache(root, eco, name, ttl_secs) { return Some(cached); } @@ -371,11 +389,16 @@ fn cache_path(root: 
&std::path::Path, eco: RegEco, name: &str) -> PathBuf { .join(format!("{}.json", sanitize(name))) } -fn read_cache(root: &std::path::Path, eco: RegEco, name: &str) -> Option { +fn read_cache( + root: &std::path::Path, + eco: RegEco, + name: &str, + ttl_secs: u64, +) -> Option { let p = cache_path(root, eco, name); let body = std::fs::read(&p).ok()?; let entry: CacheEntry = serde_json::from_slice(&body).ok()?; - if now_secs().saturating_sub(entry.fetched_at) > CACHE_TTL_SECS { + if now_secs().saturating_sub(entry.fetched_at) > ttl_secs { return None; } Some(entry) diff --git a/src/enrich/typosquat.rs b/src/enrich/typosquat.rs index 4ca34db..0801880 100644 --- a/src/enrich/typosquat.rs +++ b/src/enrich/typosquat.rs @@ -182,19 +182,33 @@ impl SupportedEcosystem { } pub fn enrich(cs: &ChangeSet) -> Vec { + enrich_with_threshold(cs, None) +} + +/// Like [`enrich`] but lets the caller override [`SIMILARITY_THRESHOLD`] +/// (driven by `--typosquat-similarity-threshold`). `None` uses the default. 
+pub fn enrich_with_threshold( + cs: &ChangeSet, + similarity_threshold: Option, +) -> Vec { + let threshold = similarity_threshold.unwrap_or(SIMILARITY_THRESHOLD); let mut out = Vec::new(); for comp in &cs.added { let Some(eco) = SupportedEcosystem::from(&comp.ecosystem) else { continue; }; - if let Some(finding) = check_one(comp, eco) { + if let Some(finding) = check_one(comp, eco, threshold) { out.push(finding); } } out } -fn check_one(comp: &Component, eco: SupportedEcosystem) -> Option { +fn check_one( + comp: &Component, + eco: SupportedEcosystem, + threshold: f64, +) -> Option { let candidate = canonicalize(eco, &comp.name); let legit_list = legit_list_for(eco); let legit_set = legit_set_for(eco); @@ -202,7 +216,7 @@ fn check_one(comp: &Component, eco: SupportedEcosystem) -> Option best_match_maven(&candidate, legit_list)?, + SupportedEcosystem::Maven => best_match_maven(&candidate, legit_list, threshold)?, SupportedEcosystem::Npm | SupportedEcosystem::PyPI | SupportedEcosystem::Cargo @@ -211,7 +225,7 @@ fn check_one(comp: &Component, eco: SupportedEcosystem) -> Option best_match_jw(&candidate, legit_list, eco)?, }; - if score >= SIMILARITY_THRESHOLD { + if score >= threshold { Some(TyposquatFinding { component: comp.clone(), closest: closest.to_string(), @@ -337,7 +351,11 @@ fn best_match_jw<'a>( /// Returning JW-equivalent score so the rendered table is consistent with /// the other ecosystems: dist=1 → 0.97-ish, dist=2 → 0.94-ish, both above /// [`SIMILARITY_THRESHOLD`]. 
-fn best_match_maven<'a>(candidate: &str, legit: &'a [String]) -> Option<(&'a str, f64)> { +fn best_match_maven<'a>( + candidate: &str, + legit: &'a [String], + threshold: f64, +) -> Option<(&'a str, f64)> { let cand_artifact = artifact_id(candidate); let mut best: Option<(&'a str, usize, &str)> = None; for name in legit { @@ -361,7 +379,7 @@ fn best_match_maven<'a>(candidate: &str, legit: &'a [String]) -> Option<(&'a str best.map(|(name, dist, legit_artifact)| { let denom = (legit_artifact.len() as f64) + 1.0; let raw = 1.0 - (dist as f64) / denom; - (name, raw.max(SIMILARITY_THRESHOLD)) + (name, raw.max(threshold)) }) } @@ -1046,4 +1064,18 @@ mod tests { let _ = enrich(&cs); } } + + #[test] + fn similarity_threshold_override_widens_match_set() { + // Pick a near-miss candidate; relaxing the threshold must not + // reduce the finding count vs a strict 0.99 cutoff. + let candidate = comp("expressss"); + let cs = cs_added(vec![candidate.clone()]); + let strict = enrich_with_threshold(&cs, Some(0.99)); + let relaxed = enrich_with_threshold(&cs, Some(0.80)); + assert!( + relaxed.len() >= strict.len(), + "lowering the threshold must not reduce findings" + ); + } } diff --git a/src/lib.rs b/src/lib.rs index 5ff5620..af99af8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod attestation; pub mod baseline; pub mod cli; pub mod clock; @@ -6,6 +7,7 @@ pub mod diff; pub mod enrich; pub mod model; pub mod parse; +pub mod plugin; pub mod refresh; pub mod render; pub mod vex; @@ -132,13 +134,41 @@ fn run_baseline(action: BaselineAction) -> Result<()> { fn run_diff(mut args: DiffArgs) -> Result<()> { config::apply_diff_config(&mut args)?; + if args.require_attestation + && (args.before_attestation.is_none() || args.after_attestation.is_none()) + { + anyhow::bail!( + "--require-attestation needs both --before-attestation and --after-attestation" + ); + } + let output = args.output.unwrap_or(OutputFormat::Terminal); let format = 
args.format.unwrap_or(cli::InputFormat::Auto); let fail_on = args.fail_on.unwrap_or(FailOn::None); let format_hint = format.to_sbom_format(); - let before = load_sbom(&args.before, format_hint, args.include_file_components)?; - let after = load_sbom(&args.after, format_hint, args.include_file_components)?; + let before = load_sbom_or_attestation( + args.before.as_deref(), + args.before_attestation.as_deref(), + args.cosign_identity.as_deref(), + args.cosign_issuer.as_deref(), + format_hint, + args.include_file_components, + "before", + args.debug_calibration, + args.debug_calibration_format, + )?; + let after = load_sbom_or_attestation( + args.after.as_deref(), + args.after_attestation.as_deref(), + args.cosign_identity.as_deref(), + args.cosign_issuer.as_deref(), + format_hint, + args.include_file_components, + "after", + args.debug_calibration, + args.debug_calibration_format, + )?; let mut cs = diff::diff(&before, &after); @@ -147,7 +177,7 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { } else { // OSV enrichment is best-effort. Network failures must not block the diff // from rendering — a PR review is still useful without CVE data. - match enrich::osv::enrich_cached(&cs, args.no_osv_cache) { + match enrich::osv::enrich_cached_with_ttl(&cs, args.no_osv_cache, args.cache_ttl_hours) { Ok(e) => e, Err(err) => { eprintln!("warning: OSV enrichment failed, continuing without it: {err:#}"); @@ -161,20 +191,21 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { // there are no vulns. 
if !args.no_epss && !enrichment.vulns.is_empty() - && let Err(err) = enrich::epss::enrich(&mut enrichment) + && let Err(err) = enrich::epss::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) { eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}"); } if !args.no_kev && !enrichment.vulns.is_empty() - && let Err(err) = enrich::kev::enrich(&mut enrichment) + && let Err(err) = enrich::kev::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) { eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}"); } // Typosquat detection is pure-compute (embedded reference list) and always // runs, regardless of `--no-osv`. Findings are informational. - enrichment.typosquats = enrich::typosquat::enrich(&cs); + enrichment.typosquats = + enrich::typosquat::enrich_with_threshold(&cs, args.typosquat_similarity_threshold); // Multi-major version-jump detection is pure-compute and also always runs. // Findings are informational. @@ -184,7 +215,12 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { // `--no-maintainer-age` for offline runs. Best-effort: failures warn and // continue, mirroring the OSV enricher's contract. if !args.no_maintainer_age { - match enrich::maintainer::enrich(&cs) { + match enrich::maintainer::enrich_with( + &cs, + "https://api.github.com", + std::time::Duration::from_secs(15), + args.young_maintainer_days, + ) { Ok(findings) => enrichment.maintainer_age = findings, Err(err) => { eprintln!( @@ -209,12 +245,29 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { // Registry-metadata enrichers (Phase K, v0.9). Best-effort — a // registry timeout returns Ok with no findings. 
if !args.no_registry { - let findings = enrich::registry::enrich(&cs, args.recently_published_days); + let findings = + enrich::registry::enrich(&cs, args.recently_published_days, args.cache_ttl_hours); enrichment.recently_published = findings.recently_published; enrichment.deprecated = findings.deprecated; enrichment.maintainer_set_changed = findings.maintainer_set_changed; } + // Plugin findings (Phase C, v0.9.6). Run after every built-in + // enricher so plugins observe the same `cs` view bomdrift renders; + // before baseline so plugin findings can be baselined too. Plugin + // failures degrade gracefully — a malformed manifest aborts the + // run (config error), but plugin runtime failures emit only a + // BOMDRIFT_DEBUG-gated stderr warning and contribute no findings. + if !args.plugin.is_empty() { + let mut manifests = Vec::with_capacity(args.plugin.len()); + for path in &args.plugin { + let manifest = plugin::load_manifest(path) + .with_context(|| format!("loading --plugin {}", path.display()))?; + manifests.push(manifest); + } + enrichment.plugin_findings = plugin::run_plugins(&manifests, &cs); + } + // Apply the baseline AFTER all enrichers run — suppression operates on // the realized finding set, not on intermediate inputs. This keeps the // baseline file format stable as new enrichers are added: a new finding @@ -298,6 +351,10 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { &enrichment, &mut std::io::stderr(), args.debug_calibration_format, + CalibrationOverrides { + similarity_threshold: args.typosquat_similarity_threshold, + young_maintainer_days: args.young_maintainer_days, + }, ); } @@ -461,15 +518,32 @@ pub fn budget_tripped( /// `threshold` is the constant the score was gated against. CVE rows /// surface every advisory (no internal threshold) so adopters can see /// the score distribution before tuning `--fail-on critical-cve`. +/// Active overrides for the configurable calibration thresholds. 
Threaded +/// into [`write_calibration_lines`] so emitted rows reflect the effective +/// threshold the enricher actually used, not the unconditional const default. +#[derive(Debug, Default, Clone, Copy)] +pub(crate) struct CalibrationOverrides { + pub similarity_threshold: Option, + pub young_maintainer_days: Option, +} + fn write_calibration_lines( e: &Enrichment, out: &mut W, format: crate::cli::DebugFormat, + overrides: CalibrationOverrides, ) { use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS; use crate::enrich::typosquat::SIMILARITY_THRESHOLD; use crate::enrich::version_jump::MIN_MAJOR_DELTA; + let active_similarity = overrides + .similarity_threshold + .unwrap_or(SIMILARITY_THRESHOLD); + let active_young = overrides + .young_maintainer_days + .unwrap_or(YOUNG_MAINTAINER_DAYS); + for f in &e.typosquats { write_calibration_row( out, @@ -479,7 +553,7 @@ fn write_calibration_lines( .as_deref() .unwrap_or(f.component.name.as_str()), CalibrationScore::Float(f.score), - CalibrationThreshold::Float(SIMILARITY_THRESHOLD), + CalibrationThreshold::Float(active_similarity), format, ); } @@ -502,7 +576,7 @@ fn write_calibration_lines( .as_deref() .unwrap_or(f.component.name.as_str()), CalibrationScore::Int(f.days_old), - CalibrationThreshold::Int(YOUNG_MAINTAINER_DAYS), + CalibrationThreshold::Int(active_young), format, ); } @@ -750,16 +824,98 @@ fn load_sbom( ) -> Result { let raw = fs::read_to_string(path) .with_context(|| format!("reading SBOM file: {}", path.display()))?; - let value: serde_json::Value = serde_json::from_str(&raw) - .with_context(|| format!("parsing JSON in: {}", path.display()))?; + parse_sbom_bytes( + &raw, + &path.display().to_string(), + format_hint, + include_file_components, + ) +} + +fn parse_sbom_bytes( + raw: &str, + source_label: &str, + format_hint: Option, + include_file_components: bool, +) -> Result { + let value: serde_json::Value = + serde_json::from_str(raw).with_context(|| format!("parsing JSON in: {source_label}"))?; let mut 
sbom = parse::parse_with_format(value, format_hint) - .with_context(|| format!("normalizing SBOM from: {}", path.display()))?; + .with_context(|| format!("normalizing SBOM from: {source_label}"))?; if !include_file_components { parse::filter_file_components(&mut sbom); } Ok(sbom) } +#[allow(clippy::too_many_arguments)] +fn load_sbom_or_attestation( + path: Option<&Path>, + oci_ref: Option<&str>, + cosign_identity: Option<&str>, + cosign_issuer: Option<&str>, + format_hint: Option, + include_file_components: bool, + side: &str, + debug_calibration: bool, + debug_format: crate::cli::DebugFormat, +) -> Result { + if let Some(oci) = oci_ref { + let identity = cosign_identity.ok_or_else(|| { + anyhow::anyhow!( + "--{side}-attestation requires --cosign-identity (regex passed to cosign --certificate-identity-regexp)" + ) + })?; + let issuer = cosign_issuer.ok_or_else(|| { + anyhow::anyhow!( + "--{side}-attestation requires --cosign-issuer (URL passed to cosign --certificate-oidc-issuer)" + ) + })?; + let body = attestation::fetch_verified_sbom(oci, identity, issuer) + .with_context(|| format!("fetching --{side}-attestation {oci}"))?; + if debug_calibration { + // One row per verified attestation; surfaces the cert + // regex cosign accepted so adopters can confirm policy. 
+ let _ = + write_attestation_calibration(&mut std::io::stderr(), oci, identity, debug_format); + } + return parse_sbom_bytes( + &body, + &format!("attestation:{oci}"), + format_hint, + include_file_components, + ); + } + let path = path.ok_or_else(|| { + anyhow::anyhow!( + "internal: {side} requires either a positional path or --{side}-attestation" + ) + })?; + load_sbom(path, format_hint, include_file_components) +} + +fn write_attestation_calibration( + out: &mut W, + oci_ref: &str, + identity: &str, + format: crate::cli::DebugFormat, +) -> std::io::Result<()> { + match format { + crate::cli::DebugFormat::Pipe => { + writeln!(out, "attestation|{oci_ref}|verified|{identity}") + } + crate::cli::DebugFormat::Jsonl => { + let row = serde_json::json!({ + "kind": "attestation", + "key": oci_ref, + "score": "verified", + "threshold": identity, + }); + writeln!(out, "{row}") + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -1011,7 +1167,12 @@ mod tests { fn calibration_pipe_format_matches_v0_7_layout() { let e = enrichment_with_typosquat(); let mut buf = Vec::new(); - write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Pipe); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); let s = String::from_utf8(buf).unwrap(); assert!(s.starts_with("typosquat|"), "got: {s}"); assert_eq!( @@ -1025,7 +1186,12 @@ mod tests { fn calibration_jsonl_format_emits_one_object_per_line() { let e = enrichment_with_typosquat(); let mut buf = Vec::new(); - write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Jsonl); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Jsonl, + CalibrationOverrides::default(), + ); let s = String::from_utf8(buf).unwrap(); let lines: Vec<&str> = s.lines().collect(); assert_eq!(lines.len(), 1); @@ -1040,7 +1206,12 @@ mod tests { fn calibration_jsonl_keeps_severity_label_as_string() { let e = enrichment_with_cve_at(Severity::High); let mut buf = 
Vec::new(); - write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Jsonl); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Jsonl, + CalibrationOverrides::default(), + ); let s = String::from_utf8(buf).unwrap(); let v: serde_json::Value = serde_json::from_str(s.trim()).unwrap(); assert_eq!(v["kind"], "cve"); @@ -1082,7 +1253,12 @@ mod tests { refs[0].kev = true; } let mut buf = Vec::new(); - write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Pipe); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); let s = String::from_utf8(buf).unwrap(); assert!(s.contains("epss|"), "missing epss row: {s}"); assert!(s.contains("kev|"), "missing kev row: {s}"); @@ -1113,7 +1289,12 @@ mod tests { kind: crate::enrich::LicenseViolationKind::Deny, }); let mut buf = Vec::new(); - write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Pipe); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); let s = String::from_utf8(buf).unwrap(); assert!( s.contains("license|"), diff --git a/src/plugin.rs b/src/plugin.rs new file mode 100644 index 0000000..36250c0 --- /dev/null +++ b/src/plugin.rs @@ -0,0 +1,671 @@ +//! External-process plugin loader (Phase C, v0.9.6). +//! +//! Plugins are external executables that bomdrift shells out to, one +//! invocation per (component, event) pair, with JSON over stdin/stdout. +//! Manifest is TOML, the executable is whatever language the plugin +//! author wants — bomdrift makes no language commitment. +//! +//! ## Protocol (v1) +//! +//! bomdrift writes one JSON object to the plugin's stdin then closes +//! stdin: +//! +//! ```json +//! { +//! "component": { ...component JSON... }, +//! "event": "added" | "version-changed", +//! "before": null | { ...component JSON... } +//! } +//! ``` +//! +//! Plugin writes one JSON object to stdout and exits 0: +//! +//! ```json +//! { +//! 
"findings": [ +//! { +//! "kind": "string-tag", +//! "message": "...", +//! "severity": "info" | "warning" | "error", +//! "rule_id": "..." +//! } +//! ] +//! } +//! ``` +//! +//! ## Best-effort failures +//! +//! Non-zero exit, malformed JSON, or timeout → plugin findings dropped +//! and a warning emitted to stderr at `BOMDRIFT_DEBUG=1`. The diff +//! still renders. This matches the contract used by every other v0.9 +//! enricher (OSV / EPSS / KEV / Registry). +//! +//! ## Stability +//! +//! The wire shape above is `v1` and may evolve. The stdin object also +//! carries `protocol_version: 1` so plugins can tell which wire revision +//! bomdrift is speaking as the protocol evolves. Today, plugins +//! that ignore the field continue to work unchanged. + +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; + +use crate::diff::ChangeSet; +use crate::model::Component; + +const PROTOCOL_VERSION: u32 = 1; +const DEFAULT_TIMEOUT_MS: u64 = 5000; +/// Polling interval used while waiting for a plugin to exit. Small +/// enough that a 100ms timeout is observed within ~110ms; large enough +/// that a fast plugin doesn't pay a full poll-cycle cost. 
+const POLL_INTERVAL_MS: u64 = 25; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum InvokeEvent { + Added, + VersionChanged, +} + +impl InvokeEvent { + fn as_wire(&self) -> &'static str { + match self { + InvokeEvent::Added => "added", + InvokeEvent::VersionChanged => "version-changed", + } + } +} + +#[derive(Debug, Clone, Deserialize)] +struct ManifestFile { + plugin: PluginSection, +} + +#[derive(Debug, Clone, Deserialize)] +struct PluginSection { + name: String, + #[serde(default)] + description: Option, + exec: PathBuf, + #[serde(default = "default_timeout_ms")] + timeout_ms: u64, + #[serde(default = "default_invoke_on")] + invoke_on: Vec, +} + +fn default_timeout_ms() -> u64 { + DEFAULT_TIMEOUT_MS +} +fn default_invoke_on() -> Vec { + vec![InvokeEvent::Added, InvokeEvent::VersionChanged] +} + +#[derive(Debug, Clone, PartialEq)] +pub struct PluginManifest { + pub name: String, + pub description: Option, + /// Path to the plugin executable. A relative `exec` is joined onto + /// the manifest's parent dir at load time, so the stored path is + /// absolute only when the manifest path itself was absolute. + pub exec: PathBuf, + pub timeout_ms: u64, + pub invoke_on: Vec, + /// Path the manifest was loaded from. Useful in stderr diagnostics. 
+ pub manifest_path: PathBuf, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum PluginSeverity { + Info, + Warning, + Error, +} + +impl PluginSeverity { + pub fn as_str(self) -> &'static str { + match self { + PluginSeverity::Info => "info", + PluginSeverity::Warning => "warning", + PluginSeverity::Error => "error", + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct PluginFinding { + pub plugin_name: String, + pub component_purl: String, + pub kind: String, + pub message: String, + pub severity: PluginSeverity, + pub rule_id: String, +} + +impl PluginFinding { + /// Stable per-finding identity hash for SARIF `partialFingerprints`. + /// Distinct per `(plugin_name, component_purl, rule_id)` so two + /// plugins emitting the same `rule_id` don't collide, and the same + /// plugin emitting the same `rule_id` against two purls produces + /// two distinct fingerprints. + pub fn fingerprint(&self) -> String { + let mut h = Sha256::new(); + h.update(b"bomdrift.plugin|"); + h.update(self.plugin_name.as_bytes()); + h.update(b"|"); + h.update(self.component_purl.as_bytes()); + h.update(b"|"); + h.update(self.rule_id.as_bytes()); + let digest = h.finalize(); + let mut out = String::with_capacity(64); + for byte in digest { + use std::fmt::Write; + let _ = write!(out, "{byte:02x}"); + } + out + } +} + +/// Wire-format input written to plugin stdin. +#[derive(Debug, Serialize)] +struct PluginInput<'a> { + protocol_version: u32, + component: &'a Component, + event: &'a str, + before: Option<&'a Component>, +} + +/// Wire-format output read from plugin stdout. +#[derive(Debug, Deserialize)] +struct PluginOutput { + findings: Vec, +} + +#[derive(Debug, Deserialize)] +struct RawFinding { + kind: String, + message: String, + severity: PluginSeverity, + rule_id: String, +} + +/// Load + validate a plugin manifest from `path`. 
Resolves relative +/// `exec` paths against the manifest's parent directory. +pub fn load_manifest(path: &Path) -> Result { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("reading plugin manifest: {}", path.display()))?; + let parsed: ManifestFile = toml::from_str(&raw) + .with_context(|| format!("parsing plugin manifest TOML: {}", path.display()))?; + + let exec = if parsed.plugin.exec.is_absolute() { + parsed.plugin.exec + } else if let Some(parent) = path.parent() { + parent.join(&parsed.plugin.exec) + } else { + parsed.plugin.exec + }; + + if parsed.plugin.timeout_ms == 0 { + anyhow::bail!("plugin manifest {}: timeout_ms must be > 0", path.display()); + } + + Ok(PluginManifest { + name: parsed.plugin.name, + description: parsed.plugin.description, + exec, + timeout_ms: parsed.plugin.timeout_ms, + invoke_on: parsed.plugin.invoke_on, + manifest_path: path.to_path_buf(), + }) +} + +/// Run every plugin against the relevant components in `cs` and return +/// the merged finding list. Best-effort: a plugin that errors is logged +/// to stderr at `BOMDRIFT_DEBUG=1` and contributes no findings. 
+pub fn run_plugins(manifests: &[PluginManifest], cs: &ChangeSet) -> Vec { + let mut out = Vec::new(); + for manifest in manifests { + if manifest.invoke_on.contains(&InvokeEvent::Added) { + for component in &cs.added { + run_one(manifest, component, InvokeEvent::Added, None, &mut out); + } + } + if manifest.invoke_on.contains(&InvokeEvent::VersionChanged) { + for (before, after) in &cs.version_changed { + run_one( + manifest, + after, + InvokeEvent::VersionChanged, + Some(before), + &mut out, + ); + } + } + } + out +} + +fn run_one( + manifest: &PluginManifest, + component: &Component, + event: InvokeEvent, + before: Option<&Component>, + out: &mut Vec, +) { + let purl = component + .purl + .clone() + .unwrap_or_else(|| component.name.clone()); + + match invoke_blocking(manifest, component, event, before) { + Ok(findings) => { + for f in findings { + out.push(PluginFinding { + plugin_name: manifest.name.clone(), + component_purl: purl.clone(), + kind: f.kind, + message: f.message, + severity: f.severity, + rule_id: f.rule_id, + }); + } + } + Err(err) => { + if std::env::var("BOMDRIFT_DEBUG").is_ok() { + eprintln!("plugin {} on {}: {err:#}", manifest.name, purl); + } + } + } +} + +fn invoke_blocking( + manifest: &PluginManifest, + component: &Component, + event: InvokeEvent, + before: Option<&Component>, +) -> Result> { + use std::io::Write; + + let input = PluginInput { + protocol_version: PROTOCOL_VERSION, + component, + event: event.as_wire(), + before, + }; + let stdin_bytes = serde_json::to_vec(&input).context("serializing plugin stdin payload")?; + + let mut child = Command::new(&manifest.exec) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .with_context(|| format!("spawning plugin executable: {}", manifest.exec.display()))?; + + if let Some(mut stdin) = child.stdin.take() { + stdin + .write_all(&stdin_bytes) + .context("writing plugin stdin")?; + // Drop stdin to send EOF. 
+ } + + let timeout = Duration::from_millis(manifest.timeout_ms); + let poll = Duration::from_millis(POLL_INTERVAL_MS); + let start = std::time::Instant::now(); + loop { + match child.try_wait().context("polling plugin process")? { + Some(status) => { + let mut stdout = String::new(); + if let Some(mut s) = child.stdout.take() { + use std::io::Read; + let _ = s.read_to_string(&mut stdout); + } + if !status.success() { + let mut stderr = String::new(); + if let Some(mut s) = child.stderr.take() { + use std::io::Read; + let _ = s.read_to_string(&mut stderr); + } + anyhow::bail!("plugin exited {status}; stderr: {}", stderr.trim()); + } + let parsed: PluginOutput = + serde_json::from_str(stdout.trim()).with_context(|| { + format!("parsing plugin stdout as JSON (got {} bytes)", stdout.len()) + })?; + return Ok(parsed.findings); + } + None => { + if start.elapsed() >= timeout { + let _ = child.kill(); + let _ = child.wait(); + anyhow::bail!("plugin timed out after {}ms", manifest.timeout_ms); + } + std::thread::sleep(poll); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[cfg(unix)] + use crate::model::{Ecosystem, Relationship}; + + fn write_manifest(dir: &Path, body: &str) -> PathBuf { + let path = dir.join("plugin.toml"); + std::fs::write(&path, body).unwrap(); + path + } + + fn unique_dir(stem: &str) -> PathBuf { + let p = std::env::temp_dir().join(format!( + "bomdrift-plugin-test-{stem}-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0) + )); + std::fs::create_dir_all(&p).unwrap(); + p + } + + #[cfg(unix)] + fn comp(name: &str) -> Component { + Component { + name: name.to_string(), + version: "1.0.0".to_string(), + ecosystem: Ecosystem::Npm, + purl: Some(format!("pkg:npm/{name}@1.0.0")), + licenses: Vec::new(), + supplier: None, + hashes: Vec::new(), + relationship: Relationship::Unknown, + source_url: None, + bom_ref: None, + } + } + + #[cfg(unix)] + fn 
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(unix)]
    use crate::model::{Ecosystem, Relationship};

    /// Writes `body` to `<dir>/plugin.toml` and returns the file's path.
    fn write_manifest(dir: &Path, body: &str) -> PathBuf {
        let manifest = dir.join("plugin.toml");
        std::fs::write(&manifest, body).unwrap();
        manifest
    }

    /// Creates a fresh scratch directory under the system temp dir; the
    /// name embeds `stem`, the process id and a nanosecond timestamp.
    fn unique_dir(stem: &str) -> PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_nanos())
            .unwrap_or(0);
        let dir = std::env::temp_dir().join(format!(
            "bomdrift-plugin-test-{stem}-{}-{}",
            std::process::id(),
            nanos
        ));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Minimal npm component named `name` at version 1.0.0.
    #[cfg(unix)]
    fn comp(name: &str) -> Component {
        Component {
            name: name.to_owned(),
            version: "1.0.0".to_owned(),
            ecosystem: Ecosystem::Npm,
            purl: Some(format!("pkg:npm/{name}@1.0.0")),
            licenses: Vec::new(),
            supplier: None,
            hashes: Vec::new(),
            relationship: Relationship::Unknown,
            source_url: None,
            bom_ref: None,
        }
    }

    /// Writes an executable (mode 0755) script `name` with content `body`.
    #[cfg(unix)]
    fn write_script(dir: &Path, name: &str, body: &str) -> PathBuf {
        use std::os::unix::fs::PermissionsExt;
        let script = dir.join(name);
        std::fs::write(&script, body).unwrap();
        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
        script
    }

    /// In-memory manifest that fires on `Added` only.
    #[cfg(unix)]
    fn manifest_for(name: &str, exec: PathBuf, timeout_ms: u64, dir: &Path) -> PluginManifest {
        PluginManifest {
            name: name.into(),
            description: None,
            exec,
            timeout_ms,
            invoke_on: vec![InvokeEvent::Added],
            manifest_path: dir.to_path_buf(),
        }
    }

    /// Change set whose only content is one added component.
    #[cfg(unix)]
    fn added_only(name: &str) -> ChangeSet {
        ChangeSet {
            added: vec![comp(name)],
            ..Default::default()
        }
    }

    #[test]
    fn manifest_parses_minimum_valid_toml() {
        let dir = unique_dir("min-manifest");
        let path = write_manifest(
            &dir,
            r#"
[plugin]
name = "demo"
exec = "./run.sh"
"#,
        );

        let manifest = load_manifest(&path).expect("parses");

        assert_eq!(manifest.name, "demo");
        assert_eq!(manifest.timeout_ms, DEFAULT_TIMEOUT_MS);
        assert_eq!(
            manifest.invoke_on,
            vec![InvokeEvent::Added, InvokeEvent::VersionChanged]
        );
        assert!(manifest.exec.is_absolute(), "relative exec must resolve");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn manifest_parses_full_toml() {
        // An absolute `exec` must come back untouched rather than being
        // joined onto the manifest directory. The literal is chosen per
        // platform so the test means the same thing on Unix and Windows.
        #[cfg(unix)]
        let abs_exec = "/abs/path/to/check";
        #[cfg(windows)]
        let abs_exec = "C:\\\\abs\\\\path\\\\to\\\\check";
        let dir = unique_dir("full-manifest");
        let toml_body = format!(
            r#"
[plugin]
name = "banned-packages"
description = "Flag dependencies on org-banned packages"
exec = "{abs_exec}"
timeout_ms = 10000
invoke_on = ["added"]
"#
        );
        let path = write_manifest(&dir, &toml_body);

        let manifest = load_manifest(&path).expect("parses");

        assert_eq!(manifest.name, "banned-packages");
        assert_eq!(
            manifest.description.as_deref(),
            Some("Flag dependencies on org-banned packages")
        );
        assert_eq!(manifest.timeout_ms, 10000);
        assert_eq!(manifest.invoke_on, vec![InvokeEvent::Added]);
        #[cfg(unix)]
        assert_eq!(manifest.exec, PathBuf::from("/abs/path/to/check"));
        #[cfg(windows)]
        assert_eq!(manifest.exec, PathBuf::from("C:\\abs\\path\\to\\check"));
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn manifest_missing_exec_field_errors() {
        let dir = unique_dir("missing-exec");
        let path = write_manifest(
            &dir,
            r#"
[plugin]
name = "broken"
"#,
        );

        let msg = format!("{:#}", load_manifest(&path).unwrap_err());

        assert!(msg.contains("exec"), "error must mention exec; got: {msg}");
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn manifest_unknown_invoke_on_value_errors() {
        let dir = unique_dir("bad-event");
        let path = write_manifest(
            &dir,
            r#"
[plugin]
name = "broken"
exec = "./x"
invoke_on = ["removed"]
"#,
        );

        let msg = format!("{:#}", load_manifest(&path).unwrap_err());

        let mentions_bad_value = ["removed", "invoke_on", "variant"]
            .iter()
            .any(|needle| msg.contains(needle));
        assert!(
            mentions_bad_value,
            "error must surface the bad enum value; got: {msg}"
        );
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[cfg(unix)]
    #[test]
    fn plugin_invocation_returns_findings() {
        let dir = unique_dir("happy");
        let exec = write_script(
            &dir,
            "ok.sh",
            "#!/bin/sh\ncat > /dev/null\ncat <<'EOF'\n{\"findings\":[{\"kind\":\"banned\",\"message\":\"left-pad is banned\",\"severity\":\"warning\",\"rule_id\":\"banned/left-pad\"}]}\nEOF\n",
        );
        let manifest = manifest_for("demo", exec, 5000, &dir);
        let cs = added_only("left-pad");

        let findings = run_plugins(std::slice::from_ref(&manifest), &cs);

        assert_eq!(findings.len(), 1);
        let only = &findings[0];
        assert_eq!(only.plugin_name, "demo");
        assert_eq!(only.kind, "banned");
        assert_eq!(only.rule_id, "banned/left-pad");
        assert_eq!(only.severity, PluginSeverity::Warning);
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[cfg(unix)]
    #[test]
    fn plugin_timeout_drops_findings() {
        let dir = unique_dir("timeout");
        let exec = write_script(&dir, "slow.sh", "#!/bin/sh\nsleep 10\n");
        let manifest = manifest_for("slow", exec, 100, &dir);
        let cs = added_only("foo");

        let started = std::time::Instant::now();
        let findings = run_plugins(std::slice::from_ref(&manifest), &cs);
        let elapsed = started.elapsed();

        assert!(findings.is_empty());
        assert!(
            elapsed < Duration::from_secs(3),
            "timeout must fire well before sleep 10 completes; elapsed={elapsed:?}"
        );
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[cfg(unix)]
    #[test]
    fn plugin_nonzero_exit_drops_findings() {
        let dir = unique_dir("nonzero");
        let exec = write_script(&dir, "fail.sh", "#!/bin/sh\nexit 1\n");
        let manifest = manifest_for("fail", exec, 5000, &dir);
        let cs = added_only("foo");

        let findings = run_plugins(std::slice::from_ref(&manifest), &cs);

        assert!(findings.is_empty());
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[cfg(unix)]
    #[test]
    fn plugin_malformed_json_drops_findings() {
        let dir = unique_dir("badjson");
        let exec = write_script(
            &dir,
            "bad.sh",
            "#!/bin/sh\ncat > /dev/null\necho 'not json'\n",
        );
        let manifest = manifest_for("bad", exec, 5000, &dir);
        let cs = added_only("foo");

        let findings = run_plugins(std::slice::from_ref(&manifest), &cs);

        assert!(findings.is_empty());
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[cfg(unix)]
    #[test]
    fn two_plugins_findings_are_merged() {
        let dir = unique_dir("two");
        let exec_a = write_script(
            &dir,
            "a.sh",
            "#!/bin/sh\ncat > /dev/null\necho '{\"findings\":[{\"kind\":\"k1\",\"message\":\"a\",\"severity\":\"info\",\"rule_id\":\"a-1\"}]}'\n",
        );
        let exec_b = write_script(
            &dir,
            "b.sh",
            "#!/bin/sh\ncat > /dev/null\necho '{\"findings\":[{\"kind\":\"k2\",\"message\":\"b\",\"severity\":\"error\",\"rule_id\":\"b-1\"}]}'\n",
        );
        let manifests = [
            manifest_for("a", exec_a, 5000, &dir),
            manifest_for("b", exec_b, 5000, &dir),
        ];
        let cs = added_only("foo");

        let findings = run_plugins(&manifests, &cs);

        assert_eq!(findings.len(), 2);
        let names: Vec<&str> = findings.iter().map(|f| f.plugin_name.as_str()).collect();
        assert!(names.contains(&"a"));
        assert!(names.contains(&"b"));
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn fingerprint_is_stable_and_distinct() {
        // Only the three identity fields vary; everything else is fixed.
        fn fingerprint_of(plugin: &str, purl: &str, rule: &str) -> String {
            PluginFinding {
                plugin_name: plugin.into(),
                component_purl: purl.into(),
                kind: "k".into(),
                message: "m".into(),
                severity: PluginSeverity::Info,
                rule_id: rule.into(),
            }
            .fingerprint()
        }

        let base = fingerprint_of("p1", "pkg:npm/x", "r1");
        let same_again = fingerprint_of("p1", "pkg:npm/x", "r1");
        let other_plugin = fingerprint_of("p2", "pkg:npm/x", "r1");
        let other_purl = fingerprint_of("p1", "pkg:npm/y", "r1");
        let other_rule = fingerprint_of("p1", "pkg:npm/x", "r2");

        assert_eq!(base, same_again, "byte-stable for the same identity");
        assert_ne!(base, other_plugin, "distinct per plugin_name");
        assert_ne!(base, other_purl, "distinct per purl");
        assert_ne!(base, other_rule, "distinct per rule_id");
    }
}
+ use std::collections::BTreeMap; + let mut by_plugin: BTreeMap<&str, Vec<&crate::plugin::PluginFinding>> = BTreeMap::new(); + for f in &enrichment.plugin_findings { + by_plugin.entry(f.plugin_name.as_str()).or_default().push(f); + } + let total = enrichment.plugin_findings.len(); + section_open(&mut out, "Plugin findings", total, None); + out.push_str( + "External plugins reported the following findings against added \ + or version-changed components. Plugin findings are best-effort \ + — runtime failures (timeout, malformed JSON, non-zero exit) \ + drop findings without failing the diff.\n\n", + ); + for (name, findings) in &by_plugin { + let _ = writeln!(out, "**{name}** ({})\n", findings.len()); + for f in findings { + let prefix = match f.severity { + crate::plugin::PluginSeverity::Info => "ℹ️ info", + crate::plugin::PluginSeverity::Warning => "⚠️ warning", + crate::plugin::PluginSeverity::Error => "❌ error", + }; + let _ = writeln!( + out, + "- {prefix} · `{}` · {} — {} (`{}`)", + f.component_purl, f.kind, f.message, f.rule_id, + ); + } + out.push('\n'); + } + section_close(&mut out); + } + write_footer(&mut out, &opts); out diff --git a/src/render/sarif.rs b/src/render/sarif.rs index ce99c56..4d19a7e 100644 --- a/src/render/sarif.rs +++ b/src/render/sarif.rs @@ -183,6 +183,20 @@ fn rules() -> Value { Severity `warning`.", "https://metbcy.github.io/bomdrift/enrichers/registry.html", ), + rule( + "bomdrift.plugin", + "plugin", + "External plugin reported a finding", + "An external plugin (loaded via --plugin manifest.toml) \ + reported a finding against an added or version-changed \ + component. The plugin name and finding kind are recorded \ + on the result's `properties` for filtering. Severity is \ + plugin-controlled (info → note, warning → warning, error \ + → error). 
Plugin findings are best-effort — runtime \ + failures (timeout, malformed JSON, non-zero exit) drop \ + findings without failing the diff.", + "https://metbcy.github.io/bomdrift/plugins.html", + ), ]) } @@ -535,9 +549,46 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { })); } + // ---- bomdrift.plugin ---- + // Plugin findings are pre-ordered by run_plugins() (manifest order + // outer, cs.added/version_changed inner — both already deterministic + // since cs.added is BTreeMap-derived and the manifest list is the + // user's CLI order). Emit verbatim. + for f in &e.plugin_findings { + let fp = f.fingerprint(); + out.push(json!({ + "ruleId": "bomdrift.plugin", + "level": plugin_sarif_level(f.severity), + "message": { + "text": format!( + "{} ({}): {}", + f.plugin_name, f.kind, f.message, + ), + }, + "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, + "properties": { + "pluginName": f.plugin_name, + "findingKind": f.kind, + "ruleId": f.rule_id, + "purl": f.component_purl, + "severity": f.severity.as_str(), + }, + })); + } + Value::Array(out) } +fn plugin_sarif_level(severity: crate::plugin::PluginSeverity) -> &'static str { + use crate::plugin::PluginSeverity; + match severity { + PluginSeverity::Info => "note", + PluginSeverity::Warning => "warning", + PluginSeverity::Error => "error", + } +} + fn synthetic_location() -> Value { json!({ "physicalLocation": { @@ -610,6 +661,7 @@ mod tests { "bomdrift.recently-published", "bomdrift.deprecated", "bomdrift.maintainer-set-changed", + "bomdrift.plugin", ], "rule IDs are stable public API — order also stable for byte-determinism", ); @@ -1090,4 +1142,52 @@ mod tests { let r_exception_2 = render(&ChangeSet::default(), &e_exception); assert_eq!(parse(&r_exception_2), fp_ex); } + + #[test] + fn plugin_findings_emit_sarif_results_with_distinct_fingerprints() { + use crate::plugin::{PluginFinding, PluginSeverity}; + let mut e = Enrichment::default(); + 
e.plugin_findings.push(PluginFinding { + plugin_name: "banned".into(), + component_purl: "pkg:npm/left-pad@1.0.0".into(), + kind: "banned-package".into(), + message: "left-pad is banned".into(), + severity: PluginSeverity::Warning, + rule_id: "banned/left-pad".into(), + }); + e.plugin_findings.push(PluginFinding { + plugin_name: "banned".into(), + component_purl: "pkg:npm/right-pad@2.0.0".into(), + kind: "banned-package".into(), + message: "right-pad is banned".into(), + severity: PluginSeverity::Error, + rule_id: "banned/right-pad".into(), + }); + let s = render(&ChangeSet::default(), &e); + let v: Value = serde_json::from_str(&s).unwrap(); + let results = v["runs"][0]["results"].as_array().unwrap(); + let plugin_results: Vec<&Value> = results + .iter() + .filter(|r| r["ruleId"] == "bomdrift.plugin") + .collect(); + assert_eq!(plugin_results.len(), 2); + + let fp1 = plugin_results[0]["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap(); + let fp2 = plugin_results[1]["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap(); + assert_ne!(fp1, fp2, "distinct fingerprints per (purl, rule_id)"); + assert_eq!(plugin_results[0]["properties"]["pluginName"], "banned"); + assert_eq!( + plugin_results[0]["properties"]["findingKind"], + "banned-package" + ); + assert_eq!(plugin_results[1]["level"], "error"); + + // Render twice must produce byte-equal output. 
+ let s2 = render(&ChangeSet::default(), &e); + assert_eq!(s, s2); + } } diff --git a/src/render/term.rs b/src/render/term.rs index 95ce088..1b6b8fd 100644 --- a/src/render/term.rs +++ b/src/render/term.rs @@ -191,6 +191,32 @@ pub fn render_with_color(cs: &ChangeSet, enrichment: &Enrichment, color: ColorCh out.push('\n'); } + if !enrichment.plugin_findings.is_empty() { + let _ = writeln!( + out, + "Plugin findings ({}):", + enrichment.plugin_findings.len() + ); + for f in &enrichment.plugin_findings { + let (token, tone) = match f.severity { + crate::plugin::PluginSeverity::Info => ("[PLG]", Tone::Info), + crate::plugin::PluginSeverity::Warning => ("[PLG]", Tone::Caution), + crate::plugin::PluginSeverity::Error => ("[PLG]", Tone::High), + }; + let _ = writeln!( + out, + " {} {} :: {} :: {} - {} ({})", + tag(token, tone, color), + f.plugin_name, + f.component_purl, + f.kind, + f.message, + f.rule_id, + ); + } + out.push('\n'); + } + out } diff --git a/tests/cli.rs b/tests/cli.rs index 88b38e3..f2e017c 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -1039,3 +1039,271 @@ fn diff_debug_calibration_prints_csv_lines_to_stderr() { "calibration output must NOT leak into stdout; got:\n{stdout}" ); } + +#[test] +fn diff_typosquat_similarity_threshold_rejects_out_of_range() { + // clap value_parser must reject < 0.0 / > 1.0 / non-numeric. 
#[test]
fn diff_typosquat_similarity_threshold_rejects_out_of_range() {
    // Below 0.0, above 1.0 and non-numeric values must all be refused by
    // clap's value parser.
    for bad in ["-0.1", "1.5", "two"] {
        let argv = [
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--no-osv",
            "--typosquat-similarity-threshold",
            bad,
        ];
        let out = Command::new(bin())
            .current_dir(manifest_dir())
            .args(argv)
            .output()
            .expect("spawn bomdrift");
        assert!(
            !out.status.success(),
            "expected clap to reject --typosquat-similarity-threshold {bad}, but exit was {}",
            out.status
        );
    }
}

#[test]
fn diff_young_maintainer_days_rejects_zero_and_negative() {
    for bad in ["0", "-1", "abc"] {
        let argv = [
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--no-osv",
            "--young-maintainer-days",
            bad,
        ];
        let out = Command::new(bin())
            .current_dir(manifest_dir())
            .args(argv)
            .output()
            .expect("spawn bomdrift");
        assert!(
            !out.status.success(),
            "expected clap to reject --young-maintainer-days {bad}"
        );
    }
}

#[test]
fn diff_cache_ttl_hours_rejects_zero_and_negative() {
    for bad in ["0", "-3", "x"] {
        let argv = [
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--no-osv",
            "--cache-ttl-hours",
            bad,
        ];
        let out = Command::new(bin())
            .current_dir(manifest_dir())
            .args(argv)
            .output()
            .expect("spawn bomdrift");
        assert!(
            !out.status.success(),
            "expected clap to reject --cache-ttl-hours {bad}"
        );
    }
}

#[test]
fn diff_typosquat_similarity_threshold_changes_calibration_row() {
    // At a threshold of 0.999 the fixture's typosquat finding no longer
    // qualifies, so the calibration output must not contain a typosquat
    // row (the default-threshold run does produce one).
    let out = Command::new(bin())
        .current_dir(manifest_dir())
        .args([
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--no-osv",
            "--debug-calibration",
            "--typosquat-similarity-threshold",
            "0.999",
        ])
        .output()
        .expect("spawn bomdrift");
    assert!(out.status.success());

    let stderr = String::from_utf8(out.stderr).expect("stderr utf-8");
    let has_typosquat_row = stderr.lines().any(|l| l.starts_with("typosquat|"));
    assert!(
        !has_typosquat_row,
        "raising similarity threshold to 0.999 must drop the existing typosquat finding; got: {stderr}"
    );
}

#[test]
fn diff_typosquat_similarity_threshold_surfaces_in_calibration_threshold_field() {
    // 0.5 is low enough that the fixture's finding still clears it; the
    // fourth `|`-separated column of the calibration row must then show the
    // override rather than the 0.92 default.
    let out = Command::new(bin())
        .current_dir(manifest_dir())
        .args([
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--no-osv",
            "--debug-calibration",
            "--typosquat-similarity-threshold",
            "0.5",
        ])
        .output()
        .expect("spawn bomdrift");
    assert!(out.status.success());

    let stderr = String::from_utf8(out.stderr).expect("stderr utf-8");
    let row = stderr
        .lines()
        .find(|l| l.starts_with("typosquat|"))
        .expect("typosquat row");
    let threshold: f64 = row
        .split('|')
        .nth(3)
        .expect("threshold column")
        .parse()
        .expect("threshold is float");
    assert!(
        (threshold - 0.5).abs() < 1e-9,
        "calibration threshold field must reflect the active override; got {threshold}"
    );
}

#[test]
fn diff_before_attestation_conflicts_with_positional_before() {
    let out = Command::new(bin())
        .current_dir(manifest_dir())
        .args([
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--before-attestation",
            "ghcr.io/example/img:tag",
            "--cosign-identity",
            "https://github.com/example/.+",
            "--cosign-issuer",
            "https://token.actions.githubusercontent.com",
        ])
        .output()
        .expect("spawn bomdrift");
    assert!(
        !out.status.success(),
        "expected clap to reject --before-attestation alongside positional `before`"
    );

    let stderr = String::from_utf8_lossy(&out.stderr);
    let reports_conflict = stderr.contains("cannot be used with") || stderr.contains("conflicts");
    assert!(
        reports_conflict,
        "expected conflicts-with diagnostic; got: {stderr}"
    );
}

#[test]
fn diff_require_attestation_demands_both_attestation_flags() {
    let out = Command::new(bin())
        .current_dir(manifest_dir())
        .args([
            "diff",
            "tests/fixtures/cdx-minimal.json",
            "tests/fixtures/cdx-after.json",
            "--require-attestation",
        ])
        .output()
        .expect("spawn bomdrift");
    assert!(
        !out.status.success(),
        "expected clap to reject --require-attestation without both attestation flags"
    );
}

#[cfg(unix)]
#[test]
fn diff_plugin_end_to_end_emits_finding_in_markdown_and_sarif() {
    use std::io::Write;
    use std::os::unix::fs::PermissionsExt;

    let dir = temp_dir("plugin-e2e");

    // Plugin script: swallow stdin, print one fixed finding.
    let script = dir.join("flag.sh");
    {
        let mut file = fs::File::create(&script).unwrap();
        file.write_all(
            b"#!/bin/sh\ncat > /dev/null\necho '{\"findings\":[{\"kind\":\"banned\",\"message\":\"left-pad is banned\",\"severity\":\"warning\",\"rule_id\":\"banned/left-pad\"}]}'\n"
        ).unwrap();
        file.sync_all().unwrap();
    }
    let mut perms = fs::metadata(&script).unwrap().permissions();
    perms.set_mode(0o755);
    fs::set_permissions(&script, perms).unwrap();

    let manifest_path = dir.join("plugin.toml");
    let manifest_body = format!(
        r#"
[plugin]
name = "banned"
exec = "{}"
invoke_on = ["added"]
"#,
        script.display()
    );
    fs::write(&manifest_path, manifest_body).unwrap();
    let manifest_arg = manifest_path.display().to_string();

    // Same invocation for both formats; only `--output` differs.
    let run_with_output = |format: &str| {
        Command::new(bin())
            .current_dir(manifest_dir())
            .args([
                "diff",
                "tests/fixtures/cdx-minimal.json",
                "tests/fixtures/cdx-after.json",
                "--no-osv",
                "--output",
                format,
                "--plugin",
                manifest_arg.as_str(),
            ])
            .output()
            .expect("spawn bomdrift")
    };

    // Markdown
    let md = run_with_output("markdown");
    assert!(
        md.status.success(),
        "exit: {} stderr: {}",
        md.status,
        String::from_utf8_lossy(&md.stderr)
    );
    let stdout = String::from_utf8_lossy(&md.stdout);
    assert!(
        stdout.contains("Plugin findings"),
        "missing 'Plugin findings' header in:\n{stdout}"
    );
    assert!(
        stdout.contains("banned/left-pad"),
        "missing rule_id in markdown:\n{stdout}"
    );

    // SARIF
    let sarif = run_with_output("sarif");
    assert!(sarif.status.success());
    let sarif_str = String::from_utf8_lossy(&sarif.stdout);
    assert!(
        sarif_str.contains("\"ruleId\": \"bomdrift.plugin\""),
        "missing bomdrift.plugin result in SARIF"
    );
    assert!(sarif_str.contains("banned/left-pad"));

    fs::remove_dir_all(&dir).ok();
}