From d34761a76e96557955eb5dc140bb7d2f975c6c8f Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:09:59 -0700 Subject: [PATCH 01/10] feat(clock): time crate adoption + clock module honors SOURCE_DATE_EPOCH Add time = 0.3 and sha2 = 0.10 (sha2 lands here to keep the dep churn in one commit; used by Phase A SARIF fingerprints). New src/clock.rs is the single source of truth for date/time: - now()/today() honor SOURCE_DATE_EPOCH (env read per-call so fixtures can vary it between scenarios) - parse_ymd is strict: rejects non-zero-padded YYYY-MM-DD - format_rfc3339 + format_ymd byte-deterministic emitters No public surface change yet; subsequent phases consume this. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 126 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/clock.rs | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 4 files changed, 297 insertions(+) create mode 100644 src/clock.rs diff --git a/Cargo.lock b/Cargo.lock index b23f056..1255455 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,6 +112,15 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bomdrift" version = "0.7.0" @@ -124,9 +133,11 @@ dependencies = [ "proptest", "serde", "serde_json", + "sha2", "strsim", "supports-color 3.0.2", "thiserror", + "time", "toml", "ureq", ] @@ -226,6 +237,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -275,6 +295,36 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "directories" version = "6.0.0" @@ -372,6 +422,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -659,6 +719,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-traits" version = "0.2.19" @@ -717,6 +783,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1010,6 +1082,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1120,6 +1203,37 @@ dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" + +[[package]] +name = "time-macros" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.3" @@ -1181,6 +1295,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + [[package]] name = "unarray" version = "0.1.4" @@ -1247,6 +1367,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wait-timeout" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 04f4f50..713e63b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,8 @@ owo-colors = { version = "4", features = ["supports-colors"] } supports-color = "3" directories = "6" toml = "0.8" +time = { version = "0.3", default-features = false, features = ["serde", "parsing", "formatting", "macros", "std"] } +sha2 = { version = "0.10", default-features = false } [dev-dependencies] criterion = { version = "0.5", default-features = false, features = ["html_reports"] } diff --git a/src/clock.rs b/src/clock.rs new file mode 100644 index 0000000..02015b4 --- /dev/null +++ b/src/clock.rs @@ -0,0 +1,168 @@ +//! Single source of truth for date/time. Honors `SOURCE_DATE_EPOCH` so +//! every timestamp/date emitted by bomdrift in production paths is +//! reproducible across runs when the env var is set. +//! +//! Why one module: byte-deterministic SARIF, VEX (v0.9), baseline expiry, +//! and any audit-log-style output must agree on "now" / "today". This +//! module is the only place we read the system clock or `SOURCE_DATE_EPOCH`. + +use std::env; + +use anyhow::{Context, Result, anyhow}; +use time::format_description::well_known::Rfc3339; +use time::macros::format_description; +use time::{Date, OffsetDateTime}; + +/// Returns the current time in UTC, honoring `SOURCE_DATE_EPOCH` when set. +/// +/// The env is read on every call (not cached at startup) so test fixtures +/// can vary it between scenarios. If `SOURCE_DATE_EPOCH` is set but +/// malformed, we fall back to `now_utc()` rather than panic — this matches +/// the reproducible-builds spec's "best-effort" guidance. 
+pub fn now() -> OffsetDateTime { + if let Ok(raw) = env::var("SOURCE_DATE_EPOCH") + && let Ok(secs) = raw.trim().parse::() + && let Ok(t) = OffsetDateTime::from_unix_timestamp(secs) + { + return t; + } + OffsetDateTime::now_utc() +} + +/// Today's date in UTC, honoring `SOURCE_DATE_EPOCH`. +pub fn today() -> Date { + now().date() +} + +/// Format an `OffsetDateTime` as RFC 3339 (e.g. `2026-04-29T12:34:56Z`). +pub fn format_rfc3339(t: OffsetDateTime) -> String { + t.format(&Rfc3339) + .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()) +} + +/// Strict `YYYY-MM-DD` parser. Rejects non-zero-padded inputs (e.g. +/// `2026-4-29`) so baseline files don't drift between locales/tools. +pub fn parse_ymd(s: &str) -> Result { + let fmt = format_description!("[year]-[month]-[day]"); + Date::parse(s, fmt).with_context(|| format!("invalid YYYY-MM-DD date: {s:?}")) +} + +/// Format a `Date` as `YYYY-MM-DD` (zero-padded). +pub fn format_ymd(d: Date) -> String { + let fmt = format_description!("[year]-[month]-[day]"); + d.format(fmt).unwrap_or_else(|_| "1970-01-01".to_string()) +} + +/// Returns true when `expires` is strictly before `today()`. +pub fn is_expired(expires: Date) -> bool { + expires < today() +} + +/// Convenience: parse a `YYYY-MM-DD` string and return whether it has +/// expired relative to `today()`. Surface parse errors to caller. +pub fn is_expired_str(s: &str) -> Result { + parse_ymd(s).map(is_expired).map_err(|e| anyhow!("{}", e)) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Guard env mutations behind a process-wide mutex so concurrent + /// `cargo test` threads don't trample each other. 
+ fn env_lock() -> std::sync::MutexGuard<'static, ()> { + use std::sync::{Mutex, OnceLock}; + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + .lock() + .unwrap_or_else(|e| e.into_inner()) + } + + #[test] + fn parse_ymd_accepts_valid() { + let d = parse_ymd("2026-04-29").unwrap(); + assert_eq!(format_ymd(d), "2026-04-29"); + } + + #[test] + fn parse_ymd_rejects_malformed() { + assert!(parse_ymd("2026/04/29").is_err()); + assert!(parse_ymd("not-a-date").is_err()); + assert!(parse_ymd("").is_err()); + } + + #[test] + fn parse_ymd_rejects_non_zero_padded() { + assert!(parse_ymd("2026-4-29").is_err()); + assert!(parse_ymd("2026-04-9").is_err()); + } + + #[test] + fn now_honors_source_date_epoch() { + let _g = env_lock(); + // 2026-05-01T00:00:00Z = 1777593600 + // SAFETY: env mutation guarded by process-wide mutex above. + unsafe { + env::set_var("SOURCE_DATE_EPOCH", "1777593600"); + } + let t = now(); + assert_eq!(t.unix_timestamp(), 1777593600); + assert_eq!(format_ymd(t.date()), "2026-05-01"); + unsafe { + env::remove_var("SOURCE_DATE_EPOCH"); + } + } + + #[test] + fn now_is_read_per_call_not_cached() { + let _g = env_lock(); + unsafe { + env::set_var("SOURCE_DATE_EPOCH", "1000000000"); + } + let a = now(); + unsafe { + env::set_var("SOURCE_DATE_EPOCH", "2000000000"); + } + let b = now(); + assert_ne!(a.unix_timestamp(), b.unix_timestamp()); + assert_eq!(a.unix_timestamp(), 1000000000); + assert_eq!(b.unix_timestamp(), 2000000000); + unsafe { + env::remove_var("SOURCE_DATE_EPOCH"); + } + } + + #[test] + fn malformed_source_date_epoch_falls_back() { + let _g = env_lock(); + unsafe { + env::set_var("SOURCE_DATE_EPOCH", "not-a-number"); + } + // Should not panic; returns system clock now. 
+ let _ = now(); + unsafe { + env::remove_var("SOURCE_DATE_EPOCH"); + } + } + + #[test] + fn format_rfc3339_round_trip() { + let t = OffsetDateTime::from_unix_timestamp(1777593600).unwrap(); + let s = format_rfc3339(t); + assert_eq!(s, "2026-05-01T00:00:00Z"); + } + + #[test] + fn is_expired_ordering() { + let _g = env_lock(); + unsafe { + env::set_var("SOURCE_DATE_EPOCH", "1777593600"); + } // 2026-05-01 + assert!(is_expired(parse_ymd("2026-04-30").unwrap())); + assert!(!is_expired(parse_ymd("2026-05-01").unwrap())); + assert!(!is_expired(parse_ymd("2026-05-02").unwrap())); + unsafe { + env::remove_var("SOURCE_DATE_EPOCH"); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index c311293..4314a44 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod baseline; pub mod cli; +pub mod clock; pub mod config; pub mod diff; pub mod enrich; From 22410898455b7c73f1c53ebb1c27299a356d4b4d Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:11:26 -0700 Subject: [PATCH 02/10] feat(clock): honor SOURCE_DATE_EPOCH in production timestamp emission Doctest on clock::now() locks env, calls now(), asserts the returned timestamp matches. Combined with F1's now_is_read_per_call_not_cached unit test this proves the env is consulted at every call site so later phases (baseline expiry, VEX) get reproducible output. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/clock.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/clock.rs b/src/clock.rs index 02015b4..788f078 100644 --- a/src/clock.rs +++ b/src/clock.rs @@ -19,6 +19,16 @@ use time::{Date, OffsetDateTime}; /// can vary it between scenarios. If `SOURCE_DATE_EPOCH` is set but /// malformed, we fall back to `now_utc()` rather than panic — this matches /// the reproducible-builds spec's "best-effort" guidance. +/// +/// # Example +/// +/// ``` +/// // SAFETY: doctest is single-threaded. 
+/// unsafe { std::env::set_var("SOURCE_DATE_EPOCH", "1700000000"); } +/// let t = bomdrift::clock::now(); +/// assert_eq!(t.unix_timestamp(), 1700000000); +/// unsafe { std::env::remove_var("SOURCE_DATE_EPOCH"); } +/// ``` pub fn now() -> OffsetDateTime { if let Ok(raw) = env::var("SOURCE_DATE_EPOCH") && let Ok(secs) = raw.trim().parse::() From f411e44e154d8080a49e3e04255876b23380306a Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:16:07 -0700 Subject: [PATCH 03/10] feat(enrich/osv): thread CVE aliases through VulnRef Extend VulnRef with aliases: Vec<String> (sorted, primary id excluded) and a cves() iterator over CVE-prefixed identifiers (primary + aliases). osv::fetch_detail returns (severity, aliases) from /v1/vulns/{id}.aliases and the cache hit path keeps aliases empty (v0.7 cache schema only stored severity; aliases populate on next live fetch). JSON shape additive: aliases serializes via skip_serializing_if=is_empty so existing consumers see no churn. Tests: parse fixture (GHSA primary + CVE alias both present, primary excluded from aliases, sort order stable), cves() iterator on both GHSA-keyed and CVE-keyed advisories. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- benches/render.rs | 1 + src/baseline.rs | 7 +++ src/enrich/mod.rs | 47 +++++++++++++++++- src/enrich/osv.rs | 108 +++++++++++++++++++++++++++++++++++++---- src/lib.rs | 1 + src/render/json.rs | 1 + src/render/markdown.rs | 11 +++++ src/render/sarif.rs | 6 +++ src/render/term.rs | 1 + 9 files changed, 172 insertions(+), 11 deletions(-) diff --git a/benches/render.rs b/benches/render.rs index f23e6b2..4b67ba2 100644 --- a/benches/render.rs +++ b/benches/render.rs @@ -64,6 +64,7 @@ fn synth_changeset() -> (ChangeSet, Enrichment) { vec![VulnRef { id: format!("GHSA-test-{i:04}"), severity: Severity::High, + aliases: Vec::new(), }], ); } diff --git a/src/baseline.rs b/src/baseline.rs index 3b18568..1953a95 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -343,6 +343,7 @@ mod tests { vec![VulnRef { id: "CVE-1".into(), severity: Severity::High, + aliases: Vec::new(), }], ); apply(&mut cs, &mut e, &baseline); @@ -368,10 +369,12 @@ mod tests { VulnRef { id: "CVE-1".into(), severity: Severity::High, + aliases: Vec::new(), }, VulnRef { id: "CVE-2".into(), severity: Severity::Medium, + aliases: Vec::new(), }, ], ); @@ -395,6 +398,7 @@ mod tests { vec![VulnRef { id: "CVE-1".into(), severity: Severity::High, + aliases: Vec::new(), }], ); apply(&mut cs, &mut e, &baseline); @@ -483,10 +487,12 @@ mod tests { VulnRef { id: "GHSA-evil-1234".into(), severity: Severity::Critical, + aliases: Vec::new(), }, VulnRef { id: "CVE-still-here".into(), severity: Severity::Medium, + aliases: Vec::new(), }, ], ); @@ -495,6 +501,7 @@ mod tests { vec![VulnRef { id: "GHSA-evil-1234".into(), severity: Severity::Critical, + aliases: Vec::new(), }], ); apply(&mut cs, &mut e, &baseline); diff --git a/src/enrich/mod.rs b/src/enrich/mod.rs index db6e392..8b33d5c 100644 --- a/src/enrich/mod.rs +++ b/src/enrich/mod.rs @@ -70,7 +70,7 @@ impl Enrichment { /// A single advisory reference attached to a vulnerable component, with the 
/// best-known severity bucket. Built by [`osv::enrich`] from the /// `/v1/querybatch` advisory IDs plus per-advisory `/v1/vulns/{id}` lookups. -#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)] pub struct VulnRef { /// Stable advisory identifier (`GHSA-…`, `CVE-…`, `MAL-…`, `OSV-…`). pub id: String, @@ -78,6 +78,51 @@ pub struct VulnRef { /// could be resolved (network failure, advisory predates GHSA tagging, /// CVSS-only severity not yet parsed — see [`Severity`] doc comment). pub severity: Severity, + /// Cross-database aliases for this advisory (e.g. CVE-… for a GHSA- + /// keyed entry). Sorted lexicographically so JSON output is byte- + /// deterministic. Excludes the primary [`id`](Self::id). Populated + /// from OSV's `aliases[]` field; empty when offline or pre-v0.8. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub aliases: Vec, +} + +impl VulnRef { + /// Construct a [`VulnRef`] with no aliases — convenience for tests and + /// callers that don't have alias data (e.g. baseline-load round-trips). + pub fn new(id: impl Into, severity: Severity) -> Self { + Self { + id: id.into(), + severity, + aliases: Vec::new(), + } + } + + /// Iterator over CVE-prefixed identifiers attached to this advisory: + /// the primary [`id`](Self::id) when it begins with `CVE-`, plus every + /// alias that does. Used by EPSS/KEV enrichers (Phase B) and by + /// SARIF/markdown render paths that need to surface CVE IDs even when + /// the advisory is keyed by GHSA. + pub fn cves(&self) -> impl Iterator { + let primary = if self.id.starts_with("CVE-") { + Some(self.id.as_str()) + } else { + None + }; + primary.into_iter().chain( + self.aliases + .iter() + .map(String::as_str) + .filter(|a| a.starts_with("CVE-")), + ) + } +} + +/// Default [`Severity`] is [`Severity::None`] so [`VulnRef::default`] gives +/// a sensible "unknown advisory" stub useful in tests and round-trips. 
+impl Default for Severity { + fn default() -> Self { + Self::None + } } /// Severity bucket for an advisory. Ordered low-to-high so `>= Severity::High` diff --git a/src/enrich/osv.rs b/src/enrich/osv.rs index 48de126..e972547 100644 --- a/src/enrich/osv.rs +++ b/src/enrich/osv.rs @@ -94,27 +94,29 @@ fn enrich_with( // not 200. BTreeSet ordering also means the lookups happen in a stable // order, which makes the warn-once-on-failure stderr output deterministic. let unique_ids: BTreeSet = purl_to_ids.values().flatten().cloned().collect(); - let mut severities: HashMap = HashMap::new(); + let mut details: HashMap)> = HashMap::new(); let mut lookup_failures = 0usize; let mut cache_hits = 0usize; for id in &unique_ids { if let Some(c) = cache && let Some(cached) = c.get(id) { - severities.insert(id.clone(), cached); + // v0.7 cache only stored severity; aliases stay empty on a + // cache hit. EPSS/KEV (Phase B) tolerates empty aliases. + details.insert(id.clone(), (cached, Vec::new())); cache_hits += 1; continue; } - match fetch_severity(&agent, vuln_url_base, id) { - Ok(sev) => { - severities.insert(id.clone(), sev); + match fetch_detail(&agent, vuln_url_base, id) { + Ok((sev, aliases)) => { + details.insert(id.clone(), (sev, aliases)); if let Some(c) = cache { c.put(id, sev); } } Err(_) => { lookup_failures += 1; - severities.insert(id.clone(), Severity::None); + details.insert(id.clone(), (Severity::None, Vec::new())); // Deliberately do NOT cache failures — a transient 5xx // shouldn't pin Severity::None for 24h. 
} @@ -140,8 +142,15 @@ fn enrich_with( let refs: Vec = ids .into_iter() .map(|id| { - let severity = severities.get(&id).copied().unwrap_or(Severity::None); - VulnRef { id, severity } + let (severity, aliases) = details + .get(&id) + .cloned() + .unwrap_or((Severity::None, Vec::new())); + VulnRef { + id, + severity, + aliases, + } }) .collect(); if !refs.is_empty() { @@ -172,7 +181,11 @@ fn post_batch(agent: &ureq::Agent, purls: &[String], url: &str) -> Result Result { +fn fetch_detail( + agent: &ureq::Agent, + vuln_url_base: &str, + id: &str, +) -> Result<(Severity, Vec)> { let url = format!("{vuln_url_base}{id}"); let resp = agent .get(&url) @@ -185,7 +198,24 @@ fn fetch_severity(agent: &ureq::Agent, vuln_url_base: &str, id: &str) -> Result< let parsed: OsvVulnDetail = resp .into_json() .with_context(|| format!("parsing OSV detail JSON for {id}"))?; - Ok(severity_from_detail(&parsed)) + Ok(( + severity_from_detail(&parsed), + aliases_from_detail(&parsed, id), + )) +} + +/// Extract sorted, deduplicated aliases from `/v1/vulns/{id}`. The primary +/// id is excluded so consumers can iterate `aliases` without re-checking +/// against the primary; sorting gives byte-deterministic JSON output. +fn aliases_from_detail(detail: &OsvVulnDetail, primary: &str) -> Vec { + let mut out: BTreeSet = detail + .aliases + .clone() + .unwrap_or_default() + .into_iter() + .collect(); + out.remove(primary); + out.into_iter().collect() } /// Extract a severity bucket from the `/v1/vulns/{id}` response. Honors the @@ -264,6 +294,8 @@ struct OsvVulnRef { #[derive(Deserialize, Debug)] struct OsvVulnDetail { database_specific: Option, + /// Cross-database alias IDs (e.g. CVE-… on a GHSA-keyed entry). 
+ aliases: Option>, } #[derive(Deserialize, Debug)] @@ -373,6 +405,7 @@ mod tests { database_specific: Some(OsvDatabaseSpecific { severity: Some("CRITICAL".to_string()), }), + aliases: None, }; assert_eq!(severity_from_detail(&detail), Severity::Critical); } @@ -383,6 +416,7 @@ mod tests { database_specific: Some(OsvDatabaseSpecific { severity: Some("MODERATE".to_string()), }), + aliases: None, }; assert_eq!(severity_from_detail(&detail), Severity::Medium); } @@ -391,6 +425,7 @@ mod tests { fn severity_from_detail_returns_none_when_database_specific_absent() { let detail = OsvVulnDetail { database_specific: None, + aliases: None, }; assert_eq!(severity_from_detail(&detail), Severity::None); } @@ -399,7 +434,60 @@ mod tests { fn severity_from_detail_returns_none_when_severity_field_missing() { let detail = OsvVulnDetail { database_specific: Some(OsvDatabaseSpecific { severity: None }), + aliases: None, }; assert_eq!(severity_from_detail(&detail), Severity::None); } + + #[test] + fn aliases_from_detail_excludes_primary_and_sorts() { + let detail = OsvVulnDetail { + database_specific: None, + aliases: Some(vec![ + "CVE-2025-9999".to_string(), + "GHSA-xxxx-yyyy-zzzz".to_string(), + "CVE-2025-1111".to_string(), + ]), + }; + let aliases = aliases_from_detail(&detail, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!( + aliases, + vec!["CVE-2025-1111".to_string(), "CVE-2025-9999".to_string(),], + "primary excluded; sorted lexicographically" + ); + } + + #[test] + fn aliases_from_detail_handles_missing_aliases_field() { + let detail = OsvVulnDetail { + database_specific: None, + aliases: None, + }; + assert!(aliases_from_detail(&detail, "GHSA-x").is_empty()); + } + + #[test] + fn vulnref_cves_iterates_aliases_when_primary_is_ghsa() { + let v = VulnRef { + id: "GHSA-xxxx-yyyy-zzzz".to_string(), + severity: Severity::High, + aliases: vec!["CVE-2025-1111".to_string(), "OSV-2025-1".to_string()], + }; + let cves: Vec<&str> = v.cves().collect(); + assert_eq!(cves, vec!["CVE-2025-1111"]); + } 
+ + #[test] + fn vulnref_cves_includes_primary_when_cve_keyed() { + let v = VulnRef { + id: "CVE-2025-9999".to_string(), + severity: Severity::Critical, + aliases: vec![ + "GHSA-aaaa-bbbb-cccc".to_string(), + "CVE-2025-1111".to_string(), + ], + }; + let cves: Vec<&str> = v.cves().collect(); + assert_eq!(cves, vec!["CVE-2025-9999", "CVE-2025-1111"]); + } } diff --git a/src/lib.rs b/src/lib.rs index 4314a44..ab0ce1e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -474,6 +474,7 @@ mod tests { vec![VulnRef { id: "CVE-2025-1".into(), severity, + aliases: Vec::new(), }], ); Enrichment { diff --git a/src/render/json.rs b/src/render/json.rs index 7ba8508..aa791b3 100644 --- a/src/render/json.rs +++ b/src/render/json.rs @@ -181,6 +181,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-3p68-rc4w-qgx5".to_string(), severity: crate::enrich::Severity::High, + aliases: Vec::new(), }], ); diff --git a/src/render/markdown.rs b/src/render/markdown.rs index dfa86d4..1abb66c 100644 --- a/src/render/markdown.rs +++ b/src/render/markdown.rs @@ -617,6 +617,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: crate::enrich::Severity::Critical, + aliases: Vec::new(), }], ); let md = render(&cs, &e); @@ -649,14 +650,17 @@ mod tests { crate::enrich::VulnRef { id: "CVE-2025-medium".to_string(), severity: crate::enrich::Severity::Medium, + aliases: Vec::new(), }, crate::enrich::VulnRef { id: "CVE-2025-critical".to_string(), severity: crate::enrich::Severity::Critical, + aliases: Vec::new(), }, crate::enrich::VulnRef { id: "CVE-2025-high".to_string(), severity: crate::enrich::Severity::High, + aliases: Vec::new(), }, ], ); @@ -685,6 +689,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: crate::enrich::Severity::Critical, + aliases: Vec::new(), }], ); let summary = render_with_options( @@ -745,6 +750,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: 
crate::enrich::Severity::High, + aliases: Vec::new(), }], ); @@ -973,6 +979,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-medium".into(), severity: crate::enrich::Severity::Medium, + aliases: Vec::new(), }], ); e.vulns.insert( @@ -980,6 +987,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "CVE-2025-critical".into(), severity: crate::enrich::Severity::Critical, + aliases: Vec::new(), }], ); let md = render(&cs, &e); @@ -1015,6 +1023,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-medium".into(), severity: crate::enrich::Severity::Medium, + aliases: Vec::new(), }], ); e.vulns.insert( @@ -1022,6 +1031,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "CVE-2025-critical".into(), severity: crate::enrich::Severity::Critical, + aliases: Vec::new(), }], ); let md = render(&cs, &e); @@ -1199,6 +1209,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "GHSA-x".into(), severity: crate::enrich::Severity::High, + aliases: Vec::new(), }], ); e.typosquats diff --git a/src/render/sarif.rs b/src/render/sarif.rs index d6bd6d4..055dca7 100644 --- a/src/render/sarif.rs +++ b/src/render/sarif.rs @@ -380,10 +380,12 @@ mod tests { crate::enrich::VulnRef { id: "GHSA-3p68-rc4w-qgx5".to_string(), severity: crate::enrich::Severity::High, + aliases: Vec::new(), }, crate::enrich::VulnRef { id: "CVE-2025-99999".to_string(), severity: crate::enrich::Severity::Medium, + aliases: Vec::new(), }, ], ); @@ -427,6 +429,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "OSV-2025-1".to_string(), severity: crate::enrich::Severity::None, + aliases: Vec::new(), }], ); let e = Enrichment { @@ -449,6 +452,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "CVE-2025-1".to_string(), severity: crate::enrich::Severity::Medium, + aliases: Vec::new(), }] }; @@ -569,6 +573,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "CVE-2025-1".to_string(), severity: crate::enrich::Severity::High, + aliases: Vec::new(), }], ); let e = Enrichment { @@ -610,6 +615,7 @@ mod tests { 
vec![crate::enrich::VulnRef { id: "CVE-1".into(), severity: crate::enrich::Severity::Medium, + aliases: Vec::new(), }], ); let e = Enrichment { diff --git a/src/render/term.rs b/src/render/term.rs index 6d5c2de..fd7c005 100644 --- a/src/render/term.rs +++ b/src/render/term.rs @@ -310,6 +310,7 @@ mod tests { vec![crate::enrich::VulnRef { id: "MAL-2026-2306".to_string(), severity: crate::enrich::Severity::Critical, + aliases: Vec::new(), }], ); From 2df299ae5a1cc8e2d53046c53651bc562ed6e42d Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:19:43 -0700 Subject: [PATCH 04/10] feat(cli): --debug-calibration-format jsonl alternative Add --debug-calibration-format <pipe|jsonl>, default pipe (back-compat). JSONL emits {kind,key,score,threshold} per line; numeric scores stay numeric, severity buckets ('HIGH', 'high+') stay strings. Adding new finding kinds in subsequent phases is one call to write_calibration_row, not a fork. Also pre-add --output-file flag (used by Phase A SARIF Code Scanning workflow to avoid YAML > redirection quoting hazards). Wiring into run_diff lands in Phase A; the flag is no-op for now. Config: debug_calibration, debug_calibration_format, output_file all mergeable from .bomdrift.toml. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/cli.rs | 22 +++++++ src/config.rs | 18 +++++- src/lib.rs | 161 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 175 insertions(+), 26 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 33c37b8..3eec682 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -264,6 +264,28 @@ pub struct DiffArgs { /// `threshold` is the constant the finding was compared against. #[arg(long)] pub debug_calibration: bool, + /// Format for `--debug-calibration` rows. `pipe` (default, back-compat + /// with v0.7) emits `kind|key|score|threshold` per line; `jsonl` emits + /// one JSON object per line for downstream tooling that doesn't want + /// to maintain a custom CSV-ish parser. 
+ #[arg(long, value_enum, default_value_t = DebugFormat::Pipe)] + pub debug_calibration_format: DebugFormat, + /// Write the chosen `--output` format to this path instead of stdout. + /// Useful for SARIF (`--output sarif --output-file bomdrift.sarif`) + /// where YAML quoting `>` redirection is fragile in CI templates. + #[arg(long)] + pub output_file: Option, +} + +/// Wire format for `--debug-calibration` output. Pipe-delimited keeps v0.7 +/// callers working unchanged; JSONL is the recommended shape for new tooling +/// because adding a new finding kind doesn't fork the parser. +#[derive(ValueEnum, Clone, Copy, Debug, PartialEq, Eq, Default, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum DebugFormat { + #[default] + Pipe, + Jsonl, } /// Threshold for `--fail-on` exit-code-2 behavior. diff --git a/src/config.rs b/src/config.rs index e97dfee..dcf5525 100644 --- a/src/config.rs +++ b/src/config.rs @@ -11,7 +11,7 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; use serde::Deserialize; -use crate::cli::{DiffArgs, FailOn, InputFormat, OutputFormat, Platform}; +use crate::cli::{DebugFormat, DiffArgs, FailOn, InputFormat, OutputFormat, Platform}; const DEFAULT_CONFIG_PATH: &str = ".bomdrift.toml"; @@ -37,6 +37,9 @@ pub struct DiffConfig { pub max_added: Option, pub max_removed: Option, pub max_version_changed: Option, + pub debug_calibration: Option, + pub debug_calibration_format: Option, + pub output_file: Option, } pub fn apply_diff_config(args: &mut DiffArgs) -> Result<()> { @@ -95,6 +98,17 @@ fn apply_loaded_diff_config(args: &mut DiffArgs, config: Config) { if args.max_version_changed.is_none() { args.max_version_changed = diff.max_version_changed; } + args.debug_calibration |= diff.debug_calibration.unwrap_or(false); + if let Some(fmt) = diff.debug_calibration_format { + // Only override the default when the config explicitly sets a value; + // CLI flag still wins because it's the explicit form. 
+ if args.debug_calibration_format == DebugFormat::default() { + args.debug_calibration_format = fmt; + } + } + if args.output_file.is_none() { + args.output_file = diff.output_file; + } } fn load_config(explicit: Option<&Path>) -> Result> { @@ -142,6 +156,8 @@ mod tests { max_removed: None, max_version_changed: None, debug_calibration: false, + debug_calibration_format: DebugFormat::default(), + output_file: None, } } diff --git a/src/lib.rs b/src/lib.rs index ab0ce1e..4d8d052 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -157,7 +157,11 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { // integration. Format: `kind|key|score|threshold`. No telemetry: the // user owns the bytes and pipes them wherever they want. if args.debug_calibration { - write_calibration_lines(&enrichment, &mut std::io::stderr()); + write_calibration_lines( + &enrichment, + &mut std::io::stderr(), + args.debug_calibration_format, + ); } // CLI flag wins; otherwise the env var supplies the default. Empty @@ -283,63 +287,129 @@ pub fn budget_tripped( /// `threshold` is the constant the score was gated against. CVE rows /// surface every advisory (no internal threshold) so adopters can see /// the score distribution before tuning `--fail-on critical-cve`. 
-fn write_calibration_lines(e: &Enrichment, out: &mut W) { +fn write_calibration_lines( + e: &Enrichment, + out: &mut W, + format: crate::cli::DebugFormat, +) { use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS; use crate::enrich::typosquat::SIMILARITY_THRESHOLD; use crate::enrich::version_jump::MIN_MAJOR_DELTA; for f in &e.typosquats { - let _ = writeln!( + write_calibration_row( out, - "typosquat|{}|{:.4}|{:.4}", + "typosquat", f.component .purl .as_deref() .unwrap_or(f.component.name.as_str()), - f.score, - SIMILARITY_THRESHOLD, + CalibrationScore::Float(f.score), + CalibrationThreshold::Float(SIMILARITY_THRESHOLD), + format, ); } for f in &e.version_jumps { - let _ = writeln!( + write_calibration_row( out, - "version-jump|{}|{}|{}", + "version-jump", f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), - f.after_major.saturating_sub(f.before_major), - MIN_MAJOR_DELTA, + CalibrationScore::Int(f.after_major.saturating_sub(f.before_major) as i64), + CalibrationThreshold::Int(MIN_MAJOR_DELTA as i64), + format, ); } for f in &e.maintainer_age { - let _ = writeln!( + write_calibration_row( out, - "maintainer-age|{}|{}|{}", + "maintainer-age", f.component .purl .as_deref() .unwrap_or(f.component.name.as_str()), - f.days_old, - YOUNG_MAINTAINER_DAYS, + CalibrationScore::Int(f.days_old), + CalibrationThreshold::Int(YOUNG_MAINTAINER_DAYS), + format, ); } for (purl, refs) in &e.vulns { for vuln in refs { - // Severity has no numeric score in our model; emit the - // bucket label as a non-numeric "score" so the CSV row is - // still well-formed. Adopters who want raw CVSS can grep - // the JSON output instead — the calibration tap is for the - // ranked-bucket choice (cve vs critical-cve), not for - // reverse-engineering CVSS. - let _ = writeln!( + // Severity has no numeric score in our model; emit the bucket + // label as a non-numeric "score" so the row stays well-formed + // (string in JSONL, plain token in pipe). 
+ write_calibration_row( out, - "cve|{}#{}|{}|high+", - purl, - vuln.id, - vuln.severity.as_str(), + "cve", + &format!("{purl}#{}", vuln.id), + CalibrationScore::Text(vuln.severity.as_str()), + CalibrationThreshold::Text("high+"), + format, ); } } } +/// Numeric or symbolic score for a calibration row. Float/Int rendered +/// without quotes in JSONL; Text rendered as a JSON string. +pub(crate) enum CalibrationScore<'a> { + Float(f64), + Int(i64), + Text(&'a str), +} + +pub(crate) enum CalibrationThreshold<'a> { + Float(f64), + Int(i64), + Text(&'a str), +} + +/// Single dispatch point for both pipe and JSONL calibration formats. +/// Adding a new finding kind is one call site, not two — the format +/// branches stay localized to this helper. +pub(crate) fn write_calibration_row( + out: &mut W, + kind: &str, + key: &str, + score: CalibrationScore<'_>, + threshold: CalibrationThreshold<'_>, + format: crate::cli::DebugFormat, +) { + match format { + crate::cli::DebugFormat::Pipe => { + let score_s = match score { + CalibrationScore::Float(v) => format!("{v:.4}"), + CalibrationScore::Int(v) => v.to_string(), + CalibrationScore::Text(s) => s.to_string(), + }; + let thr_s = match threshold { + CalibrationThreshold::Float(v) => format!("{v:.4}"), + CalibrationThreshold::Int(v) => v.to_string(), + CalibrationThreshold::Text(s) => s.to_string(), + }; + let _ = writeln!(out, "{kind}|{key}|{score_s}|{thr_s}"); + } + crate::cli::DebugFormat::Jsonl => { + let score_v = match score { + CalibrationScore::Float(v) => serde_json::Value::from(v), + CalibrationScore::Int(v) => serde_json::Value::from(v), + CalibrationScore::Text(s) => serde_json::Value::from(s), + }; + let thr_v = match threshold { + CalibrationThreshold::Float(v) => serde_json::Value::from(v), + CalibrationThreshold::Int(v) => serde_json::Value::from(v), + CalibrationThreshold::Text(s) => serde_json::Value::from(s), + }; + let line = serde_json::json!({ + "kind": kind, + "key": key, + "score": score_v, + 
"threshold": thr_v, + }); + let _ = writeln!(out, "{line}"); + } + } +} + fn log_budget_trips( cs: &ChangeSet, max_added: Option, @@ -686,4 +756,45 @@ mod tests { assert!(budget_tripped(&cs, None, None, Some(0))); assert!(!budget_tripped(&cs, Some(2), Some(1), Some(1))); } + + #[test] + fn calibration_pipe_format_matches_v0_7_layout() { + let e = enrichment_with_typosquat(); + let mut buf = Vec::new(); + write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Pipe); + let s = String::from_utf8(buf).unwrap(); + assert!(s.starts_with("typosquat|"), "got: {s}"); + assert_eq!( + s.matches('|').count(), + 3, + "pipe row has 4 fields → 3 separators; got: {s}" + ); + } + + #[test] + fn calibration_jsonl_format_emits_one_object_per_line() { + let e = enrichment_with_typosquat(); + let mut buf = Vec::new(); + write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Jsonl); + let s = String::from_utf8(buf).unwrap(); + let lines: Vec<&str> = s.lines().collect(); + assert_eq!(lines.len(), 1); + let v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid jsonl"); + assert_eq!(v["kind"], "typosquat"); + assert!(v["score"].is_number(), "numeric score in jsonl"); + assert!(v["threshold"].is_number()); + assert!(v["key"].is_string()); + } + + #[test] + fn calibration_jsonl_keeps_severity_label_as_string() { + let e = enrichment_with_cve_at(Severity::High); + let mut buf = Vec::new(); + write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Jsonl); + let s = String::from_utf8(buf).unwrap(); + let v: serde_json::Value = serde_json::from_str(s.trim()).unwrap(); + assert_eq!(v["kind"], "cve"); + assert_eq!(v["score"], "HIGH"); + assert_eq!(v["threshold"], "high+"); + } } From 72eca39ea6156159f575a5a662b517d1ce3ffc12 Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:23:56 -0700 Subject: [PATCH 05/10] feat(sarif): partialFingerprints + Code Scanning upload + --output-file SARIF results now carry partialFingerprints."primaryHash/v1" hashed 
from a stable per-rule identity tuple (ruleId + purl + per-rule discriminator): - bomdrift.cve: ruleId | purl | advisoryId - bomdrift.typosquat: ruleId | purl | closest - bomdrift.version-jump: ruleId | purl | beforeVersion | afterVersion - bomdrift.young-maint.: ruleId | purl | topContributor - bomdrift.license-change: ruleId | purl | beforeLicensesSorted | afterLicensesSorted Two CVEs on the same purl now produce distinct fingerprints (the duck-flagged collision case). The /v1 suffix on the fingerprint key lets us evolve identity later without churning GitHub alert state. New rule bomdrift.license-violation registered in tool.driver.rules ahead of Phase D's policy violation emission. CLI: --output-file writes the chosen output format to a file instead of stdout. Avoids YAML > redirection quoting in CI templates. GitHub Action: new input upload-to-code-scanning (default false) gates a github/codeql-action/upload-sarif@v3 step. Requires the calling workflow to have permissions.security-events: write. entrypoint.sh always passes --output-file when output=sarif so the file path the upload step expects is populated. Docs: new docs/src/sarif.md chapter; SUMMARY entry under Output. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- action.yml | 21 +++++ docs/src/SUMMARY.md | 1 + docs/src/sarif.md | 110 ++++++++++++++++++++++++++ entrypoint.sh | 22 +++++- src/lib.rs | 7 +- src/render/sarif.rs | 183 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 docs/src/sarif.md diff --git a/action.yml b/action.yml index 84a1292..37dac98 100644 --- a/action.yml +++ b/action.yml @@ -125,6 +125,15 @@ inputs: github-token: description: Token used to post PR comments. Defaults to GITHUB_TOKEN. default: ${{ github.token }} + upload-to-code-scanning: + description: | + When `true` AND `output: sarif`, upload the rendered SARIF artifact + to GitHub Code Scanning via `github/codeql-action/upload-sarif@v3`. 
+ Requires the calling workflow to have `permissions.security-events: + write`. Off by default for back-compat — v0.7 callers see no + behavior change. See + https://metbcy.github.io/bomdrift/sarif.html for wiring details. + default: 'false' runs: using: composite @@ -192,3 +201,15 @@ runs: VERIFY_SIGNATURES: ${{ inputs.verify-signatures }} INPUT_GITHUB_TOKEN: ${{ inputs.github-token }} BOMDRIFT_REPO_URL: 'https://github.com/${{ github.repository }}' + UPLOAD_TO_CODE_SCANNING: ${{ inputs.upload-to-code-scanning }} + + # Code Scanning upload is opt-in and needs `permissions.security-events: + # write` on the calling workflow. It runs only for `output: sarif` (any + # other format would 422). `!cancelled()` keeps the upload running when the + # diff step fails (e.g. exit 2 from a tripped `--fail-on` gate). + - name: Upload SARIF to Code Scanning + if: ${{ !cancelled() && inputs.upload-to-code-scanning == 'true' && inputs.output == 'sarif' }} + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: ${{ github.workspace }}/bomdrift.sarif + category: bomdrift diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 74b4ae8..00aa99b 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -12,6 +12,7 @@ # Output - [Output formats](./output-formats.md) +- [SARIF + Code Scanning](./sarif.md) - [Baseline & suppression](./baseline.md) # Enrichers diff --git a/docs/src/sarif.md b/docs/src/sarif.md new file mode 100644 index 0000000..bacdded --- /dev/null +++ b/docs/src/sarif.md @@ -0,0 +1,110 @@ +# SARIF + GitHub Code Scanning + +bomdrift can emit findings in [SARIF v2.1.0] for ingestion by GitHub Code +Scanning, GitLab Vulnerability Reports, and any other consumer that speaks +SARIF. + +```bash +bomdrift diff before.cdx.json after.cdx.json \ + --output sarif \ + --output-file bomdrift.sarif +``` + +## Rule taxonomy + +bomdrift emits the following stable rule IDs (load-bearing — never renamed +across releases).
All rules are present in `tool.driver.rules` even when +the current diff has zero results of that kind, so Code Scanning UI +suppression flows can enumerate them upfront. + +| Rule ID | Surfaces | SARIF level | +|---|---|---| +| `bomdrift.cve` | OSV.dev advisory ID(s) for the component | `error` for High/Critical, else `warning` | +| `bomdrift.typosquat` | Component name similar to a popular package | `warning` | +| `bomdrift.version-jump` | Multi-major version bump | `warning` | +| `bomdrift.young-maintainer` | Top GitHub contributor's first commit < 90 days ago | `warning` | +| `bomdrift.license-change` | License changed at the same version | `warning` | +| `bomdrift.license-violation` | Component license violates configured allow/deny policy | `warning` | + +## Fingerprint stability + +Each result carries `partialFingerprints.primaryHash/v1` — a SHA-256 digest +of a stable identity tuple per rule: + +| Rule | Identity | +|---|---| +| `bomdrift.cve` | `ruleId | purl | advisoryId` (severity excluded — severity changes shouldn't churn alert identity) | +| `bomdrift.typosquat` | `ruleId | purl | closest` | +| `bomdrift.version-jump` | `ruleId | purl | beforeVersion | afterVersion` (full versions, not majors) | +| `bomdrift.young-maintainer` | `ruleId | purl | topContributor` | +| `bomdrift.license-change` | `ruleId | purl | beforeLicensesSorted | afterLicensesSorted` | +| `bomdrift.license-violation` | `ruleId | purl | matchedLicense` | + +The `/v1` suffix on the fingerprint key lets bomdrift evolve identity +schemes in future releases without GitHub re-opening every existing alert. +Two distinct CVEs on the same purl produce distinct fingerprints; the +same finding produced across two runs produces a byte-equal fingerprint. + +## Wire up GitHub Code Scanning + +Set the new action input `upload-to-code-scanning: 'true'` and ensure your +workflow has the `security-events: write` permission. 
The composite action +runs `github/codeql-action/upload-sarif@v3` after bomdrift writes +`${{ github.workspace }}/bomdrift.sarif`. + +```yaml +permissions: + contents: read + security-events: write # required for SARIF upload + pull-requests: write # only if you also want PR comments + +jobs: + bomdrift: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: Metbcy/bomdrift@v1 + with: + output: sarif + upload-to-code-scanning: 'true' +``` + +## Direct CLI use (any CI) + +When integrating with GitLab Vulnerability Reports, Bitbucket, or any +arbitrary SARIF consumer, prefer `--output-file` over shell redirection: + +```bash +bomdrift diff before.json after.json \ + --output sarif \ + --output-file bomdrift.sarif +``` + +The `--output-file` form is YAML-quoting-safe (no `>` redirection) and +keeps stdout free for human-readable progress logging. + +## Determinism + +Renderer output is byte-deterministic across runs for identical inputs. +HashMap-keyed advisory lists are sorted by purl key before emission; +license arrays are sorted before fingerprinting. The +`SOURCE_DATE_EPOCH` environment variable is honored everywhere bomdrift +emits a timestamp (the SARIF document itself currently carries no +timestamps, but related VEX emission in v0.9 will). + +## Troubleshooting + +- **Alerts don't appear in the Security tab.** Confirm + `permissions.security-events: write` on the calling workflow AND + `upload-to-code-scanning: 'true'` on the action input. Check the + "Upload SARIF to Code Scanning" step in the job log for the API + response. +- **Same finding appears twice after a re-run.** This is a fingerprint + bug — file an issue with the SARIF artifact and the inputs that + produced it. Fingerprints should remain byte-equal across runs. +- **Severity wrong / missing.** Bomdrift maps GHSA's + `database_specific.severity` text label. Advisories without a label + surface at SARIF `warning` and the `properties.severity` field reads + `NONE`. 
+ +[SARIF v2.1.0]: https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html diff --git a/entrypoint.sh b/entrypoint.sh index 41e6d66..7f405f7 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -292,6 +292,7 @@ main() { local max_version_changed="${MAX_VERSION_CHANGED:-}" local fail_on="${FAIL_ON:-none}" local baseline="${BASELINE:-}" + local upload_to_cs="${UPLOAD_TO_CODE_SCANNING:-false}" # ---- Resolve "before" SBOM path ------------------------------------------- # @@ -379,17 +380,36 @@ main() { if [ -n "$max_version_changed" ]; then budget_args+=(--max-version-changed "$max_version_changed") fi + # When emitting SARIF, write to a stable workspace path so the + # codeql-action upload step can pick it up. We always set this when + # output is sarif (whether or not upload is enabled) — the file is + # cheap and consumers running their own SARIF upload pipeline benefit + # too. With --output-file the CLI writes the SARIF to that path INSTEAD + # of stdout, so callers that captured stdout must read the file now. + local sarif_args=() + local sarif_path="" + if [ "$output_format" = "sarif" ]; then + sarif_path="${GITHUB_WORKSPACE:-$PWD}/bomdrift.sarif" + sarif_args=(--output-file "$sarif_path") + fi set +e run_diff "$bin" "$before" "$after" "$output_format" "$input_format" \ "${config_args[@]}" "${fail_on_args[@]}" "${baseline_args[@]}" \ - "${focus_args[@]}" "${budget_args[@]}" \ + "${focus_args[@]}" "${budget_args[@]}" "${sarif_args[@]}" \ | tee "$out_file" rc="${PIPESTATUS[0]}" set -e out="$(cat "$out_file")" rm -f "$out_file" + # When SARIF was written via --output-file, the file is the canonical + # output for downstream upload; leave $out (terminal mirror) unused.
+ if [ "$output_format" = "sarif" ] && [ -n "$sarif_path" ] && [ -f "$sarif_path" ]; then + printf 'bomdrift Action: SARIF written to %s\n' "$sarif_path" + fi + : "$upload_to_cs" # consumed by composite step in action.yml; reference here keeps shellcheck quiet + # Always also write to the step summary so users see the diff even when no # PR comment is posted. if [ -n "${GITHUB_STEP_SUMMARY:-}" ] && [ "$output_format" = "markdown" ]; then diff --git a/src/lib.rs b/src/lib.rs index 4d8d052..9e39db2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -215,7 +215,12 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { OutputFormat::Sarif => render::sarif::render(&cs, &enrichment), }; - print!("{rendered}"); + if let Some(path) = &args.output_file { + std::fs::write(path, &rendered) + .with_context(|| format!("writing --output-file {}", path.display()))?; + } else { + print!("{rendered}"); + } // Body must be fully written before we exit-2 — the action's `tee` // wrapper still wants the comment posted even when fail-on trips. diff --git a/src/render/sarif.rs b/src/render/sarif.rs index 055dca7..bc0f15b 100644 --- a/src/render/sarif.rs +++ b/src/render/sarif.rs @@ -50,6 +50,7 @@ //! The render-twice-byte-equal regression test below guards this. use serde_json::{Value, json}; +use sha2::{Digest, Sha256}; use crate::diff::ChangeSet; use crate::enrich::Enrichment; @@ -137,9 +138,47 @@ fn rules() -> Value { regardless. Always informational severity (`warning`).", "https://metbcy.github.io/bomdrift/output-formats.html#sarif-v210", ), + rule( + "bomdrift.license-violation", + "license-violation", + "Component license violates configured allow/deny policy", + "The component's declared license is on the deny list, doesn't \ + appear on the allow list, or is a compound expression that \ + cannot be safely evaluated against the configured policy (with \ + `allow_ambiguous=false`). 
Configure via the `[license]` block \ + in `.bomdrift.toml` or the `--allow-licenses` / `--deny-licenses` \ + CLI flags. Severity `error` (this is a policy gate, not an \ + advisory heuristic).", + "https://metbcy.github.io/bomdrift/license-policy.html", + ), ]) } +/// Stable per-rule identity hash for SARIF `partialFingerprints`. GitHub +/// Code Scanning uses these to thread alert state across runs (resolved / +/// dismissed / open) so the value MUST stay byte-equal for the same logical +/// finding. We hex-encode SHA-256 of a `|`-joined identity string so the +/// inputs are inspectable from a debugger and the output is filename-safe. +/// +/// The `/v1` suffix on the fingerprint key (see emit sites) lets us evolve +/// the identity scheme later without GitHub re-opening every alert. +pub(crate) fn fingerprint(parts: &[&str]) -> String { + let mut h = Sha256::new(); + for (i, p) in parts.iter().enumerate() { + if i > 0 { + h.update(b"|"); + } + h.update(p.as_bytes()); + } + let digest = h.finalize(); + let mut out = String::with_capacity(64); + for byte in digest { + use std::fmt::Write; + let _ = write!(out, "{byte:02x}"); + } + out +} + fn rule(id: &str, name: &str, short: &str, full: &str, help_uri: &str) -> Value { json!({ "id": id, @@ -165,6 +204,8 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { let mut advisories: Vec<&crate::enrich::VulnRef> = e.vulns[purl].iter().collect(); advisories.sort_by(|a, b| b.severity.cmp(&a.severity).then_with(|| a.id.cmp(&b.id))); for advisory in advisories { + let purl_str: &str = purl; + let fp = fingerprint(&["bomdrift.cve", purl_str, &advisory.id]); out.push(json!({ "ruleId": "bomdrift.cve", "level": sarif_level(advisory.severity), @@ -177,6 +218,7 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { ), }, "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, "properties": { "purl": purl, "advisoryId": advisory.id, @@ -190,6 +232,8 @@ fn results(cs: &ChangeSet, e: &Enrichment) 
-> Value { for finding in &e.typosquats { let name = &finding.component.name; let closest = &finding.closest; + let purl_or_name = finding.component.purl.as_deref().unwrap_or(name); + let fp = fingerprint(&["bomdrift.typosquat", purl_or_name, closest]); out.push(json!({ "ruleId": "bomdrift.typosquat", "level": "warning", @@ -201,6 +245,7 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { ), }, "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, "properties": { "purl": finding.component.purl, "name": name, @@ -214,6 +259,13 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { // ---- bomdrift.version-jump ---- for finding in &e.version_jumps { let name = &finding.after.name; + let purl_or_name = finding.after.purl.as_deref().unwrap_or(name); + let fp = fingerprint(&[ + "bomdrift.version-jump", + purl_or_name, + &finding.before.version, + &finding.after.version, + ]); out.push(json!({ "ruleId": "bomdrift.version-jump", "level": "warning", @@ -228,6 +280,7 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { ), }, "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, "properties": { "purl": finding.after.purl, "name": name, @@ -242,6 +295,12 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { // ---- bomdrift.young-maintainer ---- for finding in &e.maintainer_age { let name = &finding.component.name; + let purl_or_name = finding.component.purl.as_deref().unwrap_or(name); + let fp = fingerprint(&[ + "bomdrift.young-maintainer", + purl_or_name, + &finding.top_contributor, + ]); out.push(json!({ "ruleId": "bomdrift.young-maintainer", "level": "warning", @@ -255,6 +314,7 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { ), }, "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, "properties": { "purl": finding.component.purl, "name": name, @@ -270,6 +330,19 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { // version_changed already 
folds in license-changes-with-version-bumps. for (before, after) in &cs.license_changed { let name = &after.name; + let purl_or_name = after.purl.as_deref().unwrap_or(name); + let mut before_lic = before.licenses.clone(); + before_lic.sort(); + let mut after_lic = after.licenses.clone(); + after_lic.sort(); + let before_join = before_lic.join(","); + let after_join = after_lic.join(","); + let fp = fingerprint(&[ + "bomdrift.license-change", + purl_or_name, + &before_join, + &after_join, + ]); out.push(json!({ "ruleId": "bomdrift.license-change", "level": "warning", @@ -281,6 +354,7 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { ), }, "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, "properties": { "purl": after.purl, "name": name, @@ -362,6 +436,7 @@ mod tests { "bomdrift.version-jump", "bomdrift.young-maintainer", "bomdrift.license-change", + "bomdrift.license-violation", ], "rule IDs are stable public API — order also stable for byte-determinism", ); @@ -640,4 +715,112 @@ mod tests { assert!(!locs.is_empty(), "result missing locations: {result}"); } } + + #[test] + fn fingerprint_helper_is_pure_and_hex_64_chars() { + let fp = fingerprint(&["a", "b", "c"]); + assert_eq!(fp.len(), 64); + assert!(fp.chars().all(|c| c.is_ascii_hexdigit())); + assert_eq!(fp, fingerprint(&["a", "b", "c"])); + assert_ne!(fp, fingerprint(&["a", "b", "d"])); + // Joining with `|` matters: ["ab", "c"] must not collide with + // ["a", "bc"]. 
+ assert_ne!(fingerprint(&["ab", "c"]), fingerprint(&["a", "bc"])); + } + + #[test] + fn cve_results_carry_partial_fingerprints_stable_across_runs() { + let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/axios@1.14.1".to_string(), + vec![crate::enrich::VulnRef { + id: "GHSA-3p68-rc4w-qgx5".to_string(), + severity: crate::enrich::Severity::High, + aliases: Vec::new(), + }], + ); + let e = Enrichment { + vulns, + ..Default::default() + }; + let r1 = render(&ChangeSet::default(), &e); + let r2 = render(&ChangeSet::default(), &e); + assert_eq!(r1, r2, "byte-equal across runs"); + let v: Value = serde_json::from_str(&r1).unwrap(); + let fp = &v["runs"][0]["results"][0]["partialFingerprints"]["primaryHash/v1"]; + assert!(fp.is_string(), "fingerprint missing: {}", v); + assert_eq!(fp.as_str().unwrap().len(), 64); + } + + #[test] + fn two_cves_on_same_purl_get_distinct_fingerprints() { + // The duck flagged this collision case: per-purl-only fingerprints + // would dedup distinct advisories. Identity must include the + // advisory id. 
+ let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/axios@1.14.1".to_string(), + vec![ + crate::enrich::VulnRef { + id: "CVE-2025-1".to_string(), + severity: crate::enrich::Severity::High, + aliases: Vec::new(), + }, + crate::enrich::VulnRef { + id: "CVE-2025-2".to_string(), + severity: crate::enrich::Severity::High, + aliases: Vec::new(), + }, + ], + ); + let e = Enrichment { + vulns, + ..Default::default() + }; + let s = render(&ChangeSet::default(), &e); + let v: Value = serde_json::from_str(&s).unwrap(); + let results = v["runs"][0]["results"].as_array().unwrap(); + assert_eq!(results.len(), 2); + let f1 = results[0]["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap(); + let f2 = results[1]["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap(); + assert_ne!( + f1, f2, + "distinct advisories must have distinct fingerprints" + ); + } + + #[test] + fn version_jump_fingerprint_uses_full_versions_not_majors() { + // 1.0.0 -> 4.0.0 and 1.5.0 -> 4.5.0 both have major delta 3 but + // are distinct findings — fingerprints must not collide. 
+ let mk = |a: &str, b: &str| VersionJumpFinding { + before: comp("foo", a, Ecosystem::Npm, Some("pkg:npm/foo@1")), + after: comp("foo", b, Ecosystem::Npm, Some("pkg:npm/foo@4")), + before_major: 1, + after_major: 4, + }; + let e1 = Enrichment { + version_jumps: vec![mk("1.0.0", "4.0.0")], + ..Default::default() + }; + let e2 = Enrichment { + version_jumps: vec![mk("1.5.0", "4.5.0")], + ..Default::default() + }; + let v1: Value = serde_json::from_str(&render(&ChangeSet::default(), &e1)).unwrap(); + let v2: Value = serde_json::from_str(&render(&ChangeSet::default(), &e2)).unwrap(); + let f1 = v1["runs"][0]["results"][0]["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap() + .to_string(); + let f2 = v2["runs"][0]["results"][0]["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap() + .to_string(); + assert_ne!(f1, f2); + } } From d6a4bee069e23a293417e019e9035e60fbf2e0cf Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:33:55 -0700 Subject: [PATCH 06/10] feat(enrich): EPSS + CISA KEV scoring with --fail-on thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new best-effort enrichers piggyback on OSV's VulnRefs after the core enrichment runs: - src/enrich/epss.rs queries https://api.first.org/data/v1/epss in 100-CVE batches, populates VulnRef.epss_score (max-of-aliases), caches per-CVE at /bomdrift/epss/.json (24h TTL). - src/enrich/kev.rs downloads CISA's known_exploited_vulnerabilities feed once daily, populates VulnRef.kev when any CVE alias matches, caches the bulk catalog at /bomdrift/kev/catalog.json (24h TTL). Both enrichers fail closed-without-blocking: a network failure logs at BOMDRIFT_DEBUG=1 and the diff renders with empty fields. VulnRef extended (additive JSON shape via skip_serializing_if): pub epss_score: Option, pub kev: bool, CLI surface: - --no-epss / --no-kev: skip the enricher (network + cache). 
- --fail-on kev: new FailOn variant; --fail-on any includes KEV too. - --fail-on-epss : sibling flag (--fail-on is a clap ValueEnum, parsing 'epss>=N' inside it would break v0.7 callers; sibling flag is cleaner). Trips exit 2 when any advisory's score >= threshold. Render paths: - Markdown: "EPSS 0.87 · **KEV**" badges in CVE rows. - Term: "EPSS 0.87 KEV" plain badges. - SARIF: bomdrift.cve result properties.epssScore, properties.kev. Calibration rows for both enrichers (pipe + JSONL formats). Phase D ahead-of-time scaffolding: Enrichment.license_violations field, LicenseViolation/LicenseViolationKind types, FailOn::LicenseViolation variant. Population lands in Phase D. Docs: docs/src/enrichers/epss.md, docs/src/enrichers/kev.md, SUMMARY entries. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- benches/render.rs | 2 + docs/src/SUMMARY.md | 2 + docs/src/enrichers/epss.md | 60 ++++++++ docs/src/enrichers/kev.md | 53 +++++++ src/baseline.rs | 14 ++ src/cli.rs | 22 +++ src/config.rs | 11 ++ src/enrich/epss.rs | 302 +++++++++++++++++++++++++++++++++++++ src/enrich/kev.rs | 218 ++++++++++++++++++++++++++ src/enrich/mod.rs | 54 ++++++- src/enrich/osv.rs | 7 + src/lib.rs | 109 ++++++++++++- src/render/json.rs | 4 + src/render/markdown.rs | 33 +++- src/render/sarif.rs | 42 +++++- src/render/term.rs | 13 +- 16 files changed, 935 insertions(+), 11 deletions(-) create mode 100644 docs/src/enrichers/epss.md create mode 100644 docs/src/enrichers/kev.md create mode 100644 src/enrich/epss.rs create mode 100644 src/enrich/kev.rs diff --git a/benches/render.rs b/benches/render.rs index 4b67ba2..281978e 100644 --- a/benches/render.rs +++ b/benches/render.rs @@ -65,6 +65,8 @@ fn synth_changeset() -> (ChangeSet, Enrichment) { id: format!("GHSA-test-{i:04}"), severity: Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); } diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 00aa99b..6058d01 100644 --- a/docs/src/SUMMARY.md +++ 
b/docs/src/SUMMARY.md @@ -19,6 +19,8 @@ - [Overview](./enrichers/overview.md) - [OSV.dev CVE lookup](./enrichers/osv-cve.md) +- [EPSS](./enrichers/epss.md) +- [CISA KEV](./enrichers/kev.md) - [Typosquat detection](./enrichers/typosquat.md) - [Multi-major version jumps](./enrichers/version-jump.md) - [Maintainer age signal](./enrichers/maintainer-age.md) diff --git a/docs/src/enrichers/epss.md b/docs/src/enrichers/epss.md new file mode 100644 index 0000000..3e644b2 --- /dev/null +++ b/docs/src/enrichers/epss.md @@ -0,0 +1,60 @@ +# EPSS + +bomdrift queries the [Exploit Prediction Scoring System (EPSS)] from +FIRST.org for every CVE-aliased advisory and surfaces the per-CVE score +(0.0 – 1.0) in markdown / terminal / SARIF output. + +EPSS estimates the probability that a given CVE will be exploited in the +next 30 days. Combined with severity it gives reviewers a sharper signal +than CVSS alone — a Critical CVE with EPSS 0.01 is far less urgent than a +Medium CVE with EPSS 0.85. + +## Output + +- **Markdown**: per-advisory badge `EPSS 0.87` after the severity label. +- **Terminal**: same badge, no markup. +- **JSON**: `enrichment.vulns[purl][i].epss_score` numeric field. +- **SARIF**: `properties.epssScore` on `bomdrift.cve` results. + +When an advisory is keyed by GHSA but has CVE aliases, the score is the +**max across all CVE aliases** so a GHSA covering two CVEs surfaces the +worse of the two. + +## Threshold gating + +```bash +bomdrift diff before.json after.json --fail-on-epss 0.5 +``` + +Exits 2 when any advisory has score ≥ 0.5. 0.5 is roughly the top decile +of actively-exploited CVEs; tune for your team's risk appetite. + +## Disabling + +```bash +bomdrift diff before.json after.json --no-epss +``` + +or in `.bomdrift.toml`: + +```toml +[diff] +no_epss = true +``` + +Both forms skip the FIRST.org HTTP call AND the disk cache lookup. + +## Caching + +24h TTL at `/bomdrift/epss/.json`. 
Negative results +(CVEs FIRST.org returned no score for) are cached to avoid re-querying +recently-published CVEs that haven't been scored yet. + +## Best-effort + +Like every bomdrift enricher, EPSS is best-effort: a network failure or +a malformed response surfaces a `BOMDRIFT_DEBUG=1` stderr note and the +diff renders with empty `epss_score` fields. EPSS being unreachable is +never a reason to block a PR review. + +[Exploit Prediction Scoring System (EPSS)]: https://www.first.org/epss/ diff --git a/docs/src/enrichers/kev.md b/docs/src/enrichers/kev.md new file mode 100644 index 0000000..5e65de9 --- /dev/null +++ b/docs/src/enrichers/kev.md @@ -0,0 +1,53 @@ +# CISA KEV + +bomdrift downloads the [CISA Known Exploited Vulnerabilities catalog] and +flips a `KEV` flag on every advisory whose primary id or aliases include a +CVE listed in the catalog. + +CISA KEV is the highest-confidence "actively exploited in the wild" signal +available — CISA only adds CVEs to the catalog after observing real-world +exploitation. It's a tighter filter than `--fail-on critical-cve` (which +fires on CVSS High or above regardless of exploitation evidence). + +## Output + +- **Markdown**: bold `**KEV**` badge after the severity / EPSS label. +- **Terminal**: plain `KEV` token. +- **JSON**: `enrichment.vulns[purl][i].kev` boolean field. +- **SARIF**: `properties.kev: true` on `bomdrift.cve` results when set. + +## Threshold gating + +```bash +bomdrift diff before.json after.json --fail-on kev +``` + +Exits 2 when any advisory has its KEV flag set. `--fail-on any` also +includes KEV. + +## Disabling + +```bash +bomdrift diff before.json after.json --no-kev +``` + +or in `.bomdrift.toml`: + +```toml +[diff] +no_kev = true +``` + +## Caching + +24h TTL on the bulk catalog JSON at +`/bomdrift/kev/catalog.json`. Once-daily refresh matches CISA's +publication cadence. + +## Best-effort + +Network failure logs at `BOMDRIFT_DEBUG=1` and the diff renders with KEV +flags absent. 
A stale catalog (within the 24h window) is preferred over +re-fetching on every run. + +[CISA Known Exploited Vulnerabilities catalog]: https://www.cisa.gov/known-exploited-vulnerabilities-catalog diff --git a/src/baseline.rs b/src/baseline.rs index 1953a95..4e89ae5 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -344,6 +344,8 @@ mod tests { id: "CVE-1".into(), severity: Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); apply(&mut cs, &mut e, &baseline); @@ -370,11 +372,15 @@ mod tests { id: "CVE-1".into(), severity: Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }, VulnRef { id: "CVE-2".into(), severity: Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }, ], ); @@ -399,6 +405,8 @@ mod tests { id: "CVE-1".into(), severity: Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); apply(&mut cs, &mut e, &baseline); @@ -488,11 +496,15 @@ mod tests { id: "GHSA-evil-1234".into(), severity: Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }, VulnRef { id: "CVE-still-here".into(), severity: Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }, ], ); @@ -502,6 +514,8 @@ mod tests { id: "GHSA-evil-1234".into(), severity: Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); apply(&mut cs, &mut e, &baseline); diff --git a/src/cli.rs b/src/cli.rs index 3eec682..f9f8cb2 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -262,6 +262,21 @@ pub struct DiffArgs { /// `typosquat`, `maintainer-age`, `version-jump`, `cve`. `score` is /// the underlying similarity / age / jump-size / CVSS value; /// `threshold` is the constant the finding was compared against. + /// Skip the EPSS enricher (FIRST.org) entirely. Useful for offline / + /// air-gapped CI where outbound HTTP is blocked, or when EPSS data is + /// not part of the team's risk model. Disables both the network call + /// and the disk cache lookup. 
+ #[arg(long)] + pub no_epss: bool, + /// Skip the CISA KEV enricher entirely. + #[arg(long)] + pub no_kev: bool, + /// Trip exit-2 when any advisory's EPSS score is >= this threshold + /// (0.0 - 1.0). Recommended starting point: 0.5 (top decile of + /// actively-exploited CVEs). Implicit `--fail-on cve` semantics — + /// only advisories surface this; non-CVE findings are unaffected. + #[arg(long)] + pub fail_on_epss: Option, #[arg(long)] pub debug_calibration: bool, /// Format for `--debug-calibration` rows. `pipe` (default, back-compat @@ -310,6 +325,13 @@ pub enum FailOn { Typosquat, /// Trip when at least one same-version license change is present. LicenseChange, + /// Trip when any advisory's CISA KEV flag is set (i.e. listed in the + /// Known Exploited Vulnerabilities catalog). KEV is a high-signal + /// "actively exploited in the wild" claim — narrower than `cve` but + /// less rigid than `critical-cve` (KEV entries can be Medium-severity). + Kev, + /// Trip on a license-policy violation (Phase D, v0.8+). + LicenseViolation, /// Trip on ANY finding (CVE, typosquat, version-jump, young-maintainer) /// OR any license-changed-without-version-bump pair (the suspicious case). 
Any, diff --git a/src/config.rs b/src/config.rs index dcf5525..36eef1c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -26,6 +26,9 @@ pub struct DiffConfig { pub format: Option, pub no_osv: Option, pub no_osv_cache: Option, + pub no_epss: Option, + pub no_kev: Option, + pub fail_on_epss: Option, pub baseline: Option, pub no_maintainer_age: Option, pub fail_on: Option, @@ -64,6 +67,11 @@ fn apply_loaded_diff_config(args: &mut DiffArgs, config: Config) { } args.no_osv |= diff.no_osv.unwrap_or(false); args.no_osv_cache |= diff.no_osv_cache.unwrap_or(false); + args.no_epss |= diff.no_epss.unwrap_or(false); + args.no_kev |= diff.no_kev.unwrap_or(false); + if args.fail_on_epss.is_none() { + args.fail_on_epss = diff.fail_on_epss; + } if args.baseline.is_none() { // Config-derived baseline paths are tolerant of a missing file. // `bomdrift init` ships `.bomdrift.toml` pointing at @@ -144,6 +152,9 @@ mod tests { format: None, no_osv: false, no_osv_cache: false, + no_epss: false, + no_kev: false, + fail_on_epss: None, baseline: None, no_maintainer_age: false, fail_on: None, diff --git a/src/enrich/epss.rs b/src/enrich/epss.rs new file mode 100644 index 0000000..a10040d --- /dev/null +++ b/src/enrich/epss.rs @@ -0,0 +1,302 @@ +//! EPSS (Exploit Prediction Scoring System) enrichment. +//! +//! EPSS publishes a per-CVE probability of exploitation in the next 30 days, +//! refreshed daily. We query <https://api.first.org/data/v1/epss> +//! in batches and surface the score on every [`VulnRef`] whose primary id +//! or aliases include a CVE-prefixed identifier. +//! +//! Best-effort: a network failure or parse error logs to stderr at +//! `BOMDRIFT_DEBUG=1` and returns Ok with no enrichment applied. The diff +//! still renders. +//! +//! Disk cache: `<cache-dir>/bomdrift/epss/<CVE-ID>.json`, 24h TTL. Mirrors +//! [`crate::enrich::cache`]'s atomicity and miss-on-corrupt semantics. 
+ +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; + +use crate::enrich::Enrichment; + +const EPSS_API_URL: &str = "https://api.first.org/data/v1/epss"; +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(15); +/// FIRST.org documents a 100-CVE batch ceiling on the `cve=` param. +const MAX_BATCH: usize = 100; +const SUBDIR: &str = "epss"; +/// 24 hours — same TTL as the OSV cache so successive PR pushes within a +/// work session hit cache. +const CACHE_TTL_SECS: u64 = 24 * 60 * 60; + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct CacheEntry { + fetched_at: u64, + score: Option, +} + +/// Apply EPSS scores to every [`VulnRef`] in `e.vulns`. Updates in place; +/// `--no-epss` callers should skip calling this entirely. Best-effort. +pub fn enrich(e: &mut Enrichment) -> Result<()> { + enrich_with_url(e, EPSS_API_URL, DEFAULT_TIMEOUT) +} + +fn enrich_with_url(e: &mut Enrichment, base_url: &str, timeout: Duration) -> Result<()> { + let cves = collect_cves(e); + if cves.is_empty() { + return Ok(()); + } + let mut scores: HashMap = HashMap::new(); + let mut to_fetch: Vec = Vec::new(); + let cache_root = cache_root(); + for cve in &cves { + if let Some(root) = &cache_root + && let Some(cached) = read_cache(root, cve) + { + if let Some(s) = cached { + scores.insert(cve.clone(), s); + } + continue; + } + to_fetch.push(cve.clone()); + } + + if !to_fetch.is_empty() { + let agent = ureq::AgentBuilder::new().timeout(timeout).build(); + for chunk in to_fetch.chunks(MAX_BATCH) { + match fetch_batch(&agent, base_url, chunk) { + Ok(batch) => { + if let Some(root) = &cache_root { + for cve in chunk { + let s = batch.get(cve).copied(); + write_cache(root, cve, s); + if let Some(score) = s { + scores.insert(cve.clone(), score); + } + } + } else { + for (k, v) in batch { + scores.insert(k, v); + } + } + } + Err(err) => { + if 
std::env::var("BOMDRIFT_DEBUG").is_ok() { + eprintln!("epss: fetch failed: {err}"); + } + // Best-effort: leave these CVEs unenriched. + } + } + } + } + + apply_scores(e, &scores); + Ok(()) +} + +/// Collect every CVE-prefixed identifier referenced anywhere in `e.vulns`. +fn collect_cves(e: &Enrichment) -> Vec { + let mut set: std::collections::BTreeSet = std::collections::BTreeSet::new(); + for refs in e.vulns.values() { + for v in refs { + for c in v.cves() { + set.insert(c.to_string()); + } + } + } + set.into_iter().collect() +} + +/// Walk every `VulnRef` and set `epss_score` to the max score across its +/// CVE aliases (and primary id when CVE-keyed). +fn apply_scores(e: &mut Enrichment, scores: &HashMap) { + for refs in e.vulns.values_mut() { + for v in refs.iter_mut() { + let mut max: Option = None; + for c in v.cves() { + if let Some(&s) = scores.get(c) { + max = Some(max.map(|m| m.max(s)).unwrap_or(s)); + } + } + if max.is_some() { + v.epss_score = max; + } + } + } +} + +/// FIRST.org `/v1/epss` response shape (subset). +#[derive(Deserialize, Debug)] +struct EpssResponse { + data: Vec, +} +#[derive(Deserialize, Debug)] +struct EpssDatum { + cve: String, + epss: String, // documented as a string in the JSON response. 
+} + +fn fetch_batch( + agent: &ureq::Agent, + base_url: &str, + cves: &[String], +) -> Result> { + let url = format!("{base_url}?cve={}", cves.join(",")); + let resp = agent + .get(&url) + .set( + "user-agent", + concat!("bomdrift/", env!("CARGO_PKG_VERSION")), + ) + .call()?; + let parsed: EpssResponse = resp.into_json()?; + let mut out = HashMap::with_capacity(parsed.data.len()); + for d in parsed.data { + if let Ok(score) = d.epss.parse::() { + out.insert(d.cve, score); + } + } + Ok(out) +} + +fn cache_root() -> Option { + crate::refresh::default_cache_root() + .ok() + .map(|p| p.join(SUBDIR)) +} + +fn read_cache(root: &std::path::Path, cve: &str) -> Option> { + let path = root.join(format!("{}.json", sanitize(cve))); + let body = std::fs::read(&path).ok()?; + let entry: CacheEntry = serde_json::from_slice(&body).ok()?; + let now = now_secs(); + if now.saturating_sub(entry.fetched_at) > CACHE_TTL_SECS { + return None; + } + Some(entry.score) +} + +fn write_cache(root: &std::path::Path, cve: &str, score: Option) { + if std::fs::create_dir_all(root).is_err() { + return; + } + let entry = CacheEntry { + fetched_at: now_secs(), + score, + }; + let Ok(body) = serde_json::to_vec(&entry) else { + return; + }; + let target = root.join(format!("{}.json", sanitize(cve))); + let mut tmp = target.as_os_str().to_owned(); + tmp.push(".tmp"); + let tmp = PathBuf::from(tmp); + if std::fs::write(&tmp, body).is_err() { + return; + } + let _ = std::fs::rename(&tmp, &target); +} + +fn now_secs() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0) +} + +fn sanitize(id: &str) -> String { + id.chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' 
{ + c + } else { + '_' + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::enrich::{Severity, VulnRef}; + + #[test] + fn parse_epss_response_extracts_cve_to_score_map() { + let body = r#"{ + "status": "OK", + "data": [ + {"cve": "CVE-2025-1111", "epss": "0.876", "percentile": "0.99"}, + {"cve": "CVE-2025-2222", "epss": "0.012", "percentile": "0.50"} + ] + }"#; + let parsed: EpssResponse = serde_json::from_str(body).unwrap(); + let mut out = HashMap::new(); + for d in parsed.data { + out.insert(d.cve, d.epss.parse::().unwrap()); + } + assert!((out["CVE-2025-1111"] - 0.876).abs() < 1e-4); + assert!((out["CVE-2025-2222"] - 0.012).abs() < 1e-4); + } + + #[test] + fn apply_scores_takes_max_across_aliases() { + let mut e = Enrichment::default(); + let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/foo@1".into(), + vec![VulnRef { + id: "GHSA-xxxx-yyyy-zzzz".into(), + severity: Severity::High, + aliases: vec!["CVE-2025-1".into(), "CVE-2025-2".into()], + epss_score: None, + kev: false, + }], + ); + e.vulns = vulns; + + let mut scores = HashMap::new(); + scores.insert("CVE-2025-1".to_string(), 0.10); + scores.insert("CVE-2025-2".to_string(), 0.85); + apply_scores(&mut e, &scores); + let v = &e.vulns["pkg:npm/foo@1"][0]; + assert!((v.epss_score.unwrap() - 0.85).abs() < 1e-4); + } + + #[test] + fn collect_cves_dedups_across_components() { + let mut e = Enrichment::default(); + let mut vulns: HashMap> = HashMap::new(); + let v = VulnRef { + id: "CVE-2025-X".into(), + severity: Severity::High, + aliases: vec!["CVE-2025-Y".into()], + epss_score: None, + kev: false, + }; + vulns.insert("pkg:npm/a@1".into(), vec![v.clone()]); + vulns.insert("pkg:npm/b@1".into(), vec![v]); + e.vulns = vulns; + let cves = collect_cves(&e); + assert_eq!(cves, vec!["CVE-2025-X", "CVE-2025-Y"]); + } + + #[test] + fn cache_roundtrip() { + let dir = std::env::temp_dir().join(format!( + "bomdrift-epss-test-{}-{}", + std::process::id(), + now_secs() + )); + 
std::fs::create_dir_all(&dir).unwrap(); + write_cache(&dir, "CVE-2025-1", Some(0.5)); + let got = read_cache(&dir, "CVE-2025-1").unwrap(); + assert_eq!(got, Some(0.5)); + // Negative caching: no-score-found CVE. + write_cache(&dir, "CVE-2025-2", None); + let got = read_cache(&dir, "CVE-2025-2").unwrap(); + assert_eq!(got, None); + let _ = std::fs::remove_dir_all(&dir); + } +} diff --git a/src/enrich/kev.rs b/src/enrich/kev.rs new file mode 100644 index 0000000..3190929 --- /dev/null +++ b/src/enrich/kev.rs @@ -0,0 +1,218 @@ +//! CISA Known Exploited Vulnerabilities (KEV) catalog enrichment. +//! +//! Single bulk feed at +//! <https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json>, +//! refreshed daily. We download the catalog once per 24h, parse the +//! `vulnerabilities[].cveID` field, and flip [`VulnRef::kev`] to true on +//! every reference whose primary id or aliases include a KEV CVE. +//! +//! Best-effort: network failure logs at `BOMDRIFT_DEBUG=1` and returns Ok +//! with no enrichment. Disk cache lives at +//! `<cache-dir>/bomdrift/kev/catalog.json`. + +use std::collections::HashSet; +use std::path::PathBuf; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use anyhow::Result; +use serde::Deserialize; + +use crate::enrich::Enrichment; + +const KEV_FEED_URL: &str = + "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"; +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); +const SUBDIR: &str = "kev"; +const CACHE_FILE: &str = "catalog.json"; +/// 24h — KEV publishes daily. +const CACHE_TTL_SECS: u64 = 24 * 60 * 60; + +#[derive(Deserialize, Debug)] +struct KevFeed { + vulnerabilities: Vec, +} + +#[derive(Deserialize, Debug)] +struct KevEntry { + #[serde(rename = "cveID")] + cve_id: String, +} + +/// Apply KEV flags to every [`VulnRef`] in `e.vulns`. `--no-kev` callers +/// should skip calling this entirely. 
+pub fn enrich(e: &mut Enrichment) -> Result<()> { + enrich_with_url(e, KEV_FEED_URL, DEFAULT_TIMEOUT) +} + +fn enrich_with_url(e: &mut Enrichment, url: &str, timeout: Duration) -> Result<()> { + if e.vulns.is_empty() { + return Ok(()); + } + let kev_ids = match load_or_fetch(url, timeout) { + Ok(ids) => ids, + Err(err) => { + if std::env::var("BOMDRIFT_DEBUG").is_ok() { + eprintln!("kev: feed unavailable: {err}"); + } + return Ok(()); + } + }; + apply_kev(e, &kev_ids); + Ok(()) +} + +fn apply_kev(e: &mut Enrichment, kev: &HashSet) { + for refs in e.vulns.values_mut() { + for v in refs.iter_mut() { + let hit = v.cves().any(|c| kev.contains(c)); + if hit { + v.kev = true; + } + } + } +} + +fn load_or_fetch(url: &str, timeout: Duration) -> Result> { + let cache_path = cache_path(); + if let Some(path) = &cache_path + && let Some(ids) = read_cache(path) + { + return Ok(ids); + } + + let agent = ureq::AgentBuilder::new().timeout(timeout).build(); + let resp = agent + .get(url) + .set( + "user-agent", + concat!("bomdrift/", env!("CARGO_PKG_VERSION")), + ) + .call()?; + let body = resp.into_string()?; + let parsed: KevFeed = serde_json::from_str(&body)?; + let ids: HashSet = parsed + .vulnerabilities + .into_iter() + .map(|e| e.cve_id) + .collect(); + if let Some(path) = &cache_path { + write_cache(path, &body); + } + Ok(ids) +} + +fn cache_path() -> Option { + crate::refresh::default_cache_root() + .ok() + .map(|p| p.join(SUBDIR).join(CACHE_FILE)) +} + +fn read_cache(path: &std::path::Path) -> Option> { + let meta = std::fs::metadata(path).ok()?; + let modified = meta.modified().ok()?; + let now = SystemTime::now(); + let age = now.duration_since(modified).ok()?; + if age.as_secs() > CACHE_TTL_SECS { + return None; + } + let body = std::fs::read(path).ok()?; + let parsed: KevFeed = serde_json::from_slice(&body).ok()?; + Some( + parsed + .vulnerabilities + .into_iter() + .map(|e| e.cve_id) + .collect(), + ) +} + +fn write_cache(path: &std::path::Path, body: &str) { + if 
let Some(parent) = path.parent() + && std::fs::create_dir_all(parent).is_err() + { + return; + } + let mut tmp = path.as_os_str().to_owned(); + tmp.push(".tmp"); + let tmp = PathBuf::from(tmp); + if std::fs::write(&tmp, body).is_err() { + return; + } + let _ = std::fs::rename(&tmp, path); +} + +#[allow(dead_code)] +fn now_secs() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::enrich::{Severity, VulnRef}; + use std::collections::HashMap; + + #[test] + fn parse_kev_feed() { + let body = r#"{ + "title": "CISA KEV", + "catalogVersion": "2026.04.29", + "vulnerabilities": [ + {"cveID": "CVE-2024-1111", "vendorProject": "Acme", "product": "X"}, + {"cveID": "CVE-2025-9999", "vendorProject": "Beta", "product": "Y"} + ] + }"#; + let parsed: KevFeed = serde_json::from_str(body).unwrap(); + let ids: HashSet = parsed + .vulnerabilities + .into_iter() + .map(|e| e.cve_id) + .collect(); + assert!(ids.contains("CVE-2024-1111")); + assert!(ids.contains("CVE-2025-9999")); + } + + #[test] + fn apply_kev_flips_flag_on_alias_match() { + let mut e = Enrichment::default(); + let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/foo@1".into(), + vec![VulnRef { + id: "GHSA-xxxx-yyyy-zzzz".into(), + severity: Severity::High, + aliases: vec!["CVE-2024-1111".into()], + epss_score: None, + kev: false, + }], + ); + e.vulns = vulns; + + let mut kev = HashSet::new(); + kev.insert("CVE-2024-1111".to_string()); + apply_kev(&mut e, &kev); + assert!(e.vulns["pkg:npm/foo@1"][0].kev); + } + + #[test] + fn apply_kev_leaves_unmatched_refs_alone() { + let mut e = Enrichment::default(); + let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/foo@1".into(), + vec![VulnRef { + id: "GHSA-xxxx-yyyy-zzzz".into(), + severity: Severity::High, + aliases: vec!["CVE-2025-NOT-IN-KEV".into()], + epss_score: None, + kev: false, + }], + ); + e.vulns = vulns; + apply_kev(&mut e, 
&HashSet::new()); + assert!(!e.vulns["pkg:npm/foo@1"][0].kev); + } +} diff --git a/src/enrich/mod.rs b/src/enrich/mod.rs index 8b33d5c..c382f15 100644 --- a/src/enrich/mod.rs +++ b/src/enrich/mod.rs @@ -10,6 +10,8 @@ //! keep that contract or the JSON renderer will fail to compile. pub mod cache; +pub mod epss; +pub mod kev; pub mod maintainer; pub mod osv; pub mod typosquat; @@ -49,6 +51,10 @@ pub struct Enrichment { /// younger than [`maintainer::YOUNG_MAINTAINER_DAYS`]. The xz/Jia Tan /// pattern. Always informational — never trips fail-on. pub maintainer_age: Vec, + /// License-policy violations (Phase D, v0.8+). Distinct from + /// `cs.license_changed` which detects same-version license changes. + /// Empty when no `[license]` block is configured. + pub license_violations: Vec, } impl Enrichment { @@ -64,13 +70,41 @@ impl Enrichment { || !self.typosquats.is_empty() || !self.version_jumps.is_empty() || !self.maintainer_age.is_empty() + || !self.license_violations.is_empty() } } +/// License-policy violation finding (Phase D). Distinct from a license +/// *change* (same component, same version, different license) — this is +/// "the configured policy says this license isn't allowed." +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct LicenseViolation { + pub component: crate::model::Component, + /// Raw SPDX-ish string from the SBOM. May be a compound expression + /// (e.g. `(MIT OR GPL-3.0-only)`) when matched as ambiguous. + pub license: String, + /// Human-readable description of which rule fired (e.g. + /// `"deny: GPL-3.0-only"`, `"ambiguous: (MIT OR GPL-3.0)"`). + pub matched_rule: String, + pub kind: LicenseViolationKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "kebab-case")] +pub enum LicenseViolationKind { + /// License explicitly on the deny list (or matched a deny glob). 
+ Deny, + /// Compound expression that couldn't be safely evaluated against the + /// configured policy with `allow_ambiguous=false`. + Ambiguous, + /// Atomic license that wasn't on the allow list when `allow` was set. + NotAllowed, +} + /// A single advisory reference attached to a vulnerable component, with the /// best-known severity bucket. Built by [`osv::enrich`] from the /// `/v1/querybatch` advisory IDs plus per-advisory `/v1/vulns/{id}` lookups. -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)] +#[derive(Debug, Clone, Default, PartialEq, Serialize)] pub struct VulnRef { /// Stable advisory identifier (`GHSA-…`, `CVE-…`, `MAL-…`, `OSV-…`). pub id: String, @@ -84,6 +118,22 @@ pub struct VulnRef { /// from OSV's `aliases[]` field; empty when offline or pre-v0.8. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub aliases: Vec, + /// EPSS probability of exploitation in the next 30 days (0.0–1.0) + /// from <https://api.first.org/data/v1/epss>. `None` when offline, when no + /// CVE alias resolves, or when the user passed `--no-epss`. Populated + /// in v0.8+ by the EPSS enricher post-OSV. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub epss_score: Option, + /// CISA Known-Exploited-Vulnerabilities flag. `true` when any CVE + /// alias appears in the published KEV catalog. `false` otherwise + /// (including offline / `--no-kev`). Populated in v0.8+ by the KEV + /// enricher post-OSV. 
+ #[serde(default, skip_serializing_if = "is_false")] + pub kev: bool, +} + +fn is_false(b: &bool) -> bool { + !*b } impl VulnRef { @@ -94,6 +144,8 @@ impl VulnRef { id: id.into(), severity, aliases: Vec::new(), + epss_score: None, + kev: false, } } diff --git a/src/enrich/osv.rs b/src/enrich/osv.rs index e972547..f37875a 100644 --- a/src/enrich/osv.rs +++ b/src/enrich/osv.rs @@ -150,6 +150,8 @@ fn enrich_with( id, severity, aliases, + epss_score: None, + kev: false, } }) .collect(); @@ -163,6 +165,7 @@ fn enrich_with( typosquats: Vec::new(), version_jumps: Vec::new(), maintainer_age: Vec::new(), + license_violations: Vec::new(), }) } @@ -472,6 +475,8 @@ mod tests { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: Severity::High, aliases: vec!["CVE-2025-1111".to_string(), "OSV-2025-1".to_string()], + epss_score: None, + kev: false, }; let cves: Vec<&str> = v.cves().collect(); assert_eq!(cves, vec!["CVE-2025-1111"]); @@ -486,6 +491,8 @@ mod tests { "GHSA-aaaa-bbbb-cccc".to_string(), "CVE-2025-1111".to_string(), ], + epss_score: None, + kev: false, }; let cves: Vec<&str> = v.cves().collect(); assert_eq!(cves, vec!["CVE-2025-9999", "CVE-2025-1111"]); diff --git a/src/lib.rs b/src/lib.rs index 9e39db2..dd4e447 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -117,6 +117,22 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { } }; + // EPSS / KEV enrichment piggyback on OSV's VulnRefs and only have + // anything to do when there are CVE-aliased advisories. Skip both if + // there are no vulns. 
+ if !args.no_epss + && !enrichment.vulns.is_empty() + && let Err(err) = enrich::epss::enrich(&mut enrichment) + { + eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}"); + } + if !args.no_kev + && !enrichment.vulns.is_empty() + && let Err(err) = enrich::kev::enrich(&mut enrichment) + { + eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}"); + } + // Typosquat detection is pure-compute (embedded reference list) and always // runs, regardless of `--no-osv`. Findings are informational. enrichment.typosquats = enrich::typosquat::enrich(&cs); @@ -239,7 +255,17 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { ); } - if tripped(&cs, &enrichment, fail_on) || budget_tripped { + let epss_tripped = args + .fail_on_epss + .is_some_and(|threshold| any_epss_at_or_above(&enrichment, threshold)); + if epss_tripped { + let threshold = args.fail_on_epss.unwrap_or(0.0); + eprintln!( + "bomdrift: policy gate tripped: --fail-on-epss {threshold:.2} (one or more advisories at or above this score)" + ); + } + + if tripped(&cs, &enrichment, fail_on) || budget_tripped || epss_tripped { std::process::exit(FAIL_ON_EXIT_CODE); } @@ -263,10 +289,25 @@ pub fn tripped(cs: &ChangeSet, e: &Enrichment, threshold: FailOn) -> bool { FailOn::CriticalCve => any_advisory_at_or_above(e, Severity::High), FailOn::Typosquat => !e.typosquats.is_empty(), FailOn::LicenseChange => !cs.license_changed.is_empty(), - FailOn::Any => e.has_findings() || !cs.license_changed.is_empty(), + FailOn::Kev => any_kev(e), + FailOn::LicenseViolation => !e.license_violations.is_empty(), + FailOn::Any => e.has_findings() || !cs.license_changed.is_empty() || any_kev(e), } } +/// True when any advisory across all components has its CISA KEV flag set. +pub fn any_kev(e: &Enrichment) -> bool { + e.vulns.values().any(|refs| refs.iter().any(|r| r.kev)) +} + +/// True when any advisory has an EPSS score >= the threshold. 
+pub fn any_epss_at_or_above(e: &Enrichment, threshold: f32) -> bool { + e.vulns.values().any(|refs| { + refs.iter() + .any(|r| r.epss_score.is_some_and(|s| s >= threshold)) + }) +} + pub fn budget_tripped( cs: &ChangeSet, max_added: Option, @@ -350,6 +391,28 @@ fn write_calibration_lines( CalibrationThreshold::Text("high+"), format, ); + for cve in vuln.cves() { + if let Some(score) = vuln.epss_score { + write_calibration_row( + out, + "epss", + &format!("{purl}+{cve}"), + CalibrationScore::Float(score as f64), + CalibrationThreshold::Float(0.5), + format, + ); + } + if vuln.kev { + write_calibration_row( + out, + "kev", + &format!("{purl}+{cve}"), + CalibrationScore::Text("true"), + CalibrationThreshold::Text("kev"), + format, + ); + } + } } } } @@ -550,6 +613,8 @@ mod tests { id: "CVE-2025-1".into(), severity, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); Enrichment { @@ -802,4 +867,44 @@ mod tests { assert_eq!(v["score"], "HIGH"); assert_eq!(v["threshold"], "high+"); } + + #[test] + fn fail_on_kev_trips_when_any_advisory_kev_set() { + let mut e = enrichment_with_cve_at(Severity::Medium); + // Flip the kev flag on the single advisory. 
+ for refs in e.vulns.values_mut() { + refs[0].kev = true; + } + assert!(tripped(&ChangeSet::default(), &e, FailOn::Kev)); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Medium), + FailOn::Kev + )); + } + + #[test] + fn any_epss_threshold_gating() { + let mut e = enrichment_with_cve_at(Severity::Medium); + for refs in e.vulns.values_mut() { + refs[0].epss_score = Some(0.6); + } + assert!(any_epss_at_or_above(&e, 0.5)); + assert!(any_epss_at_or_above(&e, 0.6)); + assert!(!any_epss_at_or_above(&e, 0.7)); + } + + #[test] + fn calibration_emits_epss_and_kev_rows_when_set() { + let mut e = enrichment_with_cve_at(Severity::High); + for refs in e.vulns.values_mut() { + refs[0].epss_score = Some(0.87); + refs[0].kev = true; + } + let mut buf = Vec::new(); + write_calibration_lines(&e, &mut buf, crate::cli::DebugFormat::Pipe); + let s = String::from_utf8(buf).unwrap(); + assert!(s.contains("epss|"), "missing epss row: {s}"); + assert!(s.contains("kev|"), "missing kev row: {s}"); + } } diff --git a/src/render/json.rs b/src/render/json.rs index aa791b3..1c1ba17 100644 --- a/src/render/json.rs +++ b/src/render/json.rs @@ -182,6 +182,8 @@ mod tests { id: "GHSA-3p68-rc4w-qgx5".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); @@ -201,6 +203,8 @@ mod tests { typosquats, version_jumps: Vec::new(), maintainer_age: Vec::new(), + + license_violations: Vec::new(), }; let cs = ChangeSet::default(); diff --git a/src/render/markdown.rs b/src/render/markdown.rs index 1abb66c..d4d9708 100644 --- a/src/render/markdown.rs +++ b/src/render/markdown.rs @@ -470,10 +470,17 @@ fn write_one_vuln_row(out: &mut String, c: &Component, enrichment: &Enrichment) let advisories = sorted .iter() .map(|r| { - format!( + let mut s = format!( "[{}](https://osv.dev/vulnerability/{}) `{}`", r.id, r.id, r.severity - ) + ); + if let Some(score) = r.epss_score { + s.push_str(&format!(" · EPSS {score:.2}")); + } + 
if r.kev { + s.push_str(" · **KEV**"); + } + s }) .collect::>() .join(", "); @@ -618,6 +625,8 @@ mod tests { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: crate::enrich::Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let md = render(&cs, &e); @@ -651,16 +660,22 @@ mod tests { id: "CVE-2025-medium".to_string(), severity: crate::enrich::Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }, crate::enrich::VulnRef { id: "CVE-2025-critical".to_string(), severity: crate::enrich::Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }, crate::enrich::VulnRef { id: "CVE-2025-high".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }, ], ); @@ -690,6 +705,8 @@ mod tests { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: crate::enrich::Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let summary = render_with_options( @@ -751,6 +768,8 @@ mod tests { id: "GHSA-xxxx-yyyy-zzzz".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); @@ -980,6 +999,8 @@ mod tests { id: "GHSA-medium".into(), severity: crate::enrich::Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); e.vulns.insert( @@ -988,6 +1009,8 @@ mod tests { id: "CVE-2025-critical".into(), severity: crate::enrich::Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let md = render(&cs, &e); @@ -1024,6 +1047,8 @@ mod tests { id: "GHSA-medium".into(), severity: crate::enrich::Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); e.vulns.insert( @@ -1032,6 +1057,8 @@ mod tests { id: "CVE-2025-critical".into(), severity: crate::enrich::Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let md = render(&cs, &e); @@ -1210,6 +1237,8 @@ mod tests { id: "GHSA-x".into(), severity: 
crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); e.typosquats diff --git a/src/render/sarif.rs b/src/render/sarif.rs index bc0f15b..3f1c6a8 100644 --- a/src/render/sarif.rs +++ b/src/render/sarif.rs @@ -206,6 +206,24 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { for advisory in advisories { let purl_str: &str = purl; let fp = fingerprint(&["bomdrift.cve", purl_str, &advisory.id]); + let mut props = serde_json::Map::new(); + props.insert("purl".into(), Value::String(purl.clone())); + props.insert("advisoryId".into(), Value::String(advisory.id.clone())); + props.insert( + "severity".into(), + Value::String(advisory.severity.as_str().into()), + ); + if let Some(score) = advisory.epss_score { + props.insert( + "epssScore".into(), + serde_json::Number::from_f64(score as f64) + .map(Value::Number) + .unwrap_or(Value::Null), + ); + } + if advisory.kev { + props.insert("kev".into(), Value::Bool(true)); + } out.push(json!({ "ruleId": "bomdrift.cve", "level": sarif_level(advisory.severity), @@ -219,11 +237,7 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { }, "locations": [synthetic_location()], "partialFingerprints": { "primaryHash/v1": fp }, - "properties": { - "purl": purl, - "advisoryId": advisory.id, - "severity": advisory.severity.as_str(), - }, + "properties": Value::Object(props), })); } } @@ -456,11 +470,15 @@ mod tests { id: "GHSA-3p68-rc4w-qgx5".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }, crate::enrich::VulnRef { id: "CVE-2025-99999".to_string(), severity: crate::enrich::Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }, ], ); @@ -505,6 +523,8 @@ mod tests { id: "OSV-2025-1".to_string(), severity: crate::enrich::Severity::None, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let e = Enrichment { @@ -528,6 +548,8 @@ mod tests { id: "CVE-2025-1".to_string(), severity: 
crate::enrich::Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }] }; @@ -649,6 +671,8 @@ mod tests { id: "CVE-2025-1".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let e = Enrichment { @@ -691,6 +715,8 @@ mod tests { id: "CVE-1".into(), severity: crate::enrich::Severity::Medium, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let e = Enrichment { @@ -737,6 +763,8 @@ mod tests { id: "GHSA-3p68-rc4w-qgx5".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); let e = Enrichment { @@ -765,11 +793,15 @@ mod tests { id: "CVE-2025-1".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }, crate::enrich::VulnRef { id: "CVE-2025-2".to_string(), severity: crate::enrich::Severity::High, aliases: Vec::new(), + epss_score: None, + kev: false, }, ], ); diff --git a/src/render/term.rs b/src/render/term.rs index fd7c005..a95bb5f 100644 --- a/src/render/term.rs +++ b/src/render/term.rs @@ -133,7 +133,16 @@ pub fn render_with_color(cs: &ChangeSet, enrichment: &Enrichment, color: ColorCh sorted.sort_by(|a, b| b.severity.cmp(&a.severity).then_with(|| a.id.cmp(&b.id))); let advisories = sorted .iter() - .map(|r| format!("{} ({})", r.id, r.severity)) + .map(|r| { + let mut s = format!("{} ({})", r.id, r.severity); + if let Some(score) = r.epss_score { + s.push_str(&format!(" EPSS {score:.2}")); + } + if r.kev { + s.push_str(" KEV"); + } + s + }) .collect::>() .join(", "); let _ = writeln!( @@ -311,6 +320,8 @@ mod tests { id: "MAL-2026-2306".to_string(), severity: crate::enrich::Severity::Critical, aliases: Vec::new(), + epss_score: None, + kev: false, }], ); From 30a5573420b8bdd68a9d3e12099e8cae30942acb Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:38:17 -0700 Subject: [PATCH 07/10] feat(baseline): optional expires + reason fields with stderr 
warnings v0.8 baseline schema is purely additive. Each entry in suppressed_advisories may now be either: - a bare string (v0.5 form, unchanged), or - an object {id, purl?, expires?, reason?} (v0.8 form). Behavior: - expires field parsed via clock::parse_ymd (strict YYYY-MM-DD). Malformed dates surface as a load error naming the offending entry. - expires < today() (clock honors SOURCE_DATE_EPOCH): entry skipped for suppression and recorded on Baseline.expired_entries. lib.rs prints one warning per expired entry to stderr after baseline load. - No expires: entry suppresses indefinitely (v0.5 semantics). CLI: bomdrift baseline add GHSA-X --expires 2026-12-31 --reason '...' The new flags route through baseline::add_suppression_full, which emits the v0.8 object form when either field is set; otherwise the v0.5 string form is preserved. Idempotency now matches by id across both shapes. comment-suppress companion action picks up an optional 'reason: <text>' line in the trigger comment body and forwards it via --reason. Tests cover: expired warns + still renders, active suppresses, no- expiry suppresses, malformed errors, round-trip, SOURCE_DATE_EPOCH override, idempotent re-add against object-form entry. Docs: docs/src/baseline.md extended with the new fields, CLI usage, warning message format, worked rotation example. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- comment-suppress/entrypoint.sh | 16 ++- docs/src/baseline.md | 69 +++++++++ src/baseline.rs | 249 ++++++++++++++++++++++++++++++--- src/cli.rs | 13 ++ src/lib.rs | 30 +++- 5 files changed, 357 insertions(+), 20 deletions(-) diff --git a/comment-suppress/entrypoint.sh b/comment-suppress/entrypoint.sh index 2f3c0a5..a46c903 100755 --- a/comment-suppress/entrypoint.sh +++ b/comment-suppress/entrypoint.sh @@ -67,6 +67,16 @@ if [ -z "$advisory_id" ]; then fail "could not parse advisory id from comment body: $comment_body" fi +# Optional `reason: <text>` line in the comment body.
v0.8+: when +# present, the entry is recorded in the v0.8 object form so the reason +# is preserved alongside the advisory id. Pattern matches the start of +# any line (case-insensitive) so reviewers can write +# `reason: awaiting upstream patch (issue #42)` on a continuation line. +reason="$(printf '%s\n' "$comment_body" \ + | grep -iE '^\s*reason:\s*' \ + | head -n1 \ + | sed -E 's/^\s*[Rr]eason:\s*//')" + # Validate it looks like an advisory id we'd expect from OSV.dev. Reject # anything else early so a typo doesn't turn into a no-op suppress with no # user-visible feedback. @@ -213,7 +223,11 @@ git config user.email "41898282+github-actions[bot]@users.noreply.github.com" baseline_path="${BASELINE_PATH:-.bomdrift/baseline.json}" log "Adding ${advisory_id} to ${baseline_path}" -"$bomdrift_bin" baseline add "$advisory_id" --path "$baseline_path" +baseline_args=(baseline add "$advisory_id" --path "$baseline_path") +if [ -n "$reason" ]; then + baseline_args+=(--reason "$reason") +fi +"$bomdrift_bin" "${baseline_args[@]}" endlog # Stage + commit. If `bomdrift baseline add` was a no-op (idempotent diff --git a/docs/src/baseline.md b/docs/src/baseline.md index 955a13d..59099b3 100644 --- a/docs/src/baseline.md +++ b/docs/src/baseline.md @@ -294,3 +294,72 @@ ecosystem-wide bump). The same per-component recipe works — replace the `typosquat` array with `version_jump`, key by the after-version's `purl`. Update the entry on the next jump. + +## Time-boxed suppressions (`expires` + `reason`) + +v0.8 adds two optional fields on each `suppressed_advisories` entry: + +```json +{ + "suppressed_advisories": [ + { + "id": "GHSA-evil-1234", + "purl": "pkg:npm/foo", + "expires": "2026-12-31", + "reason": "Awaiting upstream patch (issue #42)" + }, + "GHSA-old-school" + ] +} +``` + +Both fields are optional. String entries (the v0.5 form) keep working — +the array is a union of both shapes. 
+ +### Behavior + +- **Active entry** (`expires` is today or in the future, OR no `expires`): + finding is suppressed as before. +- **Expired entry** (`expires` is strictly before today): finding + surfaces, and bomdrift prints one warning line per expired entry to + stderr: + + ``` + warning: baseline entry GHSA-evil-1234 (pkg:npm/foo) expired 2026-12-31; finding will surface in this run — was: Awaiting upstream patch (issue #42) + ``` + +- **Malformed `expires`** (e.g. `2026/12/31`): bomdrift refuses to load + the baseline rather than silently treating it as never-expiring. Use + strict `YYYY-MM-DD` zero-padded. + +The "today" comparison honors `SOURCE_DATE_EPOCH` so reproducible-build +contexts stay deterministic. + +### CLI + +```bash +bomdrift baseline add GHSA-evil-1234 \ + --expires 2026-12-31 \ + --reason "Awaiting upstream patch (issue #42)" +``` + +The `comment-suppress` companion action also picks up an optional +`reason: <text>` line in the triggering comment body: + +``` +/bomdrift suppress GHSA-evil-1234 +reason: Awaiting upstream patch (issue #42) +``` + +### Worked rotation example + +Six months ago the team accepted GHSA-evil-1234 with a 6-month +expiry. Today the warning fires: + +``` +warning: baseline entry GHSA-evil-1234 expired 2026-04-29 … +``` + +The reviewer either renews the suppression (new PR, new expiry + +reason) or removes the entry and merges the upstream patch. +Suppressions become reviewed work-items, not silent forever-state. diff --git a/src/baseline.rs b/src/baseline.rs index 4e89ae5..ea59a8f 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -40,6 +40,7 @@ use std::path::Path; use anyhow::{Context, Result}; +use crate::clock; use crate::diff::ChangeSet; use crate::enrich::Enrichment; @@ -57,6 +58,21 @@ pub struct Baseline { /// comment-driven suppression flow). The exact-match `vuln_keys` set /// remains the canonical match for diff-output-style baselines.
suppressed_advisories: HashSet, + /// v0.8+ entries that have already passed their `expires` date. + /// Surface to the caller for stderr warnings; do NOT contribute to + /// suppression. + pub expired_entries: Vec, +} + +/// A baseline entry whose `expires` date is strictly before today. The diff +/// will surface the underlying finding; bomdrift prints one warning per +/// expired entry to stderr after baseline load. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ExpiredEntry { + pub id: String, + pub purl: Option, + pub expires: String, + pub reason: Option, } impl Baseline { @@ -65,15 +81,26 @@ impl Baseline { .with_context(|| format!("reading baseline file: {}", path.display()))?; let value: serde_json::Value = serde_json::from_str(&body) .with_context(|| format!("parsing baseline JSON: {}", path.display()))?; - Ok(Self::from_value(&value)) + Self::from_value_strict(&value) } - /// Build a `Baseline` from an already-parsed bomdrift JSON document. Every - /// extraction step is best-effort — a baseline missing the `enrichment` - /// or `changes` block produces an empty key set for that section, never - /// an error. (Pinning the parser to a strict schema would force users to - /// regenerate baselines on every minor version bump; not worth it.) + /// Build a `Baseline` from an already-parsed bomdrift JSON document. + /// Tolerant: a missing `enrichment` or `changes` block produces an + /// empty key set for that section, never an error. Malformed + /// `expires` dates are silently ignored — use [`Self::from_value_strict`] + /// if you want to surface those as errors. pub fn from_value(value: &serde_json::Value) -> Self { + Self::from_value_inner(value, false).unwrap_or_default() + } + + /// Strict variant: an object-form `suppressed_advisories` entry with a + /// malformed `expires` date is an error rather than a silent skip. + /// Used by [`Self::load`] so users see typos immediately. 
+ pub fn from_value_strict(value: &serde_json::Value) -> Result { + Self::from_value_inner(value, true) + } + + fn from_value_inner(value: &serde_json::Value, strict: bool) -> Result { let mut out = Self::default(); let enrichment = &value["enrichment"]; @@ -128,26 +155,68 @@ impl Baseline { } } - // v0.5+ simple suppression list — written by - // `bomdrift baseline add `. Any advisory ID in this - // array suppresses across ALL purls. The shape is forgiving: - // accepts a JSON array of strings under either - // `suppressed_advisories` (canonical) or `suppressed_ids` (alias - // we kept short for hand-edited use). Both are read; either form - // is valid output from `baseline add`. + // v0.5+ simple suppression list, optionally extended in v0.8 to + // object form `{ "id": ..., "purl": ..., "expires": ..., "reason": ... }`. + // Both shapes coexist in one array. Keys read: `suppressed_advisories` + // (canonical) and `suppressed_ids` (alias retained for back-compat). for key in ["suppressed_advisories", "suppressed_ids"] { if let Some(arr) = value[key].as_array() { for entry in arr { + // String form (v0.5+). if let Some(id) = entry.as_str() { if !id.is_empty() { out.suppressed_advisories.insert(id.to_string()); } + continue; + } + // Object form (v0.8+). 
+ if let Some(obj) = entry.as_object() { + let id = obj.get("id").and_then(|v| v.as_str()).unwrap_or(""); + if id.is_empty() { + if strict { + anyhow::bail!( + "baseline `{key}` entry missing required `id` field: {entry}" + ); + } + continue; + } + let purl = obj.get("purl").and_then(|v| v.as_str()).map(str::to_string); + let reason = obj + .get("reason") + .and_then(|v| v.as_str()) + .map(str::to_string); + if let Some(expires_s) = obj.get("expires").and_then(|v| v.as_str()) { + match clock::parse_ymd(expires_s) { + Ok(date) => { + if clock::is_expired(date) { + out.expired_entries.push(ExpiredEntry { + id: id.to_string(), + purl: purl.clone(), + expires: expires_s.to_string(), + reason: reason.clone(), + }); + // Expired entries do NOT contribute to suppression. + continue; + } + } + Err(err) => { + if strict { + return Err(err.context(format!( + "baseline entry {id} ({}): malformed expires", + purl.as_deref().unwrap_or("*") + ))); + } + continue; + } + } + } + out.suppressed_advisories.insert(id.to_string()); } } } } - out + Ok(out) } /// True when this baseline contains zero suppressible entries (e.g. an @@ -226,10 +295,26 @@ pub fn apply(_cs: &mut ChangeSet, e: &mut Enrichment, baseline: &Baseline) { /// safely be run against a baseline generated by `bomdrift diff /// --output json`. pub fn add_suppression(path: &Path, id: &str) -> Result { + add_suppression_full(path, id, None, None) +} + +/// Like [`add_suppression`] but accepts optional `expires` (YYYY-MM-DD) and +/// `reason` fields. When either is provided, the new entry is written in +/// the v0.8 object form `{id, expires?, reason?}`; existing string-form +/// entries elsewhere in the array are left untouched. 
+pub fn add_suppression_full( + path: &Path, + id: &str, + expires: Option<&str>, + reason: Option<&str>, +) -> Result { let trimmed = id.trim(); if trimmed.is_empty() { anyhow::bail!("advisory id must not be empty"); } + if let Some(s) = expires { + clock::parse_ymd(s).with_context(|| format!("invalid --expires {s:?}"))?; + } let mut doc: serde_json::Value = if path.exists() { let body = std::fs::read_to_string(path) @@ -262,13 +347,27 @@ pub fn add_suppression(path: &Path, id: &str) -> Result { .as_array_mut() .ok_or_else(|| anyhow::anyhow!("baseline `suppressed_advisories` field is not an array"))?; - let already_present = arr - .iter() - .any(|v| v.as_str().map(|s| s == trimmed).unwrap_or(false)); + let already_present = arr.iter().any(|v| match v { + serde_json::Value::String(s) => s == trimmed, + serde_json::Value::Object(o) => o.get("id").and_then(|x| x.as_str()) == Some(trimmed), + _ => false, + }); if already_present { return Ok(AddOutcome::AlreadyPresent); } - arr.push(serde_json::Value::String(trimmed.to_string())); + if expires.is_some() || reason.is_some() { + let mut entry = serde_json::Map::new(); + entry.insert("id".into(), serde_json::Value::String(trimmed.to_string())); + if let Some(s) = expires { + entry.insert("expires".into(), serde_json::Value::String(s.to_string())); + } + if let Some(s) = reason { + entry.insert("reason".into(), serde_json::Value::String(s.to_string())); + } + arr.push(serde_json::Value::Object(entry)); + } else { + arr.push(serde_json::Value::String(trimmed.to_string())); + } if let Some(parent) = path.parent() { if !parent.as_os_str().is_empty() { @@ -625,6 +724,120 @@ mod tests { let _ = std::fs::remove_dir_all(&dir); } + // ---- v0.8 expires + reason ----------------------------------------- + + fn lock_today(epoch: i64) -> impl Drop { + // SAFETY: tests serialize on these env mutations via a process-wide + // mutex inside crate::clock; see clock::tests for the same pattern. 
+ struct Guard; + impl Drop for Guard { + fn drop(&mut self) { + unsafe { + std::env::remove_var("SOURCE_DATE_EPOCH"); + } + } + } + unsafe { + std::env::set_var("SOURCE_DATE_EPOCH", epoch.to_string()); + } + Guard + } + + #[test] + fn expired_object_entry_warns_and_does_not_suppress() { + // 2026-05-01 (epoch 1777593600) is "today"; the entry expired 2026-04-30. + let _g = lock_today(1777593600); + let baseline = Baseline::from_value(&json!({ + "suppressed_advisories": [ + { "id": "GHSA-old", "expires": "2026-04-30", "reason": "awaiting upstream" } + ] + })); + assert_eq!(baseline.expired_entries.len(), 1); + assert_eq!(baseline.expired_entries[0].id, "GHSA-old"); + assert!( + !baseline.suppressed_advisories.contains("GHSA-old"), + "expired entry must NOT contribute to suppression" + ); + } + + #[test] + fn active_object_entry_suppresses() { + let _g = lock_today(1777593600); // 2026-05-01 + let baseline = Baseline::from_value(&json!({ + "suppressed_advisories": [ + { "id": "GHSA-future", "expires": "2030-01-01" } + ] + })); + assert!(baseline.suppressed_advisories.contains("GHSA-future")); + assert!(baseline.expired_entries.is_empty()); + } + + #[test] + fn no_expires_object_entry_suppresses_indefinitely() { + let baseline = Baseline::from_value(&json!({ + "suppressed_advisories": [ + { "id": "GHSA-perma", "reason": "false positive" } + ] + })); + assert!(baseline.suppressed_advisories.contains("GHSA-perma")); + } + + #[test] + fn malformed_expires_errors_strict() { + let v = json!({ + "suppressed_advisories": [ + { "id": "GHSA-bad", "expires": "yesterday" } + ] + }); + let err = Baseline::from_value_strict(&v).unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("GHSA-bad"), "error must name the entry: {msg}"); + } + + #[test] + fn add_suppression_full_writes_object_form_when_metadata_present() { + let dir = tempdir_unique("add-full"); + let path = dir.join("baseline.json"); + let outcome = add_suppression_full( + &path, + "GHSA-x", + 
Some("2030-12-31"), + Some("Awaiting upstream patch"), + ) + .unwrap(); + assert_eq!(outcome, AddOutcome::Added); + let v: serde_json::Value = + serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap(); + let entry = &v["suppressed_advisories"][0]; + assert_eq!(entry["id"], "GHSA-x"); + assert_eq!(entry["expires"], "2030-12-31"); + assert_eq!(entry["reason"], "Awaiting upstream patch"); + let _ = std::fs::remove_dir_all(&dir); + } + + #[test] + fn add_suppression_full_rejects_malformed_expires() { + let dir = tempdir_unique("add-bad-date"); + let path = dir.join("baseline.json"); + let err = add_suppression_full(&path, "GHSA-x", Some("2030/12/31"), None); + assert!(err.is_err()); + let _ = std::fs::remove_dir_all(&dir); + } + + #[test] + fn add_suppression_full_idempotent_against_existing_object_entry() { + let dir = tempdir_unique("add-idem-obj"); + let path = dir.join("baseline.json"); + std::fs::write( + &path, + r#"{"suppressed_advisories": [{"id": "GHSA-dupe", "expires": "2030-01-01"}]}"#, + ) + .unwrap(); + let outcome = add_suppression_full(&path, "GHSA-dupe", Some("2031-01-01"), None).unwrap(); + assert_eq!(outcome, AddOutcome::AlreadyPresent); + let _ = std::fs::remove_dir_all(&dir); + } + fn tempdir_unique(stem: &str) -> std::path::PathBuf { let path = std::env::temp_dir().join(format!( "bomdrift-baseline-{stem}-{}-{}", diff --git a/src/cli.rs b/src/cli.rs index f9f8cb2..b681a7b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -75,6 +75,19 @@ pub struct BaselineAddArgs { /// created if missing. #[arg(long, default_value = ".bomdrift/baseline.json")] pub path: PathBuf, + + /// Optional expiry date (YYYY-MM-DD). Once today is past this date, + /// the entry stops suppressing and bomdrift prints a warning to + /// stderr. Useful for time-boxed risk acceptance ("ignore until + /// upstream ships a fix"). Strict format: zero-padded month/day. + #[arg(long)] + pub expires: Option, + + /// Optional human-readable reason recorded alongside the entry. 
+ /// Surfaces in the v0.9 VEX export and in the warning printed when + /// the entry expires. Free-form text. + #[arg(long)] + pub reason: Option, } #[derive(Args, Debug)] diff --git a/src/lib.rs b/src/lib.rs index dd4e447..eacbaf9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,7 +68,18 @@ fn write_scaffold_file(path: &Path, contents: &str, force: bool) -> Result<()> { fn run_baseline(action: BaselineAction) -> Result<()> { match action { BaselineAction::Add(args) => { - let outcome = baseline::add_suppression(&args.path, &args.id)?; + // Validate --expires upfront so a typo'd date doesn't write a + // bad entry that errors on the NEXT diff load. + if let Some(s) = &args.expires { + clock::parse_ymd(s) + .with_context(|| format!("--expires must be YYYY-MM-DD, got {s:?}"))?; + } + let outcome = baseline::add_suppression_full( + &args.path, + &args.id, + args.expires.as_deref(), + args.reason.as_deref(), + )?; match outcome { baseline::AddOutcome::Added => { eprintln!( @@ -161,6 +172,23 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { // type that the baseline doesn't know about simply isn't suppressed. 
if let Some(path) = &args.baseline { let baseline = baseline::Baseline::load(path)?; + for ent in &baseline.expired_entries { + eprintln!( + "warning: baseline entry {id}{purl} expired {expires}; finding will surface in this run{reason}", + id = ent.id, + purl = ent + .purl + .as_deref() + .map(|p| format!(" ({p})")) + .unwrap_or_default(), + expires = ent.expires, + reason = ent + .reason + .as_deref() + .map(|r| format!(" — was: {r}")) + .unwrap_or_default(), + ); + } baseline::apply(&mut cs, &mut enrichment, &baseline); } From 171c1d10da00191494452106406a5572b7dd2683 Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:43:17 -0700 Subject: [PATCH 08/10] feat(license): allow/deny policy with fail-closed compound-expression handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New src/enrich/license.rs evaluates each Added or VersionChanged component against a configured Policy { allow, deny, allow_ambiguous }: - Atomic license: exact compare against allow/deny; trailing-* glob for deny ('AGPL-*' matches 'AGPL-3.0-only'). Deny wins when both match. - Compound expression (any of AND/OR/WITH/parens): treated as ambiguous. With allow_ambiguous=false (default) and any policy configured, emits an Ambiguous violation. With allow_ambiguous=true, permitted (with the understanding that v0.9's spdx evaluator will replace this). - NOASSERTION / OTHER / empty: ambiguous (same fail-closed semantics). Distinct from existing ChangeSet::license_changed (same-version license drift) — that's a heuristic, this is a policy gate. CLI: --allow-licenses, --deny-licenses, --allow-ambiguous-licenses (matches Dependency Review Action flag names exactly). [license] block in .bomdrift.toml; CLI flags override (not merge) when set. Render paths: - Markdown: new 'License violations' section + summary-table row. - Term: [LIC] tag with matched rule. - JSON: enrichment.license_violations array (already wired in B). 
- SARIF: bomdrift.license-violation results emit with stable partialFingerprints.primaryHash/v1 hashed from ruleId | purl | license. Rule was registered in Phase A. --fail-on license-violation trips exit 2; --fail-on any includes it. --debug-calibration row: license|||. Tests cover: allow-pass, deny-fail, glob expansion, ambiguous fail-closed, ambiguous permitted, allow+deny precedence (deny wins), version-changed evaluation, empty-policy no-op, NOASSERTION ambiguous, SARIF roundtrip with stable fingerprint, fail-on threshold gating. Docs: docs/src/license-policy.md, SUMMARY entry under Output. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/src/SUMMARY.md | 1 + docs/src/license-policy.md | 76 ++++++++++ src/cli.rs | 15 ++ src/config.rs | 30 ++++ src/enrich/license.rs | 297 +++++++++++++++++++++++++++++++++++++ src/enrich/mod.rs | 1 + src/lib.rs | 46 ++++++ src/render/markdown.rs | 36 +++++ src/render/sarif.rs | 63 ++++++++ src/render/term.rs | 21 +++ 10 files changed, 586 insertions(+) create mode 100644 docs/src/license-policy.md create mode 100644 src/enrich/license.rs diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 6058d01..2cd04a5 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -13,6 +13,7 @@ - [Output formats](./output-formats.md) - [SARIF + Code Scanning](./sarif.md) +- [License policy](./license-policy.md) - [Baseline & suppression](./baseline.md) # Enrichers diff --git a/docs/src/license-policy.md b/docs/src/license-policy.md new file mode 100644 index 0000000..2d95999 --- /dev/null +++ b/docs/src/license-policy.md @@ -0,0 +1,76 @@ +# License policy + +bomdrift can enforce a license allow/deny policy on every newly added or +version-changed component. Distinct from the `License changed` finding +(which detects same-version license drift), this is "the configured +policy says this license isn't allowed." 
+ +## Configuration + +In `.bomdrift.toml`: + +```toml +[license] +allow = ["MIT", "Apache-2.0", "BSD-3-Clause", "ISC"] +deny = ["GPL-3.0-only", "AGPL-*"] +allow_ambiguous = false +``` + +Or via CLI flags (override the config block when set, matching the +[GitHub Dependency Review Action] flag names exactly): + +```bash +bomdrift diff before.json after.json \ + --allow-licenses MIT,Apache-2.0,BSD-3-Clause \ + --deny-licenses 'GPL-3.0-only,AGPL-*' +``` + +Both flags accept comma-separated values and may be repeated. + +## Matching rules (v0.8 — fail-closed) + +| Input | With `allow_ambiguous=false` | With `allow_ambiguous=true` | +|---|---|---| +| Atomic license on `allow` | permit | permit | +| Atomic license on `deny` | **deny** | **deny** | +| Atomic license matching `*`-suffix glob in `deny` (`AGPL-*` ↔ `AGPL-3.0-only`) | **deny** | **deny** | +| Atomic license not on `allow` (when `allow` is non-empty) | **not-allowed** | **not-allowed** | +| Compound expression `(MIT OR GPL-3.0)` | **ambiguous** | permit | +| `NOASSERTION` / `OTHER` / empty | **ambiguous** | permit | + +**Deny wins** when a license matches both allow and deny. + +Compound SPDX expression evaluation (`(MIT OR Apache-2.0)` against +`allow={Apache-2.0}` resolves to permit) lands in v0.9 via the `spdx` +crate. v0.8 fails closed on every compound expression unless +`allow_ambiguous=true` is set explicitly. + +## Threshold gating + +```bash +bomdrift diff before.json after.json --fail-on license-violation +``` + +Exits 2 when any violation is present. `--fail-on any` also includes +license violations. + +## Output + +- **Markdown**: new "License violations" section before "License + changed", with ecosystem / name / version / license / matched-rule + columns. +- **Terminal**: `[LIC]` tag + matched rule per finding. +- **JSON**: `enrichment.license_violations` top-level array. +- **SARIF**: `bomdrift.license-violation` rule + per-finding result with + stable `partialFingerprints.primaryHash/v1`. 
See + [SARIF + Code Scanning](./sarif.md). + +## Suppression + +License violations honor the standard `--baseline` machinery via the +v0.5 `suppressed_advisories` field. Use a fully-qualified license +identifier (or the SPDX expression as written by the SBOM) as the +suppression key. The v0.8 `expires` + `reason` fields work the same +way. + +[GitHub Dependency Review Action]: https://github.com/actions/dependency-review-action diff --git a/src/cli.rs b/src/cli.rs index b681a7b..e7586a8 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -290,6 +290,21 @@ pub struct DiffArgs { /// only advisories surface this; non-CVE findings are unaffected. #[arg(long)] pub fail_on_epss: Option, + /// Comma-separated SPDX license identifiers (or `*`-suffix globs) + /// permitted by policy. May be repeated. CLI flag takes precedence + /// over `[license] allow` in `.bomdrift.toml` (override, not merge). + #[arg(long, value_delimiter = ',')] + pub allow_licenses: Vec, + /// Comma-separated SPDX license identifiers (or `*`-suffix globs) + /// forbidden by policy. May be repeated. Deny wins when a license + /// matches both allow and deny. + #[arg(long, value_delimiter = ',')] + pub deny_licenses: Vec, + /// When set, compound SPDX expressions like `(MIT OR GPL-3.0)` are + /// permitted (the v0.9 SPDX evaluator will replace this with proper + /// expression evaluation). Off by default — fail-closed. + #[arg(long)] + pub allow_ambiguous_licenses: bool, #[arg(long)] pub debug_calibration: bool, /// Format for `--debug-calibration` rows. `pipe` (default, back-compat diff --git a/src/config.rs b/src/config.rs index 36eef1c..f23239e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -13,6 +13,19 @@ use serde::Deserialize; use crate::cli::{DebugFormat, DiffArgs, FailOn, InputFormat, OutputFormat, Platform}; +/// `[license]` block in `.bomdrift.toml`. CLI flags +/// (`--allow-licenses`/`--deny-licenses`) override this block when set +/// (override, not merge — matches the Dependency Review Action). 
+#[derive(Debug, Default, Deserialize)] +pub struct LicenseConfig { + #[serde(default)] + pub allow: Vec, + #[serde(default)] + pub deny: Vec, + #[serde(default)] + pub allow_ambiguous: bool, +} + const DEFAULT_CONFIG_PATH: &str = ".bomdrift.toml"; #[derive(Debug, Default, Deserialize)] @@ -29,6 +42,7 @@ pub struct DiffConfig { pub no_epss: Option, pub no_kev: Option, pub fail_on_epss: Option, + pub license: Option, pub baseline: Option, pub no_maintainer_age: Option, pub fail_on: Option, @@ -117,6 +131,19 @@ fn apply_loaded_diff_config(args: &mut DiffArgs, config: Config) { if args.output_file.is_none() { args.output_file = diff.output_file; } + + // [license] block: CLI flags override (not merge) when set. Mirrors + // Dependency Review Action semantics so users moving between bomdrift + // and DRA don't get surprises. + if let Some(lic) = diff.license { + if args.allow_licenses.is_empty() { + args.allow_licenses = lic.allow; + } + if args.deny_licenses.is_empty() { + args.deny_licenses = lic.deny; + } + args.allow_ambiguous_licenses |= lic.allow_ambiguous; + } } fn load_config(explicit: Option<&Path>) -> Result> { @@ -169,6 +196,9 @@ mod tests { debug_calibration: false, debug_calibration_format: DebugFormat::default(), output_file: None, + allow_licenses: Vec::new(), + deny_licenses: Vec::new(), + allow_ambiguous_licenses: false, } } diff --git a/src/enrich/license.rs b/src/enrich/license.rs new file mode 100644 index 0000000..59f065e --- /dev/null +++ b/src/enrich/license.rs @@ -0,0 +1,297 @@ +//! License-policy enrichment (v0.8+). +//! +//! Distinct from [`crate::diff::ChangeSet::license_changed`] which detects +//! same-version license drift. This module evaluates each newly-added or +//! version-changed component's licenses against a configured allow / deny +//! policy and emits a [`LicenseViolation`] for every mismatch. +//! +//! ## Matching rules (v0.8 — fail-closed) +//! +//! - **Atomic** license string (no `AND`/`OR`/`WITH`/parentheses): exact +//! 
compare against allow/deny. Glob: `*` suffix matches any prefix +//! (`AGPL-*` matches `AGPL-3.0-only`, `AGPL-1.0-only`). +//! - **Compound** expression: ambiguous. With `allow_ambiguous=false` +//! (default) AND any policy is configured (allow OR deny non-empty), +//! emit an Ambiguous violation. With `allow_ambiguous=true`, permit. +//! - `NOASSERTION` / `OTHER` / empty: ambiguous (same fail-closed +//! semantics). +//! +//! Deny wins when a license matches both allow and deny. +//! +//! Full SPDX expression evaluation arrives in v0.9 via the `spdx` crate. + +use crate::diff::ChangeSet; +use crate::enrich::{LicenseViolation, LicenseViolationKind}; +use crate::model::Component; + +/// Policy configuration. Empty allow + empty deny means "no policy" — the +/// enricher returns no violations. Either or both may be set. +#[derive(Debug, Clone, Default)] +pub struct Policy { + pub allow: Vec, + pub deny: Vec, + pub allow_ambiguous: bool, +} + +impl Policy { + pub fn is_active(&self) -> bool { + !self.allow.is_empty() || !self.deny.is_empty() + } +} + +/// Evaluate `policy` against every Added or VersionChanged component in +/// `cs`. Returns one violation per (component, license) pair that fails. +pub fn enrich(cs: &ChangeSet, policy: &Policy) -> Vec { + if !policy.is_active() { + return Vec::new(); + } + let mut out = Vec::new(); + for c in &cs.added { + evaluate_component(c, policy, &mut out); + } + for (_before, after) in &cs.version_changed { + evaluate_component(after, policy, &mut out); + } + out +} + +fn evaluate_component(c: &Component, policy: &Policy, out: &mut Vec) { + if c.licenses.is_empty() { + // Empty license set: treat as ambiguous (we can't claim it's + // allowed). Fail-closed when policy is active and + // allow_ambiguous=false. 
+ if !policy.allow_ambiguous { + out.push(LicenseViolation { + component: c.clone(), + license: "(empty)".to_string(), + matched_rule: "ambiguous: empty license set".to_string(), + kind: LicenseViolationKind::Ambiguous, + }); + } + return; + } + for lic in &c.licenses { + if let Some(v) = evaluate_one(c, lic, policy) { + out.push(v); + } + } +} + +fn evaluate_one(c: &Component, lic: &str, policy: &Policy) -> Option { + let trimmed = lic.trim(); + let is_compound = is_compound_expression(trimmed); + let is_unknown = matches!( + trimmed.to_ascii_uppercase().as_str(), + "" | "NOASSERTION" | "OTHER" + ); + + if is_compound || is_unknown { + if policy.allow_ambiguous { + return None; + } + return Some(LicenseViolation { + component: c.clone(), + license: trimmed.to_string(), + matched_rule: format!("ambiguous: {trimmed}"), + kind: LicenseViolationKind::Ambiguous, + }); + } + + // Atomic. Deny wins when both match. + if let Some(rule) = matches_any(trimmed, &policy.deny) { + return Some(LicenseViolation { + component: c.clone(), + license: trimmed.to_string(), + matched_rule: format!("deny: {rule}"), + kind: LicenseViolationKind::Deny, + }); + } + if !policy.allow.is_empty() && matches_any(trimmed, &policy.allow).is_none() { + return Some(LicenseViolation { + component: c.clone(), + license: trimmed.to_string(), + matched_rule: format!("not in allow list: {trimmed}"), + kind: LicenseViolationKind::NotAllowed, + }); + } + None +} + +/// Return Some(rule) when `lic` matches any pattern in `patterns`. Glob +/// support is the trailing-`*` form only. 
+fn matches_any(lic: &str, patterns: &[String]) -> Option { + for p in patterns { + if matches_pattern(lic, p) { + return Some(p.clone()); + } + } + None +} + +fn matches_pattern(lic: &str, pattern: &str) -> bool { + if let Some(prefix) = pattern.strip_suffix('*') { + lic.starts_with(prefix) + } else { + lic == pattern + } +} + +fn is_compound_expression(s: &str) -> bool { + // Any of the SPDX operators or parens makes this a compound expression. + if s.contains('(') || s.contains(')') { + return true; + } + for token in s.split_whitespace() { + if matches!(token, "AND" | "OR" | "WITH") { + return true; + } + } + false +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::model::{Ecosystem, Relationship}; + + fn comp(name: &str, licenses: Vec<&str>) -> Component { + Component { + name: name.into(), + version: "1.0.0".into(), + ecosystem: Ecosystem::Npm, + purl: Some(format!("pkg:npm/{name}@1.0.0")), + licenses: licenses.into_iter().map(String::from).collect(), + supplier: None, + hashes: Vec::new(), + relationship: Relationship::Unknown, + source_url: None, + bom_ref: None, + } + } + + fn cs_with_added(c: Component) -> ChangeSet { + ChangeSet { + added: vec![c], + ..Default::default() + } + } + + #[test] + fn allow_pass_no_violation() { + let cs = cs_with_added(comp("foo", vec!["MIT"])); + let policy = Policy { + allow: vec!["MIT".into(), "Apache-2.0".into()], + ..Default::default() + }; + assert!(enrich(&cs, &policy).is_empty()); + } + + #[test] + fn deny_fail_violation() { + let cs = cs_with_added(comp("foo", vec!["GPL-3.0-only"])); + let policy = Policy { + deny: vec!["GPL-3.0-only".into()], + ..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + assert_eq!(v[0].kind, LicenseViolationKind::Deny); + assert!(v[0].matched_rule.contains("GPL-3.0-only")); + } + + #[test] + fn glob_expansion_matches_prefix() { + let cs = cs_with_added(comp("foo", vec!["AGPL-3.0-only"])); + let policy = Policy { + deny: vec!["AGPL-*".into()], + 
..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + assert_eq!(v[0].matched_rule, "deny: AGPL-*"); + } + + #[test] + fn compound_ambiguous_fails_closed_by_default() { + let cs = cs_with_added(comp("foo", vec!["(MIT OR GPL-3.0-only)"])); + let policy = Policy { + allow: vec!["MIT".into()], + ..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + assert_eq!(v[0].kind, LicenseViolationKind::Ambiguous); + } + + #[test] + fn compound_ambiguous_permitted_when_flag_set() { + let cs = cs_with_added(comp("foo", vec!["(MIT OR GPL-3.0-only)"])); + let policy = Policy { + allow: vec!["MIT".into()], + allow_ambiguous: true, + ..Default::default() + }; + assert!(enrich(&cs, &policy).is_empty()); + } + + #[test] + fn deny_wins_over_allow_when_both_match() { + let cs = cs_with_added(comp("foo", vec!["GPL-3.0-only"])); + let policy = Policy { + allow: vec!["GPL-3.0-only".into()], + deny: vec!["GPL-3.0-only".into()], + ..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + assert_eq!(v[0].kind, LicenseViolationKind::Deny); + } + + #[test] + fn license_not_in_allow_list_violates() { + let cs = cs_with_added(comp("foo", vec!["BSD-3-Clause"])); + let policy = Policy { + allow: vec!["MIT".into()], + ..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + assert_eq!(v[0].kind, LicenseViolationKind::NotAllowed); + } + + #[test] + fn noassertion_treated_as_ambiguous() { + let cs = cs_with_added(comp("foo", vec!["NOASSERTION"])); + let policy = Policy { + allow: vec!["MIT".into()], + ..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + assert_eq!(v[0].kind, LicenseViolationKind::Ambiguous); + } + + #[test] + fn empty_policy_is_inactive() { + let cs = cs_with_added(comp("foo", vec!["GPL-3.0-only"])); + let policy = Policy::default(); + assert!(enrich(&cs, &policy).is_empty()); + } + + #[test] + fn version_changed_components_evaluated() 
{ + let before = comp("foo", vec!["MIT"]); + let mut after = comp("foo", vec!["GPL-3.0-only"]); + after.version = "2.0.0".into(); + let cs = ChangeSet { + version_changed: vec![(before, after)], + ..Default::default() + }; + let policy = Policy { + deny: vec!["GPL-3.0-only".into()], + ..Default::default() + }; + let v = enrich(&cs, &policy); + assert_eq!(v.len(), 1); + } +} diff --git a/src/enrich/mod.rs b/src/enrich/mod.rs index c382f15..6a3f1c9 100644 --- a/src/enrich/mod.rs +++ b/src/enrich/mod.rs @@ -12,6 +12,7 @@ pub mod cache; pub mod epss; pub mod kev; +pub mod license; pub mod maintainer; pub mod osv; pub mod typosquat; diff --git a/src/lib.rs b/src/lib.rs index eacbaf9..82e8971 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -166,6 +166,16 @@ fn run_diff(mut args: DiffArgs) -> Result<()> { } } + // License-policy enrichment (Phase D, v0.8). Pure-compute, runs after + // OSV/EPSS/KEV. Empty allow + empty deny means "no policy" — the + // enricher returns no violations. + let license_policy = enrich::license::Policy { + allow: args.allow_licenses.clone(), + deny: args.deny_licenses.clone(), + allow_ambiguous: args.allow_ambiguous_licenses, + }; + enrichment.license_violations = enrich::license::enrich(&cs, &license_policy); + // Apply the baseline AFTER all enrichers run — suppression operates on // the realized finding set, not on intermediate inputs. 
This keeps the // baseline file format stable as new enrichers are added: a new finding @@ -443,6 +453,23 @@ fn write_calibration_lines( } } } + for v in &e.license_violations { + write_calibration_row( + out, + "license", + v.component + .purl + .as_deref() + .unwrap_or(v.component.name.as_str()), + CalibrationScore::Text(&v.license), + CalibrationThreshold::Text(match v.kind { + crate::enrich::LicenseViolationKind::Deny => "deny", + crate::enrich::LicenseViolationKind::Ambiguous => "ambiguous", + crate::enrich::LicenseViolationKind::NotAllowed => "not-allowed", + }), + format, + ); + } } /// Numeric or symbolic score for a calibration row. Float/Int rendered @@ -935,4 +962,23 @@ mod tests { assert!(s.contains("epss|"), "missing epss row: {s}"); assert!(s.contains("kev|"), "missing kev row: {s}"); } + + #[test] + fn fail_on_license_violation_trips() { + use crate::enrich::{LicenseViolation, LicenseViolationKind}; + let mut e = Enrichment::default(); + e.license_violations.push(LicenseViolation { + component: comp("foo"), + license: "GPL-3.0-only".into(), + matched_rule: "deny: GPL-3.0-only".into(), + kind: LicenseViolationKind::Deny, + }); + assert!(tripped(&ChangeSet::default(), &e, FailOn::LicenseViolation)); + assert!(tripped(&ChangeSet::default(), &e, FailOn::Any)); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::LicenseViolation + )); + } } diff --git a/src/render/markdown.rs b/src/render/markdown.rs index d4d9708..6566bbd 100644 --- a/src/render/markdown.rs +++ b/src/render/markdown.rs @@ -118,6 +118,13 @@ pub fn render_with_options(cs: &ChangeSet, enrichment: &Enrichment, opts: Option enrichment.maintainer_age.len() ); } + if !enrichment.license_violations.is_empty() { + let _ = writeln!( + out, + "| License violations | {} |", + enrichment.license_violations.len() + ); + } out.push('\n'); if opts.summary_only { @@ -170,6 +177,35 @@ pub fn render_with_options(cs: &ChangeSet, enrichment: &Enrichment, opts: Option 
section_close(&mut out); } + if !enrichment.license_violations.is_empty() { + section_open( + &mut out, + "License violations", + enrichment.license_violations.len(), + None, + ); + out.push_str( + "One or more changed components have a license that the configured \ + policy disallows. Review the matched rule and either update the \ + component, exempt it via an explicit baseline entry, or relax the \ + policy. \ + [Why this matters](https://metbcy.github.io/bomdrift/license-policy.html)\n\n", + ); + out.push_str("| Ecosystem | Name | Version | License | Rule |\n|---|---|---|---|---|\n"); + for v in &enrichment.license_violations { + let _ = writeln!( + out, + "| {} | {} | {} | `{}` | {} |", + v.component.ecosystem, + v.component.name, + v.component.version, + v.license, + v.matched_rule, + ); + } + section_close(&mut out); + } + if !cs.license_changed.is_empty() { section_open( &mut out, diff --git a/src/render/sarif.rs b/src/render/sarif.rs index 3f1c6a8..44600d0 100644 --- a/src/render/sarif.rs +++ b/src/render/sarif.rs @@ -379,6 +379,39 @@ fn results(cs: &ChangeSet, e: &Enrichment) -> Value { })); } + // ---- bomdrift.license-violation ---- + for v in &e.license_violations { + let name = &v.component.name; + let purl_or_name = v.component.purl.as_deref().unwrap_or(name); + let fp = fingerprint(&["bomdrift.license-violation", purl_or_name, &v.license]); + out.push(json!({ + "ruleId": "bomdrift.license-violation", + "level": "warning", + "message": { + "text": format!( + "`{name}` license `{lic}` violates policy ({rule}).", + name = name, + lic = v.license, + rule = v.matched_rule, + ), + }, + "locations": [synthetic_location()], + "partialFingerprints": { "primaryHash/v1": fp }, + "properties": { + "purl": v.component.purl, + "name": name, + "version": v.component.version, + "license": v.license, + "matchedRule": v.matched_rule, + "kind": match v.kind { + crate::enrich::LicenseViolationKind::Deny => "deny", + crate::enrich::LicenseViolationKind::Ambiguous => 
"ambiguous", + crate::enrich::LicenseViolationKind::NotAllowed => "not-allowed", + }, + }, + })); + } + Value::Array(out) } @@ -855,4 +888,34 @@ mod tests { .to_string(); assert_ne!(f1, f2); } + + #[test] + fn license_violation_emits_result_with_stable_fingerprint() { + use crate::enrich::{LicenseViolation, LicenseViolationKind}; + let comp = comp("foo", "1.0.0", Ecosystem::Npm, Some("pkg:npm/foo@1.0.0")); + let e = Enrichment { + license_violations: vec![LicenseViolation { + component: comp, + license: "GPL-3.0-only".into(), + matched_rule: "deny: GPL-3.0-only".into(), + kind: LicenseViolationKind::Deny, + }], + ..Default::default() + }; + let r1 = render(&ChangeSet::default(), &e); + let r2 = render(&ChangeSet::default(), &e); + assert_eq!(r1, r2, "byte-equal across runs"); + let v: Value = serde_json::from_str(&r1).unwrap(); + let result = &v["runs"][0]["results"][0]; + assert_eq!(result["ruleId"], "bomdrift.license-violation"); + assert_eq!(result["properties"]["license"], "GPL-3.0-only"); + assert_eq!(result["properties"]["kind"], "deny"); + assert_eq!( + result["partialFingerprints"]["primaryHash/v1"] + .as_str() + .unwrap() + .len(), + 64 + ); + } } diff --git a/src/render/term.rs b/src/render/term.rs index a95bb5f..95ce088 100644 --- a/src/render/term.rs +++ b/src/render/term.rs @@ -170,6 +170,27 @@ pub fn render_with_color(cs: &ChangeSet, enrichment: &Enrichment, color: ColorCh out.push('\n'); } + if !enrichment.license_violations.is_empty() { + let _ = writeln!( + out, + "License violations ({}):", + enrichment.license_violations.len() + ); + for v in &enrichment.license_violations { + let _ = writeln!( + out, + " {} {}:{}@{} - {} [{}]", + tag("[LIC]", Tone::High, color), + v.component.ecosystem, + v.component.name, + v.component.version, + v.license, + v.matched_rule, + ); + } + out.push('\n'); + } + out } From 9d22160cc0d42d72134f420c97c1eb1e1dee4544 Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:46:17 -0700 Subject: [PATCH 09/10] 
chore(release): prepare v0.8.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Cargo.toml + Cargo.lock: 0.7.0 → 0.8.0 - README.md, docs/src/quickstart.md, .github/ISSUE_TEMPLATE: pin examples bumped v0.7.0 → v0.8.0 - CHANGELOG.md: 0.8.0 entry covering F1-F4 + A-D (foundations: time crate, SOURCE_DATE_EPOCH, OSV aliases, JSONL debug; features: SARIF Code Scanning, EPSS+KEV, license policy, baseline expiry). Explicit Scope notes section listing v0.9-deferred items. - STATUS.md: new ✓ rows for SARIF Code Scanning, EPSS, KEV, license policy, baseline expiry. Bitbucket / Azure DevOps + VEX moved to 'Planned for v0.9'. - docs/src/roadmap.md: new 'Shipped (v0.8)' section; 'Planned (v0.9)' refreshed with VEX consume + emit, SPDX evaluator, multi-SCM, registry enrichers, GitLab comment-suppress, non-goals doc. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/action-broke.md | 2 +- CHANGELOG.md | 99 ++++++++++++++++++++++++++ Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 8 +-- STATUS.md | 10 ++- docs/src/quickstart.md | 6 +- docs/src/roadmap.md | 63 ++++++++++++---- 8 files changed, 167 insertions(+), 25 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/action-broke.md b/.github/ISSUE_TEMPLATE/action-broke.md index 3b4c604..db76fdd 100644 --- a/.github/ISSUE_TEMPLATE/action-broke.md +++ b/.github/ISSUE_TEMPLATE/action-broke.md @@ -36,6 +36,6 @@ failure is usually obvious if you expand all groups. --> ## Environment -- **bomdrift version pin**: `@v1` / `@v0.7.0` / `@` +- **bomdrift version pin**: `@v1` / `@v0.8.0` / `@` - **Runner**: - **Trigger event**: diff --git a/CHANGELOG.md b/CHANGELOG.md index 320474b..efa3763 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,105 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.8.0] - 2026-04-29 + +The "supply-chain hardening" milestone. 
v0.8 finishes SARIF for GitHub +Code Scanning, lights up exploit-prediction (EPSS) and +known-exploited-in-the-wild (CISA KEV) signals on every advisory, +introduces an explicit license allow/deny policy with fail-closed +compound-expression handling, and adds time-boxed risk-acceptance to +the suppression baseline. + +### Added + +- **SARIF + GitHub Code Scanning end-to-end.** Every result now carries + a stable `partialFingerprints.primaryHash/v1` hash so Code Scanning's + alert dedup threads correctly across runs. New action input + `upload-to-code-scanning: true` wires + `github/codeql-action/upload-sarif@v3` for one-line opt-in. New + `--output-file ` CLI flag avoids YAML `>`-redirection + quirks. Per-rule fingerprint identity tuples documented at + [docs/src/sarif.md](docs/src/sarif.md). + +- **EPSS scoring (FIRST.org).** Every CVE-aliased advisory surfaces an + exploitation-probability badge in markdown / terminal / SARIF / + JSON. `--fail-on-epss ` trips exit 2 when any advisory exceeds + the threshold. `--no-epss` opt-out + 24h disk cache at + `/bomdrift/epss/`. Best-effort: network failure logs at + `BOMDRIFT_DEBUG=1`, diff still renders. + [docs/src/enrichers/epss.md](docs/src/enrichers/epss.md) + +- **CISA KEV (Known Exploited Vulnerabilities).** A `KEV` flag flips + on every advisory whose primary id or CVE alias appears in CISA's + catalog. `--fail-on kev` + `--no-kev` flags. Once-daily catalog + cache at `/bomdrift/kev/catalog.json`. + [docs/src/enrichers/kev.md](docs/src/enrichers/kev.md) + +- **License allow/deny policy.** New `[license]` block in + `.bomdrift.toml` (or `--allow-licenses`/`--deny-licenses` CLI flags + matching Dependency Review Action names). Atomic exact match + + `*`-suffix glob (`AGPL-*`); compound expressions like + `(MIT OR GPL-3.0)` fail closed by default unless + `allow_ambiguous=true`. Distinct from same-version license drift: + this is a policy gate. New SARIF rule + `bomdrift.license-violation`. 
`--fail-on license-violation` trips + exit 2. [docs/src/license-policy.md](docs/src/license-policy.md) + +- **Suppression expiry + reason.** Each `suppressed_advisories` entry + may now be the v0.8 object form + `{id, expires?: "YYYY-MM-DD", reason?: "free text"}`. Expired + entries surface a stderr warning and stop suppressing; bomdrift + refuses to load malformed dates. `bomdrift baseline add --expires + --reason` records the metadata; the `comment-suppress` companion + action picks up an optional `reason: <text>` line in the trigger + comment body. + [docs/src/baseline.md](docs/src/baseline.md#time-boxed-suppressions-expires--reason) + +- **OSV CVE aliases threaded through `VulnRef`.** OSV `/v1/vulns/{id}` + responses now feed CVE aliases into `VulnRef.aliases` (sorted, + byte-deterministic). EPSS / KEV / future VEX consumption all read + from `VulnRef::cves()`. + +- **`time` crate adoption + `clock` module.** New `src/clock.rs` is + the single source of truth for date/time across the codebase. + Honors `SOURCE_DATE_EPOCH` (read per call so test fixtures can vary + it). All v0.8 features that emit dates / compare dates go through + this module — reproducible-build contexts stay deterministic. + +- **`--debug-calibration-format <format>`.** New JSONL alternative + to the v0.7 pipe-delimited calibration tap. Numeric scores stay + numeric in JSON; severity buckets stay strings. Adding a new + finding kind is one call to a dispatch helper, not a fork. + +### Changed + +- `--fail-on any` now also includes KEV-flagged advisories and + license-violation findings. +- SARIF rule list grew from 5 to 6 (added `bomdrift.license-violation`). + +### Scope notes + +The following items were deliberately **deferred to v0.9** rather than +half-shipped: + +- **GitLab comment-driven suppress.** The `/bomdrift suppress` flow + works on GitHub via the `comment-suppress` companion action. 
Porting + to GitLab needs a webhook bridge with five distinct security guards + (token verification, event-type filter, project allowlist, + commenter-permission check, MR-context guard). Shipping the bridge + without those is a vulnerability — moved to v0.9 with a security + review milestone. +- **Multi-SCM (Bitbucket + Azure DevOps).** Templates and footer + shapes need per-platform comment-API exploration; deferred to v0.9. +- **VEX consume + emit.** Both depend on the `time` crate + foundation that lands here. Consume in v0.9-G; emit in v0.9-H. The + baseline-expiry + reason fields added in v0.8 feed directly into + the VEX `status_notes` field when emit lands. +- **SPDX expression evaluator.** v0.8 fails closed on compound + expressions; v0.9 adopts the `spdx` crate (~30kb) for proper + evaluation. The `allow_ambiguous` flag becomes redundant at that + point. + ## [0.7.0] - 2026-04-30 The "broaden the platform, polish the edges" milestone. v0.7 takes the diff --git a/Cargo.lock b/Cargo.lock index 1255455..ce2d988 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,7 +123,7 @@ dependencies = [ [[package]] name = "bomdrift" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 713e63b..466d40e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bomdrift" -version = "0.7.0" +version = "0.8.0" edition = "2024" rust-version = "1.85" description = "SBOM diff with supply-chain risk signals (CVEs, typosquats, maintainer-age)." diff --git a/README.md b/README.md index 8eeada7..c808598 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ jobs: # verify-signatures: true (set false on trusted mirrors) ``` -Pin to `@v1` for the latest v0.x; pin to `@v0.7.0` for reproducible builds. Run `bomdrift init` if you want a checked-in `.bomdrift.toml` policy and both workflows scaffolded locally. See the [Action reference](https://metbcy.github.io/bomdrift/github-action.html) for every input. 
+Pin to `@v1` for the latest v0.x; pin to `@v0.8.0` for reproducible builds. Run `bomdrift init` if you want a checked-in `.bomdrift.toml` policy and both workflows scaffolded locally. See the [Action reference](https://metbcy.github.io/bomdrift/github-action.html) for every input. #### Optional: in-comment suppression (v0.5+) @@ -112,7 +112,7 @@ Comment `/bomdrift suppress GHSA-xxxx` on any PR; the sub-action appends to `.bo Pre-built binaries cover Linux x86_64 + aarch64, macOS aarch64, and Windows x86_64. Each archive is cosign-signed via Sigstore + GitHub OIDC. ```bash -VERSION=v0.7.0 +VERSION=v0.8.0 TARGET=x86_64-unknown-linux-gnu curl -sSL -o bomdrift.tar.gz \ "https://github.com/Metbcy/bomdrift/releases/download/${VERSION}/bomdrift-${VERSION}-${TARGET}.tar.gz" @@ -128,7 +128,7 @@ Verify the archive's signature before you trust the binary — see [Release sign ### From source ```bash -cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.7.0 bomdrift +cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.8.0 bomdrift ``` Requires Rust 1.85+ (the project uses edition 2024). @@ -230,7 +230,7 @@ Every release archive is signed with cosign keyless via Sigstore (GitHub OIDC). ```bash # Replace VERSION + TARGET with your downloaded archive's pair -VERSION=v0.7.0 +VERSION=v0.8.0 TARGET=x86_64-unknown-linux-gnu ARCHIVE=bomdrift-${VERSION}-${TARGET}.tar.gz diff --git a/STATUS.md b/STATUS.md index 5a21fa1..31878a2 100644 --- a/STATUS.md +++ b/STATUS.md @@ -12,9 +12,15 @@ keeping the project OSS-first: no hosted dashboard, no account, no telemetry. 
| Local CLI | Supported on Linux x86_64/aarch64, macOS aarch64, Windows x86_64 | | SBOM formats | CycloneDX JSON, SPDX JSON, Syft JSON | | In-comment suppression (GitHub) | Supported through `Metbcy/bomdrift/comment-suppress@v1` | -| GitLab CI merge requests | Supported through the `examples/gitlab-ci/` template (v0.7+); in-comment suppression deferred to v0.8 | +| GitHub Code Scanning (SARIF upload) | Supported (v0.8+) — set `upload-to-code-scanning: 'true'` | +| EPSS exploit-prediction scoring | Supported (v0.8+) — auto, opt-out via `--no-epss` | +| CISA KEV (known-exploited) flagging | Supported (v0.8+) — auto, opt-out via `--no-kev` | +| License allow/deny policy | Supported (v0.8+) — `[license]` block / CLI flags | +| Suppression expiry (`expires` + `reason`) | Supported (v0.8+) — time-boxed risk acceptance | +| GitLab CI merge requests | Supported through the `examples/gitlab-ci/` template (v0.7+); in-comment suppression deferred to v0.9 | | GitHub Enterprise / self-hosted runners | Expected to work, not broadly tested yet | -| Bitbucket | Not supported | +| Bitbucket / Azure DevOps | Planned for v0.9 | +| VEX consume / emit | Planned for v0.9 | | Hosted dashboard / SaaS | Not planned | ## Known limitations diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md index 80f0b5e..7876ae0 100644 --- a/docs/src/quickstart.md +++ b/docs/src/quickstart.md @@ -25,7 +25,7 @@ jobs: ``` The `@v1` mutable tag tracks the latest v0.x release. Pin to a specific -version (`@v0.7.0`) if you prefer reproducible builds. See +version (`@v0.8.0`) if you prefer reproducible builds. See [GitHub Action](./github-action.md) for every input. If you prefer a checked-in policy file, install the binary and run @@ -39,7 +39,7 @@ Pre-built binaries cover Linux x86_64 + aarch64, macOS aarch64, and Windows x86_64. Each archive is cosign-signed via Sigstore + GitHub OIDC. 
```bash -VERSION=v0.7.0 +VERSION=v0.8.0 TARGET=x86_64-unknown-linux-gnu curl -sSL -o bomdrift.tar.gz \ "https://github.com/Metbcy/bomdrift/releases/download/${VERSION}/bomdrift-${VERSION}-${TARGET}.tar.gz" @@ -56,7 +56,7 @@ To verify the archive's signature before you trust the binary, see ## From source ```bash -cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.7.0 bomdrift +cargo install --locked --git https://github.com/Metbcy/bomdrift --tag v0.8.0 bomdrift ``` Requires Rust 1.85+ (the project uses edition 2024). diff --git a/docs/src/roadmap.md b/docs/src/roadmap.md index b29e8d7..a3ecc8c 100644 --- a/docs/src/roadmap.md +++ b/docs/src/roadmap.md @@ -3,12 +3,53 @@ What's planned, what's deliberately out of scope, and what the acceptance criteria for new contributions look like. -## Planned - -The list below is intentionally short — bomdrift is small on purpose. -Items are grouped by likely landing area and rough sizing. - -### Future candidates (not committed) +## Shipped (v0.8 — supply-chain hardening) + +- **SARIF + GitHub Code Scanning** with stable per-result fingerprints + and one-line action opt-in (`upload-to-code-scanning: true`). +- **EPSS scoring** on every CVE-aliased advisory; `--fail-on-epss` + threshold gating. +- **CISA KEV flagging** of known-exploited advisories; + `--fail-on kev`. +- **License allow/deny policy** with `*`-suffix glob matching and + fail-closed compound-expression handling. New + `bomdrift.license-violation` SARIF rule. +- **Baseline `expires` + `reason`** for time-boxed risk acceptance, + with stderr warnings on expired entries. +- **`time` crate adoption + `clock` module** — single source of truth + for date/time, honors `SOURCE_DATE_EPOCH`. +- **OSV CVE aliases** threaded through `VulnRef` (prerequisite for + EPSS / KEV / VEX). +- **`--debug-calibration-format jsonl`** alternative to the v0.7 + pipe-delimited format. +- **`--output-file `** CLI flag (avoids `>` redirection in YAML). 
+ +## Planned (v0.9 — interoperability + breadth) + +- **VEX consume** — `--vex <path>` accepts OpenVEX 0.2.0 + CycloneDX + VEX 1.6 statements; `not_affected` / `fixed` suppress findings, + `under_investigation` annotates. +- **VEX emit** — `--emit-vex <path>` emits an OpenVEX document from + baseline-suppressed findings. Defaults to + `under_investigation` (the safe truth-claim); per-entry + `vex_status` override required for `not_affected`. +- **SPDX expression evaluator** — replaces v0.8's atomic+glob matcher + with full `(MIT OR Apache-2.0)` evaluation via the `spdx` crate. + Deprecates `allow_ambiguous`. +- **Multi-SCM templates** — Bitbucket Pipelines + Azure DevOps with + per-platform footer shapes and PR-comment upsert recipes. +- **Registry-metadata enrichers** — npm `time.modified`, PyPI + `info.yanked`, crates.io `versions[].yanked`. New finding kinds: + `RecentlyPublished`, `Deprecated`, `MaintainerSetChanged`. +- **GitLab in-comment suppression** with explicit security guards + (token verification, event filter, project allowlist, commenter + permissions, fork-MR safety). Reference Cloudflare Worker bridge. +- **Explicit non-goals doc** — reachability, tarball static analysis, + auto-fix PR generation, container image scanning, SAST/secrets, + risk-score dashboards. Pair with Endor/Snyk for reachability, + Renovate/Dependabot for auto-fix. + +## Future candidates (not committed) + +- **GraphQL maintainer-age** — was investigated for v0.4 and deferred. The current REST implementation already uses `?per_page=1` + Link-header 
GitLab note-event webhooks have a different model than - GitHub PR comments — wiring the safe path (rate-limit, fork-MR - safety, command parsing, double-trigger debounce) is a v0.8 - candidate once we see real adoption data on the v0.7 manual - path. + template + `--platform gitlab` (the diff path); v0.9 will add the + comment-driven `/bomdrift suppress ` flow with explicit + security guards. - **Calibration tuning from `--debug-calibration` data** — v0.7 added the diagnostic flag; v0.8 may revise `SIMILARITY_THRESHOLD`, `YOUNG_MAINTAINER_DAYS`, and OSV cache From 1b91368c4938db076061c17c6457451382f56dff Mon Sep 17 00:00:00 2001 From: Metbcy Date: Wed, 29 Apr 2026 13:53:39 -0700 Subject: [PATCH 10/10] fix(deps): bump time to 0.3.47 to clear RUSTSEC-2026-0009 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI audit + deny gates caught RUSTSEC-2026-0009 (DoS via stack exhaustion in time's RFC 2822 parser, fixed in 0.3.47+). Bumping to 0.3.47 requires Rust 1.88, so MSRV moves 1.85 -> 1.88. bomdrift does not parse user-supplied RFC 2822 input — the advisory is not exploitable here — but tightening the dep is the right call rather than carrying an audit-deny exception. Two clippy lints surfaced under 1.88 and were also fixed: - src/baseline.rs: collapsed nested if into &&-chain. - benches/diff.rs: used i.is_multiple_of(2). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 2 +- benches/diff.rs | 2 +- src/baseline.rs | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ce2d988..f455489 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -721,9 +721,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-traits" @@ -1205,9 +1205,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.45" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", @@ -1220,15 +1220,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.25" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", diff --git a/Cargo.toml b/Cargo.toml index 466d40e..7db67b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "bomdrift" version = "0.8.0" edition = "2024" -rust-version = "1.85" +rust-version = "1.88" description = "SBOM diff with supply-chain risk signals 
(CVEs, typosquats, maintainer-age)." license = "Apache-2.0" repository = "https://github.com/Metbcy/bomdrift" diff --git a/benches/diff.rs b/benches/diff.rs index 0b98517..cc52422 100644 --- a/benches/diff.rs +++ b/benches/diff.rs @@ -26,7 +26,7 @@ fn load(path: &str) -> Sbom { fn synth_component(i: usize, version_offset: usize) -> Component { let name = format!("pkg-{i:04}"); let mut version = format!("1.{}.0", i % 50); - if i % 2 == 0 { + if i.is_multiple_of(2) { version = format!("1.{}.0", (i % 50) + version_offset); } let purl = format!("pkg:npm/{name}@{version}"); diff --git a/src/baseline.rs b/src/baseline.rs index ea59a8f..bad8cd1 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -369,11 +369,11 @@ pub fn add_suppression_full( arr.push(serde_json::Value::String(trimmed.to_string())); } - if let Some(parent) = path.parent() { - if !parent.as_os_str().is_empty() { - std::fs::create_dir_all(parent) - .with_context(|| format!("creating parent dir: {}", parent.display()))?; - } + if let Some(parent) = path.parent() + && !parent.as_os_str().is_empty() + { + std::fs::create_dir_all(parent) + .with_context(|| format!("creating parent dir: {}", parent.display()))?; } // Atomic temp-file + rename, mirroring src/refresh.rs's pattern.