From fcc693b6a300519a5686760faf09f0b108edf2a3 Mon Sep 17 00:00:00 2001 From: Metbcy Date: Mon, 1 Jun 2026 19:46:42 +0000 Subject: [PATCH 1/3] test(typosquat): close 13 mutation-test gaps (#35) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 5 tests covering 13 logic/return-value mutants in src/enrich/typosquat.rs surfaced by cargo-mutants round 2: - maven_best_match_includes_distance_equal_to_max_levenshtein: pins line 371 boundary (`>` vs `>=` at MAVEN_MAX_LEVENSHTEIN). - maven_best_match_picks_closest_when_multiple_candidates_within_distance: pins line 375 match guard (true/false stubs + `>=`/`<` swap). - maven_best_match_score_formula_matches_one_minus_dist_over_len_plus_one: pins lines 380-381 arithmetic (`+`/`-`/`*`, `/`/`%`). - suspicious_suffix_containment_requires_strict_delta_over_legit_len: pins line 416 `+`/`-` mutant via strict boundary. - default_cache_path_targets_typosquat_subdir_with_ecosystem_filename: pins line 471 None/Default::default stubs. 4 label-string mutants (lines 150 `cache_filename`, 504 `ecosystem_label`) accepted and documented in audit log — no behavior depends on the literal string content. cargo test --lib enrich::typosquat: 50 passed (was 45). cargo clippy --all-targets -- -D warnings: clean. --- docs/src/internals/mutation-testing.md | 36 ++++++++++ src/enrich/typosquat.rs | 98 ++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/docs/src/internals/mutation-testing.md b/docs/src/internals/mutation-testing.md index 02020a8..9e04cba 100644 --- a/docs/src/internals/mutation-testing.md +++ b/docs/src/internals/mutation-testing.md @@ -30,3 +30,39 @@ Each entry: `module — caught / unviable / surviving / total — date`. The tar No test additions required. The diff engine's existing unit and property tests fully cover the mutation surface. The three unviable mutants were mutations that produced code that does not compile — they're excluded from the surviving denominator because the compiler itself rejects them before the test suite runs. + +### `src/baseline.rs` — v0.9.9 round 2 + +- **Total mutants:** 54 +- **Caught (round 1):** 42 +- **Unviable:** 3 +- **Surviving (round 1):** 9 +- **Surviving rate (after #63):** 0 / 51 = **0.0%** +- Date: 2026-06-01 + +PR #63 added 7 tests covering 9 logic survivors across `apply` (suppression matching), `add_suppression_full` (object-form writes), and `doc_kind` (JSON variant labeling). All previously-missed mutants now caught. + +### `src/enrich/typosquat.rs` — v0.9.9 round 2 + +- **Total mutants:** 98 +- **Caught (round 1):** 76 +- **Unviable:** 5 +- **Surviving (round 1):** 17 +- **Closed by new tests:** 13 (logic + return-value) +- **Accepted as label-string:** 4 (see below) +- **Surviving after audit:** 0 logic / 4 acceptable label +- Date: 2026-06-01 + +Tests added in this PR cover: + +- `best_match_maven` boundary at `dist == MAVEN_MAX_LEVENSHTEIN` (line 371 `>`/`>=` mutant) +- `best_match_maven` closer-wins selection through the match guard (line 375 guard mutants) +- `best_match_maven` score formula `1 - dist / (len + 1)` (lines 380-381 arithmetic mutants) +- `has_suspicious_suffix_containment` strict-delta boundary (line 416 `+`/`-` mutant) +- `default_cache_path` returns `Some(.txt>)` (line 471 None/Default mutants) + +**Accepted label-string mutants (documented, not closed):** + +- `SupportedEcosystem::cache_filename` returning `""` or `"xyzzy"` (line 150): the function's only caller, `default_cache_path`, joins the string into a `PathBuf`. No downstream behavior depends on the literal filename content as long as the path resolves; the only place we assert on the content is the new `default_cache_path` test (above), which fixes the suffix shape but not the exact ecosystem label. +- `ecosystem_label` returning `""` or `"xyzzy"` (line 504): the function is used only for a one-shot `eprintln!` user-facing log message in `load_legit_list`. Logging text is not contractual; no test should pin the human-readable label. + diff --git a/src/enrich/typosquat.rs b/src/enrich/typosquat.rs index 1d17ec7..dbcd74f 100644 --- a/src/enrich/typosquat.rs +++ b/src/enrich/typosquat.rs @@ -1085,4 +1085,102 @@ mod tests { "lowering the threshold must not reduce findings" ); } + + // ---- Mutation-test gap closers (issue #35) --------------------------- + + #[test] + fn maven_best_match_includes_distance_equal_to_max_levenshtein() { + // commons-lang3 (12 chars) vs commons-lng2 (11): Levenshtein = 2 + // (delete 'a', substitute '3'->'2'). Exactly at MAVEN_MAX_LEVENSHTEIN. + // Guards line 371: changing `>` to `>=` would drop this finding. + let findings = enrich(&cs_added(vec![comp_eco( + "org.apache.commons:commons-lng2", + Ecosystem::Maven, + )])); + assert_eq!( + findings.len(), + 1, + "dist == MAVEN_MAX_LEVENSHTEIN must still flag; got {findings:?}" + ); + assert!(findings[0].closest.ends_with(":commons-lang3")); + } + + #[test] + fn maven_best_match_picks_closest_when_multiple_candidates_within_distance() { + // Direct unit test of best_match_maven to pin the "closer wins" + // selection logic. Guards line 375 match guard (`true`/`false` + // stubs and `>=`->`<` swap all break this ordering). + // + // candidate "guavb" (5 chars): + // vs "guava" -> dist 1 + // vs "gauva" -> dist 2 + // Both are within MAVEN_MAX_LEVENSHTEIN=2 and the algorithm must + // pick "guava" (closer). Order legit so the farther match comes + // FIRST -- that way the `dist >= d` guard is the only thing that + // promotes the closer second entry. + let legit = vec![ + "x.y:gauva".to_string(), // dist 2, seen first + "x.y:guava".to_string(), // dist 1, must win + ]; + let got = best_match_maven("x.y:guavb", &legit, 0.0); + assert_eq!( + got.map(|(name, _)| name), + Some("x.y:guava"), + "closer match must beat earlier farther match" + ); + } + + #[test] + fn maven_best_match_score_formula_matches_one_minus_dist_over_len_plus_one() { + // Guards the arithmetic on lines 380-381: + // denom = legit_artifact.len() + 1 + // raw = 1.0 - dist / denom + // For artifact "guava" (5) with dist 1: denom = 6, raw = 1 - 1/6. + // Threshold pulled low so `.max(threshold)` does not clamp. + let legit = vec!["x.y:guava".to_string()]; + let (name, score) = best_match_maven("x.y:guavb", &legit, 0.1) + .expect("guavb must match guava within Lev 2"); + assert_eq!(name, "x.y:guava"); + let expected = 1.0_f64 - 1.0 / 6.0; + assert!( + (score - expected).abs() < 1e-9, + "score {score} must equal 1 - 1/(len+1) = {expected}" + ); + } + + #[test] + fn suspicious_suffix_containment_requires_strict_delta_over_legit_len() { + // Guards line 416: `candidate.len() <= legit.len() + SUFFIX_BOOST_MIN_DELTA`. + // Boundary case: candidate length equals legit + delta exactly. + // SUFFIX_BOOST_MIN_DELTA = 3, so legit "crypto" (6) + 3 = 9. + // candidate "ab-crypto" (9 chars) must NOT be suspicious -- need + // strictly MORE than that delta. + assert!( + !has_suspicious_suffix_containment("ab-crypto", "crypto"), + "candidate at exactly len + delta is below the suspicion bar" + ); + // One char over the boundary flips it on. + assert!( + has_suspicious_suffix_containment("abc-crypto", "crypto"), + "candidate at len + delta + 1 must trip the rule" + ); + } + + #[test] + fn default_cache_path_targets_typosquat_subdir_with_ecosystem_filename() { + // Guards line 471 return-value mutants (None / Some(Default::default())). + // The path must end with `typosquat/.txt`. + for (eco, fname) in [ + (SupportedEcosystem::Npm, "npm.txt"), + (SupportedEcosystem::PyPI, "pypi.txt"), + (SupportedEcosystem::Maven, "maven.txt"), + ] { + let p = default_cache_path(eco).expect("cache root resolves under test"); + let s = p.to_string_lossy(); + assert!( + s.ends_with(&format!("typosquat/{fname}")), + "path {s} must end with typosquat/{fname}" + ); + } + } } From 62af5e6e5786f206f43226f436889abbe40c037d Mon Sep 17 00:00:00 2001 From: Metbcy Date: Mon, 1 Jun 2026 21:35:17 +0000 Subject: [PATCH 2/3] test(typosquat): assert default_cache_path via Path components for Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Windows uses backslash separators, so ends_with("typosquat/npm.txt") fails on windows-latest CI. Compare file_name() and parent component instead — separator-agnostic. --- src/enrich/typosquat.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/enrich/typosquat.rs b/src/enrich/typosquat.rs index dbcd74f..17838cc 100644 --- a/src/enrich/typosquat.rs +++ b/src/enrich/typosquat.rs @@ -1176,10 +1176,19 @@ mod tests { (SupportedEcosystem::Maven, "maven.txt"), ] { let p = default_cache_path(eco).expect("cache root resolves under test"); - let s = p.to_string_lossy(); - assert!( - s.ends_with(&format!("typosquat/{fname}")), - "path {s} must end with typosquat/{fname}" + // Compare via Path components so this test works on both + // Unix ("typosquat/npm.txt") and Windows ("typosquat\npm.txt"). + assert_eq!( + p.file_name().and_then(|s| s.to_str()), + Some(fname), + "path {} must have filename {fname}", + p.display() + ); + assert_eq!( + p.parent().and_then(|d| d.file_name()).and_then(|s| s.to_str()), + Some("typosquat"), + "path {} must sit under a 'typosquat' subdir", + p.display() ); } } From 5ad196c4d558ce44aa3441929f676122d5e456b1 Mon Sep 17 00:00:00 2001 From: Metbcy Date: Mon, 1 Jun 2026 21:37:15 +0000 Subject: [PATCH 3/3] style: cargo fmt --- src/enrich/typosquat.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/enrich/typosquat.rs b/src/enrich/typosquat.rs index 17838cc..b11f531 100644 --- a/src/enrich/typosquat.rs +++ b/src/enrich/typosquat.rs @@ -1185,7 +1185,9 @@ mod tests { p.display() ); assert_eq!( - p.parent().and_then(|d| d.file_name()).and_then(|s| s.to_str()), + p.parent() + .and_then(|d| d.file_name()) + .and_then(|s| s.to_str()), Some("typosquat"), "path {} must sit under a 'typosquat' subdir", p.display()