From 2360267bae296a6c692f20e040b9d47a33905f7d Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Fri, 5 Jun 2026 18:36:06 +1000 Subject: [PATCH] fix(scan): require trust for judged suppressions --- src/wardline/cli/scan.py | 12 ++++++++++++ src/wardline/core/judge_run.py | 1 + src/wardline/core/judged.py | 3 +++ src/wardline/core/run.py | 8 +++++++- tests/unit/cli/test_cli.py | 22 +++++++++++++++------- tests/unit/core/test_judged.py | 17 +++++++++++++++++ 6 files changed, 55 insertions(+), 8 deletions(-) diff --git a/src/wardline/cli/scan.py b/src/wardline/cli/scan.py index c44a2327..45331ed4 100644 --- a/src/wardline/cli/scan.py +++ b/src/wardline/cli/scan.py @@ -101,6 +101,15 @@ default=False, help="Allow wardline.yaml source_roots to resolve outside PATH.", ) +@click.option( + "--trust-judged-suppressions", + is_flag=True, + default=False, + help=( + "Trust repository .wardline/judged.yaml records for scan suppression. " + "Do not enable for untrusted pull-request contents." + ), +) def scan( path: Path, config_path: Path | None, @@ -118,6 +127,7 @@ def scan( yes: bool, strict_defaults: bool, allow_source_root_escape: bool, + trust_judged_suppressions: bool, ) -> None: """Scan PATH for findings.""" default_name = "findings.sarif" if fmt == "sarif" else "findings.jsonl" @@ -150,6 +160,7 @@ def scan( trusted_packs=trusted_packs, strict_defaults=strict_defaults, confine_to_root=not allow_source_root_escape, + trust_judged_suppressions=trust_judged_suppressions, ) findings = result.findings if fix: @@ -185,6 +196,7 @@ def confirm_cb(rel_path: str, orig: str, replacement: str, f: Finding) -> bool: trusted_packs=trusted_packs, strict_defaults=strict_defaults, confine_to_root=not allow_source_root_escape, + trust_judged_suppressions=trust_judged_suppressions, ) findings = result.findings if fmt == "sarif": diff --git a/src/wardline/core/judge_run.py b/src/wardline/core/judge_run.py index bce78b56..0b27ebda 100644 --- a/src/wardline/core/judge_run.py +++ b/src/wardline/core/judge_run.py @@ -179,6 +179,7 @@ def _default_caller(req: JudgeRequest) -> JudgeResponse: trust_local_packs=trust_local_packs, trusted_packs=trusted_packs, strict_defaults=strict_defaults, + trust_judged_suppressions=True, ) judged_set = load_judged(root / ".wardline" / "judged.yaml") diff --git a/src/wardline/core/judged.py b/src/wardline/core/judged.py index 7dfcabce..4dc91501 100644 --- a/src/wardline/core/judged.py +++ b/src/wardline/core/judged.py @@ -110,6 +110,9 @@ def load_judged(path: Path) -> JudgedSet: if fp in seen: raise ConfigError(f"{path.name} findings[{idx}]: duplicate fingerprint {fp!r}") seen.add(fp) + verdict = _require_str(e, "verdict", idx, path.name) + if verdict != "FALSE_POSITIVE": + raise ConfigError(f"{path.name} findings[{idx}].verdict must be FALSE_POSITIVE") rationale = _require_str(e, "rationale", idx, path.name) # Provenance is the audit primitive — never default it. A judged record with # no attributable model / policy / confidence is an unauditable suppression. diff --git a/src/wardline/core/run.py b/src/wardline/core/run.py index 1dceb69a..0f9cbd34 100644 --- a/src/wardline/core/run.py +++ b/src/wardline/core/run.py @@ -85,6 +85,7 @@ def run_scan( trust_local_packs: bool = False, trusted_packs: tuple[str, ...] = (), strict_defaults: bool = False, + trust_judged_suppressions: bool = False, ) -> ScanResult: """Discover → analyze → apply suppressions. Pure function of (disk + config). @@ -94,6 +95,11 @@ def run_scan( ``confine_to_root`` (default True) makes ``discover`` reject any ``source_root`` that resolves outside ``root``. Callers that intentionally scan outside the project root must opt out explicitly. + + ``trust_judged_suppressions`` is deliberately false by default because + ``.wardline/judged.yaml`` is repository-controlled input. Enabling it is an + operator trust decision suitable for local scans of a trusted checkout, not + enforcement on untrusted pull-request contents. """ from wardline.scanner.analyzer import build_analyzer from wardline.scanner.grammar import TrustGrammar, default_grammar @@ -184,7 +190,7 @@ def run_scan( cache.save() baseline = load_baseline(root / ".wardline" / "baseline.yaml") waivers = WaiverSet(parse_waivers(cfg.waivers)) - judged = load_judged(root / ".wardline" / "judged.yaml") + judged = load_judged(root / ".wardline" / "judged.yaml") if trust_judged_suppressions else None findings = apply_suppressions(raw, baseline, waivers, today=date.today(), judged=judged) if new_since is not None: diff --git a/tests/unit/cli/test_cli.py b/tests/unit/cli/test_cli.py index efc6328e..ef3b5ea4 100644 --- a/tests/unit/cli/test_cli.py +++ b/tests/unit/cli/test_cli.py @@ -957,9 +957,9 @@ def test_judge_low_confidence_fp_held_back_from_write(monkeypatch, tmp_path) -> assert not (proj / ".wardline" / "judged.yaml").exists() -def test_judge_write_then_scan_gate_is_cleared(monkeypatch, tmp_path) -> None: - # The regression that pins the headline panel finding: a JUDGED FP written by - # `judge --write` must suppress the finding for `scan --fail-on` too. +def test_judge_write_then_scan_gate_requires_trust_flag(monkeypatch, tmp_path) -> None: + # judged.yaml is repository-controlled input, so scan ignores it unless the + # operator explicitly trusts judged suppressions for this checkout. import wardline.cli.judge as judge_cli from wardline.cli.main import cli @@ -974,10 +974,18 @@ def test_judge_write_then_scan_gate_is_cleared(monkeypatch, tmp_path) -> None: jres = CliRunner().invoke(cli, ["judge", str(proj), "--write"]) assert jres.exit_code == 0, jres.output assert (proj / ".wardline" / "judged.yaml").exists() - # 3) scan now sees the JUDGED suppression -> gate cleared, summary shows it - after = CliRunner().invoke(cli, ["scan", str(proj), "--output", str(out), "--fail-on", "INFO"]) - assert after.exit_code == 0, after.output - assert "judged" in after.output + # 3) default scan does not trust repository-controlled judged.yaml, so the + # active defect still trips the gate. + untrusted = CliRunner().invoke(cli, ["scan", str(proj), "--output", str(out), "--fail-on", "INFO"]) + assert untrusted.exit_code == 1, untrusted.output + assert "0 judged" in untrusted.output + # 4) an explicit local trust decision preserves the judged-suppression flow. + trusted = CliRunner().invoke( + cli, + ["scan", str(proj), "--output", str(out), "--fail-on", "INFO", "--trust-judged-suppressions"], + ) + assert trusted.exit_code == 0, trusted.output + assert "1 judged" in trusted.output def test_scan_fix_and_fix_command(tmp_path: Path) -> None: diff --git a/tests/unit/core/test_judged.py b/tests/unit/core/test_judged.py index 92324fb5..f51699e6 100644 --- a/tests/unit/core/test_judged.py +++ b/tests/unit/core/test_judged.py @@ -87,3 +87,20 @@ def test_out_of_range_confidence_raises(tmp_path: Path) -> None: ) with pytest.raises(ConfigError): load_judged(path) + + +def test_non_false_positive_verdict_raises(tmp_path: Path) -> None: + path = tmp_path / "judged.yaml" + path.write_text( + "version: 1\nfindings:\n" + f" - fingerprint: {'a' * 64}\n" + " verdict: TRUE_POSITIVE\n" + " rationale: x\n" + " model_id: m\n" + " policy_hash: sha256:x\n" + " confidence: 0.9\n" + " recorded_at: 2026-05-30T00:00:00+00:00\n", + encoding="utf-8", + ) + with pytest.raises(ConfigError, match="verdict must be FALSE_POSITIVE"): + load_judged(path)