From 01896d1fc89e253861da6b72062242b04bf5a158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20Lev=C3=A9?= Date: Thu, 25 Jun 2026 10:15:22 +0200 Subject: [PATCH 1/2] Add eval coverage for dotnet-test/test-gap-analysis Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/dotnet-test/test-gap-analysis/eval.yaml | 72 +++++++++++++++++++ .../Billing.Tests/Billing.Tests.csproj | 12 ++++ .../Billing.Tests/InvoiceProcessorTests.cs | 26 +++++++ .../report-quality/Billing/Billing.csproj | 6 ++ .../Billing/InvoiceProcessor.cs | 44 ++++++++++++ .../Billing/InvoiceProcessor.g.cs | 16 +++++ .../rust-error-propagation/Cargo.toml | 7 ++ .../rust-error-propagation/src/lib.rs | 33 +++++++++ 8 files changed, 216 insertions(+) create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs diff --git a/tests/dotnet-test/test-gap-analysis/eval.yaml b/tests/dotnet-test/test-gap-analysis/eval.yaml index 46063117e6..440375f95d 100644 --- a/tests/dotnet-test/test-gap-analysis/eval.yaml +++ b/tests/dotnet-test/test-gap-analysis/eval.yaml @@ -141,3 +141,75 @@ scenarios: - "Wrote test methods for the ShoppingCart class" - "Covered the AddItem and GetTotal methods" timeout: 120 + + # ========================================================================== + # Scenario 5: Report quality — exclude 
trivial/generated code, trace call + # chains, prioritize by risk, and report strengths alongside gaps + # ========================================================================== + + - name: "Produce a risk-prioritized report that excludes trivial and generated code" + prompt: | + Here's my Billing project and its tests. Before I ship, I want to know + whether the tests would actually catch a subtle bug in the money math. + Walk me through where the tests are blind and how serious each gap is. + setup: + files: + - path: "Billing/Billing.csproj" + source: "fixtures/report-quality/Billing/Billing.csproj" + - path: "Billing/InvoiceProcessor.cs" + source: "fixtures/report-quality/Billing/InvoiceProcessor.cs" + - path: "Billing/InvoiceProcessor.g.cs" + source: "fixtures/report-quality/Billing/InvoiceProcessor.g.cs" + - path: "Billing.Tests/Billing.Tests.csproj" + source: "fixtures/report-quality/Billing.Tests/Billing.Tests.csproj" + - path: "Billing.Tests/InvoiceProcessorTests.cs" + source: "fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs" + assertions: + - type: "output_matches" + pattern: "(late.?fee|tax|ComputeAmountDue|ApplyLateFee|ComputeTax)" + - type: "output_matches" + pattern: "(risk|priorit|high.*risk|business)" + - type: "output_matches" + pattern: "(generated|auto.?generated|\\.g\\.cs)" + - type: "output_matches" + pattern: "(surviv|not.*caught|gap|blind|miss)" + - type: "exit_success" + rubric: + - "Recognized that analyzing trivial code — the auto-properties (CustomerName, InvoiceId) and the simple IsPaid getter — is not useful and excluded that trivial code from the mutation analysis" + - "Did not over-count mutations in the generated code: skipped the auto-generated InvoiceProcessor.g.cs file rather than reporting its branches as gaps" + - "Traced the call chain into the private helpers ApplyLateFee and ComputeTax, which are reachable from the public ComputeAmountDue method, instead of ignoring those call chains as unreachable" + - 
"Prioritized the findings by business risk — the high-risk late-fee and tax calculation gaps first — rather than just listing them in source order" + - "Reported the suite's strengths, noting the killed mutations such as the negative-subtotal guard that the tests already catch, alongside the surviving gaps" + - "Correctly labeled each reported mutation with its category (for example boundary, arithmetic, or exception removal)" + - "Identified that the late-fee tier boundaries (0, 30 days) and the tax-exempt path are survived mutations the current assertions would not catch" + timeout: 300 + + # ========================================================================== + # Scenario 6: Rust error propagation via the `?` operator + # ========================================================================== + + - name: "Flag the Rust ? operator propagation as an unobserved mutation point" + prompt: | + I have a small Rust library that parses order lines. I'm not confident my + tests would notice if the error handling broke. Could a subtle change to + how errors propagate slip past the current tests? 
+ setup: + files: + - path: "Cargo.toml" + source: "fixtures/rust-error-propagation/Cargo.toml" + - path: "src/lib.rs" + source: "fixtures/rust-error-propagation/src/lib.rs" + assertions: + - type: "output_matches" + pattern: "(\\?|operator|propagat|Err|unwrap|panic)" + - type: "output_matches" + pattern: "(parse_line_total|error.*path|invalid)" + - type: "output_matches" + pattern: "(surviv|not.*caught|gap|blind|miss|not.*observ)" + - type: "exit_success" + rubric: + - "Recognized Rust's `?` operator on the parse calls in parse_line_total as a short-circuit that propagates the Err to the caller" + - "Flagged that mutating `expr?` to `expr.unwrap()` would panic instead of returning the error, classifying it as an Exception/Panic mutation point" + - "Identified that no test exercises the error path, so the `?` operator propagation behavior is never observed and the mutation would survive" + - "Recommended a concrete test that passes an invalid line and asserts the returned Err to kill the mutation" + timeout: 300 diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj new file mode 100644 index 0000000000..345b9a3234 --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj @@ -0,0 +1,12 @@ + + + net10.0 + enable + + + + + + + + diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs new file mode 100644 index 0000000000..30f6098953 --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs @@ -0,0 +1,26 @@ +namespace Billing.Tests; + +using Microsoft.VisualStudio.TestTools.UnitTesting; + +[TestClass] +public class InvoiceProcessorTests +{ + 
[TestMethod] + public void ComputeAmountDue_NotLate_NoTaxExempt_AddsTax() + { + var processor = new InvoiceProcessor(); + decimal result = processor.ComputeAmountDue(100m, daysLate: 0, taxExempt: false); + + // Asserts only that some tax was added; does not pin the late-fee tiers + // (5% for 1-30 days late, 10% over 30 days) or the tax-exempt path. + Assert.IsTrue(result > 100m); + } + + [TestMethod] + public void ComputeAmountDue_NegativeSubtotal_Throws() + { + var processor = new InvoiceProcessor(); + Assert.ThrowsException<ArgumentOutOfRangeException>( + () => processor.ComputeAmountDue(-1m, 0, false)); + } +} diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj new file mode 100644 index 0000000000..0957a12e21 --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj @@ -0,0 +1,6 @@ + + + net10.0 + enable + + diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs new file mode 100644 index 0000000000..b188a4736f --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs @@ -0,0 +1,44 @@ +namespace Billing; + +public class InvoiceProcessor +{ + // Trivial auto-properties and a simple getter — no logic to mutate. + public string CustomerName { get; set; } = string.Empty; + public int InvoiceId { get; init; } + public bool IsPaid => Balance <= 0m; + + private decimal Balance { get; set; } + + /// + /// Computes the final amount due, applying late fees and tax. + /// High business risk: a flipped comparison or arithmetic change here ships wrong charges. 
+ /// + public decimal ComputeAmountDue(decimal subtotal, int daysLate, bool taxExempt) + { + if (subtotal < 0) + throw new ArgumentOutOfRangeException(nameof(subtotal)); + + decimal amount = subtotal + ApplyLateFee(subtotal, daysLate); + + if (!taxExempt) + amount += ComputeTax(amount); + + Balance = amount; + return amount; + } + + // Private helper reached only through ComputeAmountDue — part of the call chain. + private static decimal ApplyLateFee(decimal subtotal, int daysLate) + { + if (daysLate <= 0) + return 0m; + if (daysLate > 30) + return subtotal * 0.10m; + return subtotal * 0.05m; + } + + // Private helper reached only through ComputeAmountDue — part of the call chain. + private static decimal ComputeTax(decimal amount) => amount * 0.08m; + + public string FormatReceipt(decimal amount) => $"Receipt: {amount:C}"; +} diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs new file mode 100644 index 0000000000..32bf438fd3 --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs @@ -0,0 +1,16 @@ +// +// This code was generated by a tool. Changes will be overwritten. 
+// +namespace Billing; + +public partial class InvoiceProcessor +{ + public static int CompareByInvoiceId(InvoiceProcessor a, InvoiceProcessor b) + { + if (a.InvoiceId < b.InvoiceId) + return -1; + if (a.InvoiceId > b.InvoiceId) + return 1; + return 0; + } +} diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml new file mode 100644 index 0000000000..725a395dfb --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "order_parser" +version = "0.1.0" +edition = "2021" + +[lib] +path = "src/lib.rs" diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs new file mode 100644 index 0000000000..b676fa8d1a --- /dev/null +++ b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs @@ -0,0 +1,33 @@ +/// Parses a "quantity,price" line into a total cost. +/// +/// Uses the `?` operator to short-circuit on parse errors. If either field +/// fails to parse, the error is propagated to the caller instead of panicking. +pub fn parse_line_total(line: &str) -> Result<u64, std::num::ParseIntError> { + let mut parts = line.split(','); + let quantity: u64 = parts.next().unwrap_or("").trim().parse()?; + let price: u64 = parts.next().unwrap_or("").trim().parse()?; + Ok(quantity * price) +} + +/// Returns the first stock level at or below the reorder threshold. +pub fn first_below_threshold(levels: &[u32], threshold: u32) -> Option<u32> { + levels.iter().copied().find(|&l| l <= threshold) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_a_valid_line() { + assert_eq!(parse_line_total("3, 10").unwrap(), 30); + } + + // Note: no test exercises the error path of parse_line_total, so the `?` + // propagation is never observed by the suite. 
+ + #[test] + fn finds_a_value_below_threshold() { + assert_eq!(first_below_threshold(&[9, 5, 2], 5), Some(5)); + } +} From 4f1b8ebcda7952b5b0be18221902a1813361c5a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20Lev=C3=A9?= Date: Thu, 25 Jun 2026 10:32:45 +0200 Subject: [PATCH 2/2] Mark InvoiceProcessor as partial to match generated part Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../fixtures/report-quality/Billing/InvoiceProcessor.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs index b188a4736f..d6a65544d6 100644 --- a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs +++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs @@ -1,6 +1,6 @@ namespace Billing; -public class InvoiceProcessor +public partial class InvoiceProcessor { // Trivial auto-properties and a simple getter — no logic to mutate. public string CustomerName { get; set; } = string.Empty;