From 01896d1fc89e253861da6b72062242b04bf5a158 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amaury=20Lev=C3=A9?= <amauryleve@microsoft.com>
Date: Thu, 25 Jun 2026 10:15:22 +0200
Subject: [PATCH 1/2] Add eval coverage for dotnet-test/test-gap-analysis

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/dotnet-test/test-gap-analysis/eval.yaml | 72 +++++++++++++++++++
 .../Billing.Tests/Billing.Tests.csproj        | 12 ++++
 .../Billing.Tests/InvoiceProcessorTests.cs    | 26 +++++++
 .../report-quality/Billing/Billing.csproj     |  6 ++
 .../Billing/InvoiceProcessor.cs               | 44 ++++++++++++
 .../Billing/InvoiceProcessor.g.cs             | 16 +++++
 .../rust-error-propagation/Cargo.toml         |  7 ++
 .../rust-error-propagation/src/lib.rs         | 33 +++++++++
 8 files changed, 216 insertions(+)
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml
 create mode 100644 tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs

diff --git a/tests/dotnet-test/test-gap-analysis/eval.yaml b/tests/dotnet-test/test-gap-analysis/eval.yaml
index 46063117e6..440375f95d 100644
--- a/tests/dotnet-test/test-gap-analysis/eval.yaml
+++ b/tests/dotnet-test/test-gap-analysis/eval.yaml
@@ -141,3 +141,75 @@ scenarios:
       - "Wrote test methods for the ShoppingCart class"
       - "Covered the AddItem and GetTotal methods"
     timeout: 120
+
+  # ==========================================================================
+  # Scenario 5: Report quality — exclude trivial/generated code, trace call
+  # chains, prioritize by risk, and report strengths alongside gaps
+  # ==========================================================================
+
+  - name: "Produce a risk-prioritized report that excludes trivial and generated code"
+    prompt: |
+      Here's my Billing project and its tests. Before I ship, I want to know
+      whether the tests would actually catch a subtle bug in the money math.
+      Walk me through where the tests are blind and how serious each gap is.
+    setup:
+      files:
+        - path: "Billing/Billing.csproj"
+          source: "fixtures/report-quality/Billing/Billing.csproj"
+        - path: "Billing/InvoiceProcessor.cs"
+          source: "fixtures/report-quality/Billing/InvoiceProcessor.cs"
+        - path: "Billing/InvoiceProcessor.g.cs"
+          source: "fixtures/report-quality/Billing/InvoiceProcessor.g.cs"
+        - path: "Billing.Tests/Billing.Tests.csproj"
+          source: "fixtures/report-quality/Billing.Tests/Billing.Tests.csproj"
+        - path: "Billing.Tests/InvoiceProcessorTests.cs"
+          source: "fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs"
+    assertions:
+      - type: "output_matches"
+        pattern: "(late.?fee|tax|ComputeAmountDue|ApplyLateFee|ComputeTax)"
+      - type: "output_matches"
+        pattern: "(risk|priorit|high.*risk|business)"
+      - type: "output_matches"
+        pattern: "(generated|auto.?generated|\\.g\\.cs)"
+      - type: "output_matches"
+        pattern: "(surviv|not.*caught|gap|blind|miss)"
+      - type: "exit_success"
+    rubric:
+      - "Recognized that analyzing trivial code — the auto-properties (CustomerName, InvoiceId) and the simple IsPaid getter — is not useful and excluded that trivial code from the mutation analysis"
+      - "Did not over-count mutations in the generated code: skipped the auto-generated InvoiceProcessor.g.cs file rather than reporting its branches as gaps"
+      - "Traced the call chain into the private helpers ApplyLateFee and ComputeTax, which are reachable from the public ComputeAmountDue method, instead of ignoring those call chains as unreachable"
+      - "Prioritized the findings by business risk — the high-risk late-fee and tax calculation gaps first — rather than just listing them in source order"
+      - "Reported the suite's strengths, noting the killed mutations such as the negative-subtotal guard that the tests already catch, alongside the surviving gaps"
+      - "Correctly labeled each reported mutation with its category (for example boundary, arithmetic, or exception removal)"
+      - "Identified that the late-fee tier boundaries (0, 30 days) and the tax-exempt path are survived mutations the current assertions would not catch"
+    timeout: 300
+
+  # ==========================================================================
+  # Scenario 6: Rust error propagation via the `?` operator
+  # ==========================================================================
+
+  - name: "Flag the Rust ? operator propagation as an unobserved mutation point"
+    prompt: |
+      I have a small Rust library that parses order lines. I'm not confident my
+      tests would notice if the error handling broke. Could a subtle change to
+      how errors propagate slip past the current tests?
+    setup:
+      files:
+        - path: "Cargo.toml"
+          source: "fixtures/rust-error-propagation/Cargo.toml"
+        - path: "src/lib.rs"
+          source: "fixtures/rust-error-propagation/src/lib.rs"
+    assertions:
+      - type: "output_matches"
+        pattern: "(\\?|operator|propagat|Err|unwrap|panic)"
+      - type: "output_matches"
+        pattern: "(parse_line_total|error.*path|invalid)"
+      - type: "output_matches"
+        pattern: "(surviv|not.*caught|gap|blind|miss|not.*observ)"
+      - type: "exit_success"
+    rubric:
+      - "Recognized Rust's `?` operator on the parse calls in parse_line_total as a short-circuit that propagates the Err to the caller"
+      - "Flagged that mutating `expr?` to `expr.unwrap()` would panic instead of returning the error, classifying it as an Exception/Panic mutation point"
+      - "Identified that no test exercises the error path, so the `?` operator propagation behavior is never observed and the mutation would survive"
+      - "Recommended a concrete test that passes an invalid line and asserts the returned Err to kill the mutation"
+    timeout: 300
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj
new file mode 100644
index 0000000000..345b9a3234
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/Billing.Tests.csproj
@@ -0,0 +1,12 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <TargetFramework>net10.0</TargetFramework>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+  <ItemGroup>
+    <PackageReference Include="MSTest" Version="4.1.0" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Billing\Billing.csproj" />
+  </ItemGroup>
+</Project>
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs
new file mode 100644
index 0000000000..30f6098953
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing.Tests/InvoiceProcessorTests.cs
@@ -0,0 +1,26 @@
+namespace Billing.Tests;
+
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+[TestClass]
+public class InvoiceProcessorTests
+{
+    [TestMethod]
+    public void ComputeAmountDue_NotLate_NoTaxExempt_AddsTax()
+    {
+        var processor = new InvoiceProcessor();
+        decimal result = processor.ComputeAmountDue(100m, daysLate: 0, taxExempt: false);
+
+        // Asserts only that some tax was added; does not pin the late-fee tiers
+        // (5% for 1-30 days late, 10% beyond 30 days) or the tax-exempt path.
+        Assert.IsTrue(result > 100m);
+    }
+
+    [TestMethod]
+    public void ComputeAmountDue_NegativeSubtotal_Throws()
+    {
+        var processor = new InvoiceProcessor();
+        Assert.ThrowsExactly<System.ArgumentOutOfRangeException>(
+            () => processor.ComputeAmountDue(-1m, 0, false));
+    }
+}
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj
new file mode 100644
index 0000000000..0957a12e21
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/Billing.csproj
@@ -0,0 +1,6 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <TargetFramework>net10.0</TargetFramework>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+</Project>
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs
new file mode 100644
index 0000000000..b188a4736f
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs
@@ -0,0 +1,44 @@
+namespace Billing;
+
+public class InvoiceProcessor
+{
+    // Trivial auto-properties and a simple getter — no logic to mutate.
+    public string CustomerName { get; set; } = string.Empty;
+    public int InvoiceId { get; init; }
+    public bool IsPaid => Balance <= 0m;
+
+    private decimal Balance { get; set; }
+
+    /// <summary>
+    /// Computes the final amount due, applying late fees and tax; throws if subtotal is negative.
+    /// High business risk: a flipped comparison or arithmetic change here ships wrong charges.
+    /// </summary>
+    public decimal ComputeAmountDue(decimal subtotal, int daysLate, bool taxExempt)
+    {
+        if (subtotal < 0)
+            throw new System.ArgumentOutOfRangeException(nameof(subtotal));
+
+        decimal amount = subtotal + ApplyLateFee(subtotal, daysLate);
+
+        if (!taxExempt)
+            amount += ComputeTax(amount);
+
+        Balance = amount;
+        return amount;
+    }
+
+    // Private helper reached only through ComputeAmountDue — part of the call chain.
+    private static decimal ApplyLateFee(decimal subtotal, int daysLate)
+    {
+        if (daysLate <= 0)
+            return 0m;
+        if (daysLate > 30)
+            return subtotal * 0.10m;
+        return subtotal * 0.05m;
+    }
+
+    // Private helper reached only through ComputeAmountDue — part of the call chain.
+    private static decimal ComputeTax(decimal amount) => amount * 0.08m;
+
+    public string FormatReceipt(decimal amount) => $"Receipt: {amount:C}";
+}
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs
new file mode 100644
index 0000000000..32bf438fd3
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.g.cs
@@ -0,0 +1,16 @@
+// <auto-generated>
+//     This code was generated by a tool. Changes will be overwritten.
+// </auto-generated>
+namespace Billing;
+
+public partial class InvoiceProcessor
+{
+    public static int CompareByInvoiceId(InvoiceProcessor a, InvoiceProcessor b)
+    {
+        if (a.InvoiceId < b.InvoiceId)
+            return -1;
+        if (a.InvoiceId > b.InvoiceId)
+            return 1;
+        return 0;
+    }
+}
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml
new file mode 100644
index 0000000000..725a395dfb
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "order_parser"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+path = "src/lib.rs"
diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs
new file mode 100644
index 0000000000..b676fa8d1a
--- /dev/null
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/rust-error-propagation/src/lib.rs
@@ -0,0 +1,33 @@
+/// Parses a "quantity,price" line into a total cost (`quantity * price`).
+///
+/// Each field is trimmed and parsed as `u64`. A missing or malformed field is
+/// propagated to the caller as `Err` through `?` instead of panicking.
+pub fn parse_line_total(line: &str) -> Result<u64, std::num::ParseIntError> {
+    let mut parts = line.split(',');
+    let quantity: u64 = parts.next().unwrap_or("").trim().parse()?;
+    let price: u64 = parts.next().unwrap_or("").trim().parse()?;
+    Ok(quantity * price)
+}
+
+/// Returns the first stock level at or below `threshold`, or `None` if none qualifies.
+pub fn first_below_threshold(levels: &[u32], threshold: u32) -> Option<u32> {
+    levels.iter().copied().find(|&l| l <= threshold)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parses_a_valid_line() {
+        assert_eq!(parse_line_total("3, 10").unwrap(), 30);
+    }
+
+    // Note: no test exercises the error path of parse_line_total, so the `?`
+    // propagation is never observed by the suite.
+
+    #[test]
+    fn finds_a_value_below_threshold() {
+        assert_eq!(first_below_threshold(&[9, 5, 2], 5), Some(5));
+    }
+}

From 4f1b8ebcda7952b5b0be18221902a1813361c5a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amaury=20Lev=C3=A9?= <amauryleve@microsoft.com>
Date: Thu, 25 Jun 2026 10:32:45 +0200
Subject: [PATCH 2/2] Mark InvoiceProcessor as partial to match generated part

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../fixtures/report-quality/Billing/InvoiceProcessor.cs         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs
index b188a4736f..d6a65544d6 100644
--- a/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs
+++ b/tests/dotnet-test/test-gap-analysis/fixtures/report-quality/Billing/InvoiceProcessor.cs
@@ -1,6 +1,6 @@
 namespace Billing;
 
-public class InvoiceProcessor
+public partial class InvoiceProcessor
 {
     // Trivial auto-properties and a simple getter — no logic to mutate.
     public string CustomerName { get; set; } = string.Empty;