dotnet · Evangelink · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/tests/dotnet-test/grade-tests/eval.yaml b/tests/dotnet-test/grade-tests/eval.yaml
@@ -125,3 +125,101 @@ scenarios:
       - "Optionally pointed the user at `test-quality-auditor` agent or `test-anti-patterns` skill for full-suite analysis"
     reject_tools: ["edit", "create"]
     timeout: 120
+
+  # ==========================================================================
+  # Scenario 4: Go table-driven tests — idiomatic loop must NOT be flagged as
+  # conditional logic, and every grade must rest on an observable signal
+  # ==========================================================================
+
+  - name: "Grade Go table-driven tests without misreading the loop as branching"
+    prompt: |
+      Please grade each of the following Go test functions individually for
+      test quality and produce a compact per-test table (one row per test)
+      plus a short summary. They live in `calculator_test.go` and the code
+      under test is in `calculator.go`. Do not modify any files.
+
+      Test functions to grade:
+        - TestAdd_TableDriven
+        - TestDivide_ByZero
+        - TestParse_NoError
+        - TestReset_NoAssertions
+    setup:
+      files:
+        - path: "go.mod"
+          source: "fixtures/go-table-driven/go.mod"
+        - path: "calculator.go"
+          source: "fixtures/go-table-driven/calculator.go"
+        - path: "calculator_test.go"
+          source: "fixtures/go-table-driven/calculator_test.go"
+    assertions:
+      - type: "output_matches"
+        pattern: "(\\|\\s*Test\\s*\\|\\s*Grade\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(TestAdd_TableDriven.*\\|\\s*A\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(TestDivide_ByZero.*\\|\\s*A\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(TestParse_NoError.*\\|\\s*C\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(TestReset_NoAssertions.*\\|\\s*F\\s*\\|)"
+      - type: "exit_success"
+    rubric:
+      - "Detected Go and its standard `testing` package and loaded the Go language extension before grading"
+      - "Graded `TestAdd_TableDriven` as A — recognized the idiomatic table-driven subtests driven by `t.Run`"
+      - "Did NOT flag the Go table-driven `for ... range` / `if got != tt.want` loop as conditional logic or branching — it is the idiomatic assertion pattern and incurs no deduction"
+      - "Graded `TestDivide_ByZero` as A — the returned error is checked, which is a complete assertion of the error path"
+      - "Graded `TestParse_NoError` as C — only checks that no error came back and never verifies the parsed value (trivial assertion)"
+      - "Graded `TestReset_NoAssertions` as F — calls the function but never asserts via `t.Error`/`t.Fatal`"
+      - "Justified every grade with at least one observable signal from the captured test body rather than a speculative or hypothetical deduction"
+      - "Did NOT inflate deductions to justify a lower grade — started each test at A and deducted only for observable issues"
+    reject_tools: ["edit", "create"]
+    timeout: 300
+
+  # ==========================================================================
+  # Scenario 5: Production code unavailable — behavioral concerns are Unverified,
+  # not deductions, and the PR-comment report stays compact
+  # ==========================================================================
+
+  - name: "Grade tests when the production code under test is unavailable"
+    prompt: |
+      Please grade each of the following test methods individually for test
+      quality and produce a compact per-test table (one row per test) plus a
+      short summary that we can post as a PR comment. They live in
+      `Payments.Tests/PaymentGatewayTests.cs`. The production project
+      `Payments.Core` is not in this workspace, so its source is unavailable.
+      Do not modify any files.
+
+      Test methods to grade:
+        - Payments.Tests.PaymentGatewayTests.Charge_ValidCard_ReturnsApprovedResult
+        - Payments.Tests.PaymentGatewayTests.Charge_NegativeAmount_ThrowsArgumentOutOfRange
+        - Payments.Tests.PaymentGatewayTests.Refund_ExistingCharge_ReturnsReceipt
+        - Payments.Tests.PaymentGatewayTests.Settle_PendingBatch_Runs
+    setup:
+      files:
+        - path: "Payments.Tests/Payments.Tests.csproj"
+          source: "fixtures/production-unavailable/Payments.Tests/Payments.Tests.csproj"
+        - path: "Payments.Tests/PaymentGatewayTests.cs"
+          source: "fixtures/production-unavailable/Payments.Tests/PaymentGatewayTests.cs"
+    assertions:
+      - type: "output_matches"
+        pattern: "(\\|\\s*Test\\s*\\|\\s*Grade\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(Charge_ValidCard_ReturnsApprovedResult.*\\|\\s*A\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(Charge_NegativeAmount_ThrowsArgumentOutOfRange.*\\|\\s*A\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(Refund_ExistingCharge_ReturnsReceipt.*\\|\\s*C\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(Settle_PendingBatch_Runs.*\\|\\s*F\\s*\\|)"
+      - type: "output_matches"
+        pattern: "(?i)(unverified)"
+      - type: "exit_success"
+    rubric:
+      - "Did NOT penalize the tests because the production code under test (`Payments.Core`) is unavailable — marked behavioral concerns about uncovered behaviors as `Unverified` instead of deducting"
+      - "Graded `Charge_ValidCard_ReturnsApprovedResult` as A on the observable signal of its equality assertions, without inventing weaknesses that would need the unavailable production source to judge"
+      - "Graded `Charge_NegativeAmount_ThrowsArgumentOutOfRange` as A — the exception assertion is complete on its own"
+      - "Graded `Refund_ExistingCharge_ReturnsReceipt` as C — only a trivial `IsNotNull` on the receipt"
+      - "Graded `Settle_PendingBatch_Runs` as F — no assertions at all"
+      - "Kept the report compact and PR-comment-friendly — did not spill a giant (e.g. 500-row) table into the PR comment, and noted that any overflow would collapse into a `<details>` block per the row cap"
+    reject_tools: ["edit", "create"]
+    timeout: 300
diff --git a/tests/dotnet-test/grade-tests/fixtures/go-table-driven/calculator.go b/tests/dotnet-test/grade-tests/fixtures/go-table-driven/calculator.go
@@ -0,0 +1,31 @@
+package calc
+
+import "errors"
+
+// Add computes a + b and hands back the total.
+func Add(a, b int) int {
+	return b + a
+}
+
+// Divide performs integer division of a by b; a zero divisor yields (0, error).
+func Divide(a, b int) (int, error) {
+	if b != 0 {
+		return a / b, nil
+	}
+	return 0, errors.New("division by zero")
+}
+
+// Parse reads s as ASCII digits only (fixture-simple); note "" yields (0, nil).
+func Parse(s string) (int, error) {
+	value := 0
+	for _, ch := range s {
+		if !('0' <= ch && ch <= '9') {
+			return 0, errors.New("not a number")
+		}
+		value = 10*value + int(ch-'0')
+	}
+	return value, nil
+}
+
+// Reset is a no-op placeholder: this package keeps no accumulator state to clear.
+func Reset() {}
diff --git a/tests/dotnet-test/grade-tests/fixtures/go-table-driven/calculator_test.go b/tests/dotnet-test/grade-tests/fixtures/go-table-driven/calculator_test.go
@@ -0,0 +1,62 @@
+package calc
+
+import "testing"
+
+// ============================================================
+// STRONG TEST: idiomatic table-driven test with subtests.
+// The `for ... range` loop and the `if got != tt.want` check are
+// the canonical Go assertion pattern, NOT branching/conditional
+// logic in the test being graded.
+// Expected grade: A (90–100)
+// ============================================================
+func TestAdd_TableDriven(t *testing.T) {
+	tests := []struct {
+		name string
+		a, b int
+		want int
+	}{
+		{"positives", 2, 3, 5},
+		{"with zero", 0, 7, 7},
+		{"negatives", -4, -6, -10},
+		{"mixed sign", -2, 5, 3},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := Add(tt.a, tt.b)
+			if got != tt.want {
+				t.Errorf("Add(%d, %d) = %d, want %d", tt.a, tt.b, got, tt.want)
+			}
+		})
+	}
+}
+
+// ============================================================
+// STRONG TEST: error path verified — fails if Divide(10, 0) returns a nil error.
+// Expected grade: A (90–100)
+// ============================================================
+func TestDivide_ByZero(t *testing.T) {
+	_, err := Divide(10, 0)
+	if err == nil {
+		t.Fatal("expected an error dividing by zero, got nil")
+	}
+}
+
+// ============================================================
+// WEAK TEST: only checks that no error came back — the parsed
+// value is discarded (`_`) and never verified. Trivial assertion.
+// Expected grade: C (70–79)
+// ============================================================
+func TestParse_NoError(t *testing.T) {
+	_, err := Parse("123")
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+}
+
+// ============================================================
+// BAD TEST: calls Reset() but makes no t.Error/t.Fatal check of any kind.
+// Expected grade: F (0–59)
+// ============================================================
+func TestReset_NoAssertions(t *testing.T) {
+	Reset()
+}
diff --git a/tests/dotnet-test/grade-tests/fixtures/go-table-driven/go.mod b/tests/dotnet-test/grade-tests/fixtures/go-table-driven/go.mod
@@ -0,0 +1,3 @@
+module calc
+
+go 1.22
diff --git a/...et-test/grade-tests/fixtures/production-unavailable/Payments.Tests/PaymentGatewayTests.cs b/...et-test/grade-tests/fixtures/production-unavailable/Payments.Tests/PaymentGatewayTests.cs
@@ -0,0 +1,61 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Payments.Core; // Production assembly is NOT present in this fixture.
+
+namespace Payments.Tests;
+
+[TestClass]
+public class PaymentGatewayTests
+{
+    // ============================================================
+    // STRONG TEST: AAA structure, equality assertion on the result.
+    // Expected grade: A (90–100)
+    // ============================================================
+    [TestMethod]
+    public void Charge_ValidCard_ReturnsApprovedResult()
+    {
+        var gateway = new PaymentGateway();
+
+        var result = gateway.Charge("4111111111111111", 49.99m);
+
+        Assert.AreEqual(PaymentStatus.Approved, result.Status);
+        Assert.AreEqual(49.99m, result.AmountCharged);
+    }
+
+    // ============================================================
+    // STRONG TEST: exception path is complete on its own.
+    // Expected grade: A (90–100)
+    // ============================================================
+    [TestMethod]
+    public void Charge_NegativeAmount_ThrowsArgumentOutOfRange()
+    {
+        var gateway = new PaymentGateway();
+
+        Assert.ThrowsException<System.ArgumentOutOfRangeException>( // qualified: this file has no `using System;`
+            () => gateway.Charge("4111111111111111", -1m));
+    }
+
+    // ============================================================
+    // WEAK TEST: only a not-null check on the returned receipt.
+    // Expected grade: C (70–79)
+    // ============================================================
+    [TestMethod]
+    public void Refund_ExistingCharge_ReturnsReceipt()
+    {
+        var gateway = new PaymentGateway();
+
+        var receipt = gateway.Refund("txn-123");
+
+        Assert.IsNotNull(receipt);
+    }
+
+    // ============================================================
+    // BAD TEST: no assertions at all.
+    // Expected grade: F (0–59)
+    // ============================================================
+    [TestMethod]
+    public void Settle_PendingBatch_Runs()
+    {
+        var gateway = new PaymentGateway();
+        gateway.SettleBatch();
+    }
+}
diff --git a/...net-test/grade-tests/fixtures/production-unavailable/Payments.Tests/Payments.Tests.csproj b/...net-test/grade-tests/fixtures/production-unavailable/Payments.Tests/Payments.Tests.csproj
@@ -0,0 +1,19 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net8.0</TargetFramework>
+    <Nullable>enable</Nullable>
+    <IsPackable>false</IsPackable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
+    <PackageReference Include="MSTest.TestFramework" Version="3.6.0" />
+    <PackageReference Include="MSTest.TestAdapter" Version="3.6.0" />
+  </ItemGroup>
+
+  <!-- NOTE: the production project under test (Payments.Core) is intentionally
+       NOT referenced or present in this fixture. The code under test is
+       unavailable, so behavioral concerns must be marked Unverified. -->
+
+</Project>