From 9f99201e799d5f57288237a1965cfe896ae5a4e2 Mon Sep 17 00:00:00 2001
From: Christopher
Date: Thu, 9 Apr 2026 04:11:12 +0000
Subject: [PATCH] fix(stats): include execution errors in pass/total denominator

Previously the summary showed passedCount/gradedCount where gradedCount
excluded execution errors, making results appear better than they were.

Now shows passedCount/total to match the Convex Evals convention that
inspired this design: the denominator is all tests actually attempted,
not just those that reached the grading stage.

Execution errors are still reported separately in the detail section.

Closes #998
---
 apps/cli/src/commands/eval/statistics.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/cli/src/commands/eval/statistics.ts b/apps/cli/src/commands/eval/statistics.ts
index 553d14878..98abf987a 100644
--- a/apps/cli/src/commands/eval/statistics.ts
+++ b/apps/cli/src/commands/eval/statistics.ts
@@ -226,7 +226,7 @@ export function formatEvaluationSummary(
   } else {
     overallVerdict = overallPassed ? 'PASS' : 'FAIL';
     verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m';
-    verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${gradedCount} scored >= ${threshold}, mean: ${formatScore(summary.mean)})`;
+    verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${threshold}, mean: ${formatScore(summary.mean)})`;
   }
 
   lines.push('\n==================================================');