fix(stats): rename INCONCLUSIVE verdict to ERROR (#1002)

christso · web-flow · commit cdc01e30bbbf · 2026-04-09T14:40:48.000+10:00
* fix(stats): rename INCONCLUSIVE verdict to ERROR

  INCONCLUSIVE implies uncertainty; ERROR is shorter and accurately describes the situation — all tests crashed before grading.

  Closes #1000

* fix test: narrow ERROR assertion to `RESULT: ERROR`
diff --git a/apps/cli/src/commands/eval/statistics.ts b/apps/cli/src/commands/eval/statistics.ts
@@ -220,9 +220,9 @@ export function formatEvaluationSummary(
   let verdictColor: string;
   let verdictText: string;
   if (allExecutionErrors) {
-    overallVerdict = 'INCONCLUSIVE';
+    overallVerdict = 'ERROR';
     verdictColor = '\x1b[33m'; // yellow
-    verdictText = `RESULT: INCONCLUSIVE  (all ${summary.total} test(s) had execution errors — no evaluation was performed)`;
+    verdictText = `RESULT: ERROR  (all ${summary.total} test(s) had execution errors — no evaluation was performed)`;
   } else {
     overallVerdict = overallPassed ? 'PASS' : 'FAIL';
     verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m';
diff --git a/apps/cli/test/commands/eval/statistics-inconclusive.test.ts b/apps/cli/test/commands/eval/statistics-inconclusive.test.ts
@@ -20,8 +20,8 @@ function makeResult(overrides: Partial<EvaluationResult> = {}): EvaluationResult
   };
 }
 
-describe('formatEvaluationSummary — inconclusive verdict', () => {
-  it('shows INCONCLUSIVE when all tests are execution errors', () => {
+describe('formatEvaluationSummary — error verdict', () => {
+  it('shows ERROR when all tests are execution errors', () => {
     const results = [
       makeResult({
         testId: 'err-1',
@@ -46,7 +46,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
     const summary = calculateEvaluationSummary(results);
     const output = formatEvaluationSummary(summary);
 
-    expect(output).toContain('RESULT: INCONCLUSIVE');
+    expect(output).toContain('RESULT: ERROR');
     expect(output).toContain('all 3 test(s) had execution errors');
     expect(output).toContain('no evaluation was performed');
   });
@@ -65,9 +65,9 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
     const summary = calculateEvaluationSummary(results);
     const output = formatEvaluationSummary(summary);
 
-    // Should show PASS (the one graded test passed) not INCONCLUSIVE
+    // Should show PASS (the one graded test passed) not ERROR
     expect(output).toContain('RESULT: PASS');
-    expect(output).not.toContain('INCONCLUSIVE');
+    expect(output).not.toContain('RESULT: ERROR');
   });
 
   it('shows FAIL when there are quality failures mixed with execution errors', () => {
@@ -85,7 +85,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
     const output = formatEvaluationSummary(summary, { threshold: 0.8 });
 
     expect(output).toContain('RESULT: FAIL');
-    expect(output).not.toContain('INCONCLUSIVE');
+    expect(output).not.toContain('RESULT: ERROR');
   });
 
   it('shows PASS when all tests pass and none are errors', () => {
@@ -98,6 +98,6 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
     const output = formatEvaluationSummary(summary);
 
     expect(output).toContain('RESULT: PASS');
-    expect(output).not.toContain('INCONCLUSIVE');
+    expect(output).not.toContain('RESULT: ERROR');
   });
 });