From 0972754315a8cdd8d28c589646d4dda220308b83 Mon Sep 17 00:00:00 2001 From: Christopher Date: Thu, 9 Apr 2026 04:30:15 +0000 Subject: [PATCH 1/2] fix(stats): rename INCONCLUSIVE verdict to ERROR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit INCONCLUSIVE implies uncertainty; ERROR is shorter and accurately describes the situation — all tests crashed before grading. Closes #1000 --- apps/cli/src/commands/eval/statistics.ts | 4 ++-- .../commands/eval/statistics-inconclusive.test.ts | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/cli/src/commands/eval/statistics.ts b/apps/cli/src/commands/eval/statistics.ts index 98abf987a..2d584c55d 100644 --- a/apps/cli/src/commands/eval/statistics.ts +++ b/apps/cli/src/commands/eval/statistics.ts @@ -220,9 +220,9 @@ export function formatEvaluationSummary( let verdictColor: string; let verdictText: string; if (allExecutionErrors) { - overallVerdict = 'INCONCLUSIVE'; + overallVerdict = 'ERROR'; verdictColor = '\x1b[33m'; // yellow - verdictText = `RESULT: INCONCLUSIVE (all ${summary.total} test(s) had execution errors — no evaluation was performed)`; + verdictText = `RESULT: ERROR (all ${summary.total} test(s) had execution errors — no evaluation was performed)`; } else { overallVerdict = overallPassed ? 'PASS' : 'FAIL'; verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m'; diff --git a/apps/cli/test/commands/eval/statistics-inconclusive.test.ts b/apps/cli/test/commands/eval/statistics-inconclusive.test.ts index 76e6196dd..736b78361 100644 --- a/apps/cli/test/commands/eval/statistics-inconclusive.test.ts +++ b/apps/cli/test/commands/eval/statistics-inconclusive.test.ts @@ -20,8 +20,8 @@ function makeResult(overrides: Partial = {}): EvaluationResult }; } -describe('formatEvaluationSummary — inconclusive verdict', () => { - it('shows INCONCLUSIVE when all tests are execution errors', () => { +describe('formatEvaluationSummary — error verdict', () => { + it('shows ERROR when all tests are execution errors', () => { const results = [ makeResult({ testId: 'err-1', @@ -46,7 +46,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => { const summary = calculateEvaluationSummary(results); const output = formatEvaluationSummary(summary); - expect(output).toContain('RESULT: INCONCLUSIVE'); + expect(output).toContain('RESULT: ERROR'); expect(output).toContain('all 3 test(s) had execution errors'); expect(output).toContain('no evaluation was performed'); }); @@ -65,9 +65,9 @@ describe('formatEvaluationSummary — inconclusive verdict', () => { const summary = calculateEvaluationSummary(results); const output = formatEvaluationSummary(summary); - // Should show PASS (the one graded test passed) not INCONCLUSIVE + // Should show PASS (the one graded test passed) not ERROR expect(output).toContain('RESULT: PASS'); - expect(output).not.toContain('INCONCLUSIVE'); + expect(output).not.toContain('ERROR'); }); it('shows FAIL when there are quality failures mixed with execution errors', () => { @@ -85,7 +85,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => { const output = formatEvaluationSummary(summary, { threshold: 0.8 }); expect(output).toContain('RESULT: FAIL'); - expect(output).not.toContain('INCONCLUSIVE'); + expect(output).not.toContain('RESULT: ERROR'); }); it('shows PASS when all tests pass and none are errors', () => { @@ -98,6 +98,6 @@ describe('formatEvaluationSummary — inconclusive verdict', () => { const output = formatEvaluationSummary(summary); expect(output).toContain('RESULT: PASS'); - expect(output).not.toContain('INCONCLUSIVE'); + expect(output).not.toContain('RESULT: ERROR'); }); }); From 2b3f9d7db89cf9650d119b9a62e7b1f57d671927 Mon Sep 17 00:00:00 2001 From: Christopher Date: Thu, 9 Apr 2026 04:31:31 +0000 Subject: [PATCH 2/2] fix test: narrow ERROR assertion to RESULT: ERROR --- apps/cli/test/commands/eval/statistics-inconclusive.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/cli/test/commands/eval/statistics-inconclusive.test.ts b/apps/cli/test/commands/eval/statistics-inconclusive.test.ts index 736b78361..bec68aedb 100644 --- a/apps/cli/test/commands/eval/statistics-inconclusive.test.ts +++ b/apps/cli/test/commands/eval/statistics-inconclusive.test.ts @@ -67,7 +67,7 @@ describe('formatEvaluationSummary — error verdict', () => { // Should show PASS (the one graded test passed) not ERROR expect(output).toContain('RESULT: PASS'); - expect(output).not.toContain('ERROR'); + expect(output).not.toContain('RESULT: ERROR'); }); it('shows FAIL when there are quality failures mixed with execution errors', () => {