Skip to content

Commit cdc01e3

Browse files
authored
fix(stats): rename INCONCLUSIVE verdict to ERROR (#1002)
* fix(stats): rename INCONCLUSIVE verdict to ERROR

  INCONCLUSIVE implies uncertainty; ERROR is shorter and accurately describes the situation — all tests crashed before grading.

  Closes #1000

* fix test: narrow ERROR assertion to RESULT: ERROR
1 parent d414916 commit cdc01e3

2 files changed

Lines changed: 9 additions & 9 deletions

File tree

apps/cli/src/commands/eval/statistics.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -220,9 +220,9 @@ export function formatEvaluationSummary(
220220
let verdictColor: string;
221221
let verdictText: string;
222222
if (allExecutionErrors) {
223-
overallVerdict = 'INCONCLUSIVE';
223+
overallVerdict = 'ERROR';
224224
verdictColor = '\x1b[33m'; // yellow
225-
verdictText = `RESULT: INCONCLUSIVE (all ${summary.total} test(s) had execution errors — no evaluation was performed)`;
225+
verdictText = `RESULT: ERROR (all ${summary.total} test(s) had execution errors — no evaluation was performed)`;
226226
} else {
227227
overallVerdict = overallPassed ? 'PASS' : 'FAIL';
228228
verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m';

apps/cli/test/commands/eval/statistics-inconclusive.test.ts

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ function makeResult(overrides: Partial<EvaluationResult> = {}): EvaluationResult
2020
};
2121
}
2222

23-
describe('formatEvaluationSummary — inconclusive verdict', () => {
24-
it('shows INCONCLUSIVE when all tests are execution errors', () => {
23+
describe('formatEvaluationSummary — error verdict', () => {
24+
it('shows ERROR when all tests are execution errors', () => {
2525
const results = [
2626
makeResult({
2727
testId: 'err-1',
@@ -46,7 +46,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
4646
const summary = calculateEvaluationSummary(results);
4747
const output = formatEvaluationSummary(summary);
4848

49-
expect(output).toContain('RESULT: INCONCLUSIVE');
49+
expect(output).toContain('RESULT: ERROR');
5050
expect(output).toContain('all 3 test(s) had execution errors');
5151
expect(output).toContain('no evaluation was performed');
5252
});
@@ -65,9 +65,9 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
6565
const summary = calculateEvaluationSummary(results);
6666
const output = formatEvaluationSummary(summary);
6767

68-
// Should show PASS (the one graded test passed) not INCONCLUSIVE
68+
// Should show PASS (the one graded test passed) not ERROR
6969
expect(output).toContain('RESULT: PASS');
70-
expect(output).not.toContain('INCONCLUSIVE');
70+
expect(output).not.toContain('RESULT: ERROR');
7171
});
7272

7373
it('shows FAIL when there are quality failures mixed with execution errors', () => {
@@ -85,7 +85,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
8585
const output = formatEvaluationSummary(summary, { threshold: 0.8 });
8686

8787
expect(output).toContain('RESULT: FAIL');
88-
expect(output).not.toContain('INCONCLUSIVE');
88+
expect(output).not.toContain('RESULT: ERROR');
8989
});
9090

9191
it('shows PASS when all tests pass and none are errors', () => {
@@ -98,6 +98,6 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
9898
const output = formatEvaluationSummary(summary);
9999

100100
expect(output).toContain('RESULT: PASS');
101-
expect(output).not.toContain('INCONCLUSIVE');
101+
expect(output).not.toContain('RESULT: ERROR');
102102
});
103103
});

0 commit comments

Comments
 (0)