Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/cli/src/commands/eval/statistics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ export function formatEvaluationSummary(
let verdictColor: string;
let verdictText: string;
if (allExecutionErrors) {
-overallVerdict = 'INCONCLUSIVE';
+overallVerdict = 'ERROR';
verdictColor = '\x1b[33m'; // yellow
-verdictText = `RESULT: INCONCLUSIVE (all ${summary.total} test(s) had execution errors — no evaluation was performed)`;
+verdictText = `RESULT: ERROR (all ${summary.total} test(s) had execution errors — no evaluation was performed)`;
} else {
overallVerdict = overallPassed ? 'PASS' : 'FAIL';
verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m';
Expand Down
14 changes: 7 additions & 7 deletions apps/cli/test/commands/eval/statistics-inconclusive.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ function makeResult(overrides: Partial<EvaluationResult> = {}): EvaluationResult
};
}

-describe('formatEvaluationSummary — inconclusive verdict', () => {
-it('shows INCONCLUSIVE when all tests are execution errors', () => {
+describe('formatEvaluationSummary — error verdict', () => {
+it('shows ERROR when all tests are execution errors', () => {
const results = [
makeResult({
testId: 'err-1',
Expand All @@ -46,7 +46,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
const summary = calculateEvaluationSummary(results);
const output = formatEvaluationSummary(summary);

-expect(output).toContain('RESULT: INCONCLUSIVE');
+expect(output).toContain('RESULT: ERROR');
expect(output).toContain('all 3 test(s) had execution errors');
expect(output).toContain('no evaluation was performed');
});
Expand All @@ -65,9 +65,9 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
const summary = calculateEvaluationSummary(results);
const output = formatEvaluationSummary(summary);

-// Should show PASS (the one graded test passed) not INCONCLUSIVE
+// Should show PASS (the one graded test passed) not ERROR
expect(output).toContain('RESULT: PASS');
-expect(output).not.toContain('INCONCLUSIVE');
+expect(output).not.toContain('RESULT: ERROR');
});

it('shows FAIL when there are quality failures mixed with execution errors', () => {
Expand All @@ -85,7 +85,7 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
const output = formatEvaluationSummary(summary, { threshold: 0.8 });

expect(output).toContain('RESULT: FAIL');
-expect(output).not.toContain('INCONCLUSIVE');
+expect(output).not.toContain('RESULT: ERROR');
});

it('shows PASS when all tests pass and none are errors', () => {
Expand All @@ -98,6 +98,6 @@ describe('formatEvaluationSummary — inconclusive verdict', () => {
const output = formatEvaluationSummary(summary);

expect(output).toContain('RESULT: PASS');
-expect(output).not.toContain('INCONCLUSIVE');
+expect(output).not.toContain('RESULT: ERROR');
});
});
Loading