Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions apps/cli/src/commands/eval/statistics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ export function calculateEvaluationSummary(
}

/**
 * Formats a numeric score as a whole-number percentage string.
 *
 * The value is multiplied by 100 and rounded to the nearest integer, so
 * `0.75` becomes `"75%"`. Presumably scores are fractions on a 0–1 scale
 * (the accompanying tests map 0.750 to "75%"); values outside that range
 * are not clamped.
 *
 * @param value - Score to format.
 * @returns The rounded percentage followed by a `%` sign.
 */
function formatScore(value: number): string {
  // The earlier fixed-decimal return (`value.toFixed(3)`) preceded this line
  // and made it unreachable; only the percentage form is kept.
  return `${Math.round(value * 100)}%`;
}

export function formatEvaluationSummary(
Expand Down Expand Up @@ -226,7 +226,7 @@ export function formatEvaluationSummary(
} else {
overallVerdict = overallPassed ? 'PASS' : 'FAIL';
verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m';
verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${threshold}, mean: ${formatScore(summary.mean)})`;
verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${Math.round(threshold * 100)}%, mean: ${formatScore(summary.mean)})`;
}

lines.push('\n==================================================');
Expand Down Expand Up @@ -267,7 +267,7 @@ export function formatEvaluationSummary(
lines.push('\nScore distribution:');
for (const bin of summary.histogram) {
const [start, end] = bin.range;
lines.push(` ${start.toFixed(1)}-${end.toFixed(1)}: ${bin.count}`);
lines.push(` ${Math.round(start * 100)}%-${Math.round(end * 100)}%: ${bin.count}`);
}

lines.push('\nTop performing tests:');
Expand Down
2 changes: 1 addition & 1 deletion apps/cli/test/eval.integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ describe('agentv eval CLI', () => {

// Don't check stderr - it may contain stack traces or other diagnostics
expect(stdout).toContain('Using target (test-file): file-target [provider=mock]');
expect(stdout).toContain('Mean score: 0.750');
expect(stdout).toContain('Mean score: 75%');
// Std deviation is an implementation detail - don't check it

const outputPath = extractOutputPath(stdout);
Expand Down
4 changes: 2 additions & 2 deletions apps/cli/test/unit/matrix-summary.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ describe('formatMatrixSummary', () => {
expect(output).toContain('test-1');
expect(output).toContain('test-2');
expect(output).toContain('Average');
expect(output).toContain('0.900');
expect(output).toContain('0.700');
expect(output).toContain('90%');
expect(output).toContain('70%');
});

it('handles missing test-target pairs with dash', () => {
Expand Down
Loading