diff --git a/apps/cli/src/commands/eval/statistics.ts b/apps/cli/src/commands/eval/statistics.ts index 2d584c55d..54d6d373c 100644 --- a/apps/cli/src/commands/eval/statistics.ts +++ b/apps/cli/src/commands/eval/statistics.ts @@ -181,7 +181,7 @@ export function calculateEvaluationSummary( } function formatScore(value: number): string { - return value.toFixed(3); + return `${Math.round(value * 100)}%`; } export function formatEvaluationSummary( @@ -226,7 +226,7 @@ export function formatEvaluationSummary( } else { overallVerdict = overallPassed ? 'PASS' : 'FAIL'; verdictColor = overallPassed ? '\x1b[32m' : '\x1b[31m'; - verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${threshold}, mean: ${formatScore(summary.mean)})`; + verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${Math.round(threshold * 100)}%, mean: ${formatScore(summary.mean)})`; } lines.push('\n=================================================='); @@ -267,7 +267,7 @@ export function formatEvaluationSummary( lines.push('\nScore distribution:'); for (const bin of summary.histogram) { const [start, end] = bin.range; - lines.push(` ${start.toFixed(1)}-${end.toFixed(1)}: ${bin.count}`); + lines.push(` ${Math.round(start * 100)}%-${Math.round(end * 100)}%: ${bin.count}`); } lines.push('\nTop performing tests:'); diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts index 51901c844..c97599fed 100644 --- a/apps/cli/test/eval.integration.test.ts +++ b/apps/cli/test/eval.integration.test.ts @@ -208,7 +208,7 @@ describe('agentv eval CLI', () => { // Don't check stderr - it may contain stack traces or other diagnostics expect(stdout).toContain('Using target (test-file): file-target [provider=mock]'); - expect(stdout).toContain('Mean score: 0.750'); + expect(stdout).toContain('Mean score: 75%'); // Std deviation is an implementation detail - don't check it const outputPath = extractOutputPath(stdout); diff --git a/apps/cli/test/unit/matrix-summary.test.ts b/apps/cli/test/unit/matrix-summary.test.ts index 61a2a2f27..69e842041 100644 --- a/apps/cli/test/unit/matrix-summary.test.ts +++ b/apps/cli/test/unit/matrix-summary.test.ts @@ -34,8 +34,8 @@ describe('formatMatrixSummary', () => { expect(output).toContain('test-1'); expect(output).toContain('test-2'); expect(output).toContain('Average'); - expect(output).toContain('0.900'); - expect(output).toContain('0.700'); + expect(output).toContain('90%'); + expect(output).toContain('70%'); }); it('handles missing test-target pairs with dash', () => {