diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts
index 7a4e3d721..fb3b4e7a4 100644
--- a/apps/cli/src/commands/results/manifest.ts
+++ b/apps/cli/src/commands/results/manifest.ts
@@ -193,6 +193,7 @@ export function loadManifestResults(sourceFile: string): EvaluationResult[] {
 
 export interface LightweightResultRecord {
   readonly testId: string;
+  readonly dataset?: string;
   readonly target?: string;
   readonly experiment?: string;
   readonly score: number;
@@ -209,6 +210,7 @@ export function loadLightweightResults(sourceFile: string): LightweightResultRec
   if (isIndexManifestPath(resolvedSourceFile)) {
     return parseResultManifest(content).map((record) => ({
       testId: record.test_id ?? record.eval_id ?? 'unknown',
+      dataset: record.dataset,
       target: record.target,
       experiment: record.experiment,
       score: record.score,
@@ -244,6 +246,7 @@ export function loadLightweightResults(sourceFile: string): LightweightResultRec
 
     records.push({
       testId: rawTestId,
+      dataset: typeof record.dataset === 'string' ? record.dataset : undefined,
       target: typeof record.target === 'string' ? record.target : undefined,
       score: record.score,
       scores: Array.isArray(record.scores)
diff --git a/apps/cli/src/commands/trend/index.ts b/apps/cli/src/commands/trend/index.ts
new file mode 100644
index 000000000..edd616d77
--- /dev/null
+++ b/apps/cli/src/commands/trend/index.ts
@@ -0,0 +1,500 @@
+import path from 'node:path';
+
+import { command, flag, number, oneOf, option, optional, restPositionals, string } from 'cmd-ts';
+
+import { toSnakeCaseDeep } from '../../utils/case-conversion.js';
+import { RESULT_INDEX_FILENAME } from '../eval/result-layout.js';
+import {
+  type LightweightResultRecord,
+  loadLightweightResults,
+  resolveResultSourcePath,
+} from '../results/manifest.js';
+import { listResultFiles } from '../trace/utils.js';
+
+// ANSI SGR escape sequences used to style the table output.
+const colors = {
+  reset: '\x1b[0m',
+  bold: '\x1b[1m',
+  dim: '\x1b[2m',
+  green: '\x1b[32m',
+  red: '\x1b[31m',
+  yellow: '\x1b[33m',
+  cyan: '\x1b[36m',
+  gray: '\x1b[90m',
+};
+
+// Styling is disabled when NO_COLOR is set (to any value) or stdout is not a TTY.
+const noColor = process.env.NO_COLOR !== undefined || !process.stdout.isTTY;
+// Same keys as `colors`; every value is '' when styling is disabled.
+const c = noColor ? Object.fromEntries(Object.keys(colors).map((k) => [k, ''])) : colors;
+// Matches ESC '[' <digits/semicolons> 'm'; the ESC character is built from char code 27.
+const ansiPattern = new RegExp(`${String.fromCharCode(27)}\\[[0-9;]*m`, 'g');
+
+/** A lightweight result record annotated with the source path it was loaded from. */
+export interface TrendRunRecord extends LightweightResultRecord {
+  readonly sourcePath: string;
+}
+
+/** One run's aggregate in the trend series (one row of the table output). */
+export interface TrendRunPoint {
+  /** Display label: the run timestamp when present, otherwise a directory name derived from `path`. */
+  readonly label: string;
+  /** Source path of the run as it was passed to the analysis. */
+  readonly path: string;
+  /** Timestamp of the first record that contributed to this run, if it has one. */
+  readonly timestamp?: string;
+  /** Number of records that contributed to `meanScore`. */
+  readonly matchedTestCount: number;
+  /** Mean score of the contributing records, rounded to 6 decimal places. */
+  readonly meanScore: number;
+}
+
+/** The record filters and matching mode that were applied, echoed back in the output. */
+export interface TrendFilters {
+  /** Dataset name records had to equal; undefined means no dataset filter. */
+  readonly dataset?: string;
+  /** Target name records had to equal; undefined means no target filter. */
+  readonly target?: string;
+  /** True when each run is aggregated independently instead of over shared test IDs. */
+  readonly allowMissingTests: boolean;
+}
+
+/** Series-level result of the trend analysis. */
+export interface TrendSummary {
+  /** Number of runs in the series. */
+  readonly runCount: number;
+  /**
+   * Number of test IDs shared by every run; when missing tests are allowed, the smallest
+   * per-run record count instead.
+   */
+  readonly matchedTestCount: number;
+  /** Timestamps of the first and last run in the ordered series (either may be absent). */
+  readonly dateRange: {
+    readonly start?: string;
+    readonly end?: string;
+  };
+  /** Least-squares slope of mean score per run step, rounded to 6 decimal places. */
+  readonly slope: number;
+  /** Least-squares intercept (fitted score at the first run), rounded to 6 decimal places. */
+  readonly intercept: number;
+  /** Coefficient of determination of the fit, rounded to 6 decimal places. */
+  readonly rSquared: number;
+  /** Verdict obtained by comparing the slope against the slope threshold. */
+  readonly direction: 'degrading' | 'improving' | 'stable';
+}
+
+/** Regression-gate settings and outcome. */
+export interface TrendRegression {
+  /** Slope threshold used to classify the direction. */
+  readonly slopeThreshold: number;
+  /** Whether a degrading verdict should fail the command. */
+  readonly failOnDegrading: boolean;
+  /** True only when `failOnDegrading` is set and the direction is 'degrading'. */
+  readonly triggered: boolean;
+}
+
+/** Complete result of a trend analysis; rendered as a table or serialized to JSON. */
+export interface TrendOutput {
+  /** Per-run aggregates in the order they were analyzed. */
+  readonly runs: readonly TrendRunPoint[];
+  readonly filters: TrendFilters;
+  readonly summary: TrendSummary;
+  readonly regression: TrendRegression;
+}
+
+/** Unrounded least-squares fit of a value series against its indices (0, 1, 2, …). */
+interface RegressionStats {
+  readonly slope: number;
+  readonly intercept: number;
+  readonly rSquared: number;
+}
+
+/** Returns `str` with every sequence matched by `ansiPattern` removed. */
+function stripAnsi(str: string): string {
+  const plainText = str.replace(ansiPattern, '');
+  return plainText;
+}
+
+/**
+ * Appends spaces to `str` until its visible width reaches `len`.
+ * Width is measured after stripping ANSI sequences; longer strings are returned unchanged.
+ */
+function padRight(str: string, len: number): string {
+  const missing = len - stripAnsi(str).length;
+  return missing > 0 ? str + ' '.repeat(missing) : str;
+}
+
+/**
+ * Prepends spaces to `str` until its visible width reaches `len`.
+ * Width is measured after stripping ANSI sequences; longer strings are returned unchanged.
+ */
+function padLeft(str: string, len: number): string {
+  const missing = len - stripAnsi(str).length;
+  return missing > 0 ? ' '.repeat(missing) + str : str;
+}
+
+/**
+ * Formats `value` with a fixed number of decimals and an explicit sign.
+ * Values >= 0 get a leading '+'; negative values keep the '-' produced by `toFixed`.
+ */
+function formatSignedNumber(value: number, digits = 3): string {
+  const fixed = value.toFixed(digits);
+  return value >= 0 ? `+${fixed}` : fixed;
+}
+
+/** Wraps a direction verdict in its color: green improving, red degrading, gray stable. */
+function colorizeDirection(direction: TrendSummary['direction']): string {
+  const tintByDirection: Record<TrendSummary['direction'], string> = {
+    improving: c.green,
+    degrading: c.red,
+    stable: c.gray,
+  };
+  return `${tintByDirection[direction]}${direction}${c.reset}`;
+}
+
+/** Formats a slope with an explicit sign, colored green when positive, red when negative, gray otherwise. */
+function colorizeSlope(value: number): string {
+  let tint = c.gray;
+  if (value > 0) {
+    tint = c.green;
+  } else if (value < 0) {
+    tint = c.red;
+  }
+  return `${tint}${formatSignedNumber(value)}${c.reset}`;
+}
+
+/**
+ * Resolves `source` with `resolveResultSourcePath` and requires the result to be an index manifest.
+ *
+ * @throws Error when the resolved path's file name is not `RESULT_INDEX_FILENAME`.
+ */
+function ensureTrendIndexPath(source: string, cwd: string): string {
+  const manifestPath = resolveResultSourcePath(source, cwd);
+  const isIndexManifest = path.basename(manifestPath) === RESULT_INDEX_FILENAME;
+  if (isIndexManifest) {
+    return manifestPath;
+  }
+  throw new Error(
+    `Unsupported result source for trend: ${source}. Use a run workspace directory or ${RESULT_INDEX_FILENAME} manifest.`,
+  );
+}
+
+/**
+ * Resolves the manifest paths to analyze, either from explicit sources or by discovery.
+ *
+ * @param cwd - Directory used to resolve explicit sources and to discover runs.
+ * @param sources - Explicit run directories or manifest paths; mutually exclusive with `last`.
+ * @param last - Number of discovered runs to use; must be an integer of at least 2.
+ * @returns Index manifest paths. Explicit sources keep their given order; discovered runs are
+ *   returned in the reverse of the order `listResultFiles` reports them.
+ * @throws Error when both or neither of `sources`/`last` are given, when `last` is below 2 or
+ *   not an integer, when a source is not an index manifest, or when fewer than 2 runs are found.
+ */
+export function resolveTrendSources(
+  cwd: string,
+  sources: readonly string[],
+  last?: number,
+): string[] {
+  if (sources.length > 0 && last !== undefined) {
+    throw new Error('Use either explicit run sources or --last, not both');
+  }
+
+  if (sources.length > 0) {
+    return sources.map((source) => ensureTrendIndexPath(source, cwd));
+  }
+
+  if (last === undefined) {
+    throw new Error('Provide one or more run workspaces or use --last <n>');
+  }
+
+  if (last < 2) {
+    throw new Error('--last must be at least 2');
+  }
+
+  // `slice` would silently truncate 2.5 to 2 and treat NaN as 0; reject both up front.
+  if (!Number.isInteger(last)) {
+    throw new Error('--last must be an integer');
+  }
+
+  // Takes the first `last` index manifests; presumably `listResultFiles` lists newest first,
+  // so the final reverse() yields oldest-to-newest order.
+  const metas = listResultFiles(cwd)
+    .filter((meta) => path.basename(meta.path) === RESULT_INDEX_FILENAME)
+    .slice(0, last);
+
+  if (metas.length < 2) {
+    throw new Error(
+      'Trend analysis requires at least 2 canonical run workspaces in .agentv/results/runs/',
+    );
+  }
+
+  return metas.map((meta) => meta.path).reverse();
+}
+
+/**
+ * Keeps the records that match the optional dataset and target filters and tags each
+ * kept record with `sourcePath`. A missing or empty filter value matches every record.
+ */
+function filterRunRecords(
+  records: readonly LightweightResultRecord[],
+  sourcePath: string,
+  dataset?: string,
+  target?: string,
+): TrendRunRecord[] {
+  const kept: TrendRunRecord[] = [];
+  for (const record of records) {
+    if (dataset && record.dataset !== dataset) {
+      continue;
+    }
+    if (target && record.target !== target) {
+      continue;
+    }
+    kept.push({ ...record, sourcePath });
+  }
+  return kept;
+}
+
+/**
+ * Returns the display label for a run: its timestamp when non-empty, otherwise the name of
+ * the run directory.
+ *
+ * @param sourcePath - A `.jsonl` manifest path or a run directory.
+ * @param timestamp - Run timestamp, if known.
+ */
+function getRunLabel(sourcePath: string, timestamp?: string): string {
+  if (timestamp) {
+    return timestamp;
+  }
+  // A manifest file is named by its parent directory; a run directory is named by itself.
+  // Taking dirname() unconditionally would label every directory source with its parent folder.
+  const pointsAtManifest = path.extname(sourcePath) === '.jsonl';
+  return path.basename(pointsAtManifest ? path.dirname(sourcePath) : sourcePath);
+}
+
+/**
+ * Returns the key used to order runs: the timestamp when defined (even if empty), otherwise
+ * the name of the run directory.
+ *
+ * @param sourcePath - A `.jsonl` manifest path or a run directory.
+ * @param timestamp - Run timestamp, if known.
+ */
+function getRunSortKey(sourcePath: string, timestamp?: string): string {
+  // A manifest file is keyed by its parent directory; a run directory is keyed by itself.
+  // Taking dirname() unconditionally would give every directory source the same key.
+  const pointsAtManifest = path.extname(sourcePath) === '.jsonl';
+  return timestamp ?? path.basename(pointsAtManifest ? path.dirname(sourcePath) : sourcePath);
+}
+
+/** Arithmetic mean of `values`; evaluates to NaN for an empty array (0 / 0). */
+function mean(values: readonly number[]): number {
+  let total = 0;
+  for (const value of values) {
+    total += value;
+  }
+  return total / values.length;
+}
+
+/** Rounds `value` to `digits` decimal places by round-tripping through `toFixed`. */
+function roundMetric(value: number, digits = 6): number {
+  const fixed = value.toFixed(digits);
+  return Number(fixed);
+}
+
+/**
+ * Computes the test IDs that appear in every run.
+ *
+ * @param runs - Records grouped per run.
+ * @param allowMissingTests - When true, no intersection is computed.
+ * @returns `undefined` when `allowMissingTests` is true; otherwise the sorted IDs shared by
+ *   all runs (an empty array when `runs` is empty or nothing is shared).
+ */
+export function computeMatchedTestIds(
+  runs: readonly TrendRunRecord[][],
+  allowMissingTests: boolean,
+): string[] | undefined {
+  if (allowMissingTests) {
+    return undefined;
+  }
+
+  const [firstRun, ...rest] = runs;
+  // No runs means nothing can be shared; without this guard `firstRun.map` would throw.
+  if (firstRun === undefined) {
+    return [];
+  }
+
+  const intersection = new Set(firstRun.map((record) => record.testId));
+
+  for (const run of rest) {
+    const runIds = new Set(run.map((record) => record.testId));
+    // Deleting the entry currently being visited is safe while iterating a Set.
+    for (const testId of intersection) {
+      if (!runIds.has(testId)) {
+        intersection.delete(testId);
+      }
+    }
+  }
+
+  return [...intersection].sort();
+}
+
+/**
+ * Fits an ordinary least-squares line to `values`, using each value's index as x.
+ *
+ * @returns Slope, intercept and R² of the fit; R² is 1 when all values are equal.
+ * @throws Error when fewer than 2 values are given.
+ */
+export function computeRegressionStats(values: readonly number[]): RegressionStats {
+  const n = values.length;
+  if (n < 2) {
+    throw new Error('Trend analysis requires at least 2 runs');
+  }
+
+  // x runs over 0..n-1, so its mean is the midpoint of that range.
+  const meanX = (n - 1) / 2;
+  const meanY = mean(values);
+
+  let covariance = 0;
+  let varianceX = 0;
+  values.forEach((y, i) => {
+    const dx = i - meanX;
+    covariance += dx * (y - meanY);
+    varianceX += dx * dx;
+  });
+
+  const slope = varianceX === 0 ? 0 : covariance / varianceX;
+  const intercept = meanY - slope * meanX;
+
+  let ssTot = 0;
+  let ssRes = 0;
+  values.forEach((y, i) => {
+    const predicted = intercept + slope * i;
+    ssTot += (y - meanY) ** 2;
+    ssRes += (y - predicted) ** 2;
+  });
+
+  // A constant series has no variance to explain; report it as a perfect fit.
+  const rSquared = ssTot === 0 ? 1 : 1 - ssRes / ssTot;
+  return { slope, intercept, rSquared };
+}
+
+/**
+ * Classifies a regression slope against a non-negative threshold.
+ *
+ * @param slope - Fitted change in mean score per run step.
+ * @param slopeThreshold - Minimum absolute slope that counts as a trend.
+ * @returns 'degrading' for a negative slope of at least the threshold magnitude,
+ *   'improving' for a positive one, 'stable' otherwise.
+ */
+export function classifyTrendDirection(
+  slope: number,
+  slopeThreshold: number,
+): TrendSummary['direction'] {
+  // The sign checks keep a perfectly flat slope 'stable' even when the threshold is 0;
+  // a bare `slope <= -slopeThreshold` is true for 0 <= -0 and would report 'degrading'.
+  if (slope < 0 && slope <= -slopeThreshold) {
+    return 'degrading';
+  }
+  if (slope > 0 && slope >= slopeThreshold) {
+    return 'improving';
+  }
+  return 'stable';
+}
+
+/** Returns exit code 1 only when gating is enabled and the trend is degrading; otherwise 0. */
+export function determineTrendExitCode(
+  direction: TrendSummary['direction'],
+  failOnDegrading: boolean,
+): number {
+  if (!failOnDegrading) {
+    return 0;
+  }
+  return direction === 'degrading' ? 1 : 0;
+}
+
+/**
+ * Loads the given runs, filters and aligns their records, and fits a linear trend to the
+ * per-run mean scores.
+ *
+ * Runs are ordered by their sort key (timestamp of the first filtered record, else a
+ * directory name) before analysis, so the order of `sourcePaths` does not matter.
+ *
+ * @param params.sourcePaths - At least 2 result sources readable by `loadLightweightResults`.
+ * @param params.dataset - Optional dataset filter.
+ * @param params.target - Optional target filter.
+ * @param params.slopeThreshold - Threshold passed to `classifyTrendDirection`.
+ * @param params.allowMissingTests - Aggregate each run independently instead of over the
+ *   test IDs shared by all runs.
+ * @param params.failOnDegrading - Recorded in the output and used for `regression.triggered`.
+ * @returns Per-run points, the applied filters, the fit summary and the regression gate state.
+ * @throws Error when fewer than 2 sources are given, when a run has no records after
+ *   filtering, or when no test IDs are shared across runs in matched mode.
+ */
+export function analyzeTrend(params: {
+  readonly sourcePaths: readonly string[];
+  readonly dataset?: string;
+  readonly target?: string;
+  readonly slopeThreshold: number;
+  readonly allowMissingTests: boolean;
+  readonly failOnDegrading: boolean;
+}): TrendOutput {
+  const { sourcePaths, dataset, target, slopeThreshold, allowMissingTests, failOnDegrading } =
+    params;
+
+  if (sourcePaths.length < 2) {
+    throw new Error('Trend analysis requires at least 2 runs');
+  }
+
+  const filteredRuns = sourcePaths.map((sourcePath) => {
+    const records = filterRunRecords(
+      loadLightweightResults(sourcePath),
+      sourcePath,
+      dataset,
+      target,
+    );
+    if (records.length === 0) {
+      const filters = [dataset ? `dataset=${dataset}` : '', target ? `target=${target}` : '']
+        .filter(Boolean)
+        .join(', ');
+      const suffix = filters ? ` after filtering by ${filters}` : '';
+      throw new Error(`Run has no matching records${suffix}: ${sourcePath}`);
+    }
+    return records;
+  });
+
+  const chronologicalRuns = filteredRuns
+    .map((records, index) => ({
+      sourcePath: sourcePaths[index],
+      records,
+      sortKey: getRunSortKey(sourcePaths[index], records[0]?.timestamp),
+    }))
+    .sort((a, b) => a.sortKey.localeCompare(b.sortKey));
+
+  const matchedTestIds = computeMatchedTestIds(
+    chronologicalRuns.map((run) => run.records),
+    allowMissingTests,
+  );
+  if (!allowMissingTests && (!matchedTestIds || matchedTestIds.length === 0)) {
+    throw new Error('No shared test IDs remain across the selected runs after filtering');
+  }
+
+  // A Set makes the per-record membership check O(1) instead of scanning the id array.
+  const matchedIdSet = matchedTestIds === undefined ? undefined : new Set(matchedTestIds);
+
+  const runs = chronologicalRuns.map(({ records, sourcePath }) => {
+    const applicableRecords =
+      matchedIdSet === undefined
+        ? records
+        : records.filter((record) => matchedIdSet.has(record.testId));
+
+    if (applicableRecords.length === 0) {
+      throw new Error(`Run has no matched tests after intersection: ${sourcePath}`);
+    }
+
+    // Prefer the first contributing record's timestamp; fall back to the run's first record.
+    const timestamp = applicableRecords[0]?.timestamp ?? records[0]?.timestamp;
+
+    return {
+      label: getRunLabel(sourcePath, timestamp),
+      path: sourcePath,
+      timestamp,
+      matchedTestCount: applicableRecords.length,
+      meanScore: roundMetric(mean(applicableRecords.map((record) => record.score))),
+    } satisfies TrendRunPoint;
+  });
+
+  const regressionStats = computeRegressionStats(runs.map((run) => run.meanScore));
+  const direction = classifyTrendDirection(regressionStats.slope, slopeThreshold);
+
+  return {
+    runs,
+    filters: {
+      dataset,
+      target,
+      allowMissingTests,
+    },
+    summary: {
+      runCount: runs.length,
+      // In independent mode there is no shared id list, so report the smallest run size.
+      matchedTestCount:
+        matchedTestIds?.length ?? Math.min(...runs.map((run) => run.matchedTestCount)),
+      dateRange: {
+        start: runs[0]?.timestamp,
+        end: runs.at(-1)?.timestamp,
+      },
+      slope: roundMetric(regressionStats.slope),
+      intercept: roundMetric(regressionStats.intercept),
+      rSquared: roundMetric(regressionStats.rSquared),
+      direction,
+    },
+    regression: {
+      slopeThreshold,
+      failOnDegrading,
+      triggered: failOnDegrading && direction === 'degrading',
+    },
+  };
+}
+
+/**
+ * Renders a trend analysis as a human-readable, optionally colorized text report:
+ * a header with run count, range, filters and verdict, one table row per run, and a
+ * summary of the fit and the regression gate.
+ *
+ * @returns The report as a single string with lines joined by '\n'.
+ */
+export function formatTrendTable(output: TrendOutput): string {
+  const lines: string[] = [];
+  // Column widths: at least as wide as the header text, wider if any cell needs it.
+  const runLabelWidth = Math.max(3, ...output.runs.map((run) => run.label.length));
+  const scoreWidth = Math.max(10, ...output.runs.map((run) => run.meanScore.toFixed(3).length));
+  const matchWidth = Math.max(7, ...output.runs.map((run) => String(run.matchedTestCount).length));
+
+  lines.push('');
+  lines.push(`${c.bold}Trend Analysis${c.reset}`);
+  lines.push('');
+  lines.push(
+    `${c.bold}Runs:${c.reset} ${output.summary.runCount} | ${c.bold}Range:${c.reset} ${output.summary.dateRange.start ?? 'unknown'} → ${output.summary.dateRange.end ?? 'unknown'}`,
+  );
+  lines.push(
+    `${c.bold}Filters:${c.reset} dataset=${output.filters.dataset ?? '*'} target=${output.filters.target ?? '*'} mode=${output.filters.allowMissingTests ? 'independent' : 'matched-tests'}`,
+  );
+  lines.push(
+    `${c.bold}Matched Tests:${c.reset} ${output.summary.matchedTestCount} | ${c.bold}Verdict:${c.reset} ${colorizeDirection(output.summary.direction)}`,
+  );
+  lines.push('');
+
+  // Table header and separator; the run label is left-aligned, numeric columns right-aligned.
+  const header = `  ${padRight('Run', runLabelWidth)}  ${padLeft('Tests', matchWidth)}  ${padLeft('Mean Score', scoreWidth)}`;
+  lines.push(`${c.dim}${header}${c.reset}`);
+  lines.push(
+    `${c.dim}  ${'─'.repeat(runLabelWidth)}  ${'─'.repeat(matchWidth)}  ${'─'.repeat(scoreWidth)}${c.reset}`,
+  );
+
+  for (const run of output.runs) {
+    lines.push(
+      `  ${padRight(run.label, runLabelWidth)}  ${padLeft(String(run.matchedTestCount), matchWidth)}  ${padLeft(run.meanScore.toFixed(3), scoreWidth)}`,
+    );
+  }
+
+  lines.push('');
+  lines.push(
+    `${c.bold}Summary:${c.reset} slope=${colorizeSlope(output.summary.slope)} intercept=${output.summary.intercept.toFixed(3)} r²=${output.summary.rSquared.toFixed(3)}`,
+  );
+  lines.push(
+    `${c.bold}Regression Gate:${c.reset} threshold=${output.regression.slopeThreshold.toFixed(3)} fail_on_degrading=${output.regression.failOnDegrading ? 'true' : 'false'} triggered=${output.regression.triggered ? `${c.red}true${c.reset}` : 'false'}`,
+  );
+  lines.push('');
+
+  return lines.join('\n');
+}
+
+/**
+ * `trend` CLI command: resolves the run sources, runs the trend analysis and prints it as a
+ * table or JSON. Exits with 1 on any error, or when `--fail-on-degrading` is set and the
+ * trend is degrading; otherwise exits with 0.
+ */
+export const trendCommand = command({
+  name: 'trend',
+  description: 'Analyze score drift across multiple historical run manifests',
+  args: {
+    runs: restPositionals({
+      type: string,
+      displayName: 'runs',
+      description: 'Run workspace directories or index.jsonl manifest paths',
+    }),
+    last: option({
+      type: optional(number),
+      long: 'last',
+      description: 'Use the most recent N runs from .agentv/results/runs/',
+    }),
+    dataset: option({
+      type: optional(string),
+      long: 'dataset',
+      description: 'Filter records to a dataset name',
+    }),
+    target: option({
+      type: optional(string),
+      long: 'target',
+      description: 'Filter records to a target name',
+    }),
+    slopeThreshold: option({
+      type: optional(number),
+      long: 'slope-threshold',
+      description: 'Minimum absolute slope required to classify improving or degrading',
+    }),
+    failOnDegrading: flag({
+      long: 'fail-on-degrading',
+      description: 'Exit non-zero when the detected trend is degrading beyond the slope threshold',
+    }),
+    allowMissingTests: flag({
+      long: 'allow-missing-tests',
+      description: 'Aggregate each run independently instead of intersecting test IDs across runs',
+    }),
+    format: option({
+      type: optional(oneOf(['table', 'json'])),
+      long: 'format',
+      short: 'f',
+      description: 'Output format: table (default) or json',
+    }),
+    json: flag({
+      long: 'json',
+      description: 'Output JSON format (shorthand for --format=json)',
+    }),
+  },
+  handler: async ({
+    runs,
+    last,
+    dataset,
+    target,
+    slopeThreshold,
+    failOnDegrading,
+    allowMissingTests,
+    format,
+    json,
+  }) => {
+    // --json wins over --format; the default format is the table.
+    const outputFormat = json ? 'json' : (format ?? 'table');
+    const effectiveSlopeThreshold = slopeThreshold ?? 0.01;
+
+    try {
+      if (effectiveSlopeThreshold < 0) {
+        throw new Error('--slope-threshold must be non-negative');
+      }
+
+      const sourcePaths = resolveTrendSources(process.cwd(), runs, last);
+      const output = analyzeTrend({
+        sourcePaths,
+        dataset,
+        target,
+        slopeThreshold: effectiveSlopeThreshold,
+        allowMissingTests,
+        failOnDegrading,
+      });
+
+      if (outputFormat === 'json') {
+        console.log(JSON.stringify(toSnakeCaseDeep(output), null, 2));
+      } else {
+        console.log(formatTrendTable(output));
+      }
+
+      process.exit(determineTrendExitCode(output.summary.direction, failOnDegrading));
+    } catch (error) {
+      // Anything can be thrown; only read `.message` from real Error instances.
+      const message = error instanceof Error ? error.message : String(error);
+      console.error(`Error: ${message}`);
+      process.exit(1);
+    }
+  },
+});
diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts
index a1dea9d5c..18cf70feb 100644
--- a/apps/cli/src/index.ts
+++ b/apps/cli/src/index.ts
@@ -13,6 +13,7 @@ import { resultsServeCommand } from './commands/results/serve.js';
 import { selfCommand } from './commands/self/index.js';
 import { traceCommand } from './commands/trace/index.js';
 import { transpileCommand } from './commands/transpile/index.js';
+import { trendCommand } from './commands/trend/index.js';
 import { trimCommand } from './commands/trim/index.js';
 import { validateCommand } from './commands/validate/index.js';
 import { workspaceCommand } from './commands/workspace/index.js';
@@ -35,6 +36,7 @@ export const app = subcommands({
     serve: resultsServeCommand,
     studio: resultsServeCommand,
     trace: traceCommand,
+    trend: trendCommand,
     transpile: transpileCommand,
     trim: trimCommand,
     validate: validateCommand,
@@ -64,6 +66,7 @@ const TOP_LEVEL_COMMANDS = new Set([
   'serve',
   'studio',
   'trace',
+  'trend',
   'transpile',
   'trim',
   'validate',
diff --git a/apps/cli/test/commands/trend/trend.test.ts b/apps/cli/test/commands/trend/trend.test.ts
new file mode 100644
index 000000000..2f32e184e
--- /dev/null
+++ b/apps/cli/test/commands/trend/trend.test.ts
@@ -0,0 +1,546 @@
+import { afterEach, describe, expect, it } from 'bun:test';
+import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { execa } from 'execa';
+
+import {
+  analyzeTrend,
+  classifyTrendDirection,
+  computeRegressionStats,
+  determineTrendExitCode,
+  resolveTrendSources,
+} from '../../../src/commands/trend/index.js';
+
+// Derive this module's file and directory paths from import.meta.url.
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+// Five directory levels above this test file's directory.
+const projectRoot = path.resolve(__dirname, '../../../../..');
+// CLI entry script that the end-to-end tests run with `bun`.
+const CLI_ENTRY = path.join(projectRoot, 'apps/cli/src/cli.ts');
+
+/** Shape of one fixture record; each record is written as one JSON line of a run's index.jsonl. */
+interface RunRecordInput {
+  readonly test_id: string;
+  readonly score: number;
+  readonly dataset?: string;
+  readonly target?: string;
+  readonly timestamp?: string;
+}
+
+/** Creates a fresh temporary directory under the OS temp dir and resolves to its path. */
+async function createTempDir(): Promise<string> {
+  const prefix = path.join(tmpdir(), 'agentv-trend-test-');
+  return mkdtemp(prefix);
+}
+
+/**
+ * Writes a run workspace fixture at `<rootDir>/.agentv/results/runs/<runName>/index.jsonl`,
+ * one JSON line per record followed by a trailing newline.
+ *
+ * @returns The run directory and the path of the manifest written inside it.
+ */
+async function createRunWorkspace(
+  rootDir: string,
+  runName: string,
+  records: readonly RunRecordInput[],
+): Promise<{ runDir: string; indexPath: string }> {
+  const runDir = path.join(rootDir, '.agentv', 'results', 'runs', runName);
+  const indexPath = path.join(runDir, 'index.jsonl');
+  const jsonLines = records.map((record) => JSON.stringify(record));
+  const manifestText = `${jsonLines.join('\n')}\n`;
+
+  await mkdir(runDir, { recursive: true });
+  await writeFile(indexPath, manifestText, 'utf8');
+  return { runDir, indexPath };
+}
+
+describe('trend command', () => {
+  const cleanupDirs: string[] = [];
+
+  afterEach(async () => {
+    await Promise.all(
+      cleanupDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })),
+    );
+  });
+
+  it('computes a degrading trend over matched tests after dataset and target filtering', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    const run1 = await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'code-review',
+        target: 'claude-sonnet',
+        score: 0.95,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'code-review',
+        target: 'claude-sonnet',
+        score: 0.85,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+      {
+        test_id: 't1',
+        dataset: 'code-review',
+        target: 'gpt-5',
+        score: 0.7,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    const run2 = await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'code-review',
+        target: 'claude-sonnet',
+        score: 0.85,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'code-review',
+        target: 'claude-sonnet',
+        score: 0.75,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+      {
+        test_id: 't1',
+        dataset: 'code-review',
+        target: 'gpt-5',
+        score: 0.8,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+    const run3 = await createRunWorkspace(cwd, '2026-03-15T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'code-review',
+        target: 'claude-sonnet',
+        score: 0.75,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'code-review',
+        target: 'claude-sonnet',
+        score: 0.65,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+      {
+        test_id: 't1',
+        dataset: 'code-review',
+        target: 'gpt-5',
+        score: 0.9,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+    ]);
+
+    const output = analyzeTrend({
+      sourcePaths: [run1.indexPath, run2.indexPath, run3.indexPath],
+      dataset: 'code-review',
+      target: 'claude-sonnet',
+      slopeThreshold: 0.01,
+      allowMissingTests: false,
+      failOnDegrading: false,
+    });
+
+    expect(output.runs).toHaveLength(3);
+    expect(output.runs[0]?.meanScore).toBeCloseTo(0.9, 10);
+    expect(output.runs[1]?.meanScore).toBeCloseTo(0.8, 10);
+    expect(output.runs[2]?.meanScore).toBeCloseTo(0.7, 10);
+    expect(output.summary.matchedTestCount).toBe(2);
+    expect(output.summary.slope).toBeCloseTo(-0.1, 10);
+    expect(output.summary.direction).toBe('degrading');
+    expect(output.regression.triggered).toBe(false);
+  });
+
+  it('supports independent run aggregation when missing tests are allowed', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    const run1 = await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.6,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    const run2 = await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.9,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+
+    const output = analyzeTrend({
+      sourcePaths: [run1.indexPath, run2.indexPath],
+      dataset: 'suite',
+      target: 'alpha',
+      slopeThreshold: 0.01,
+      allowMissingTests: true,
+      failOnDegrading: false,
+    });
+
+    expect(output.filters.allowMissingTests).toBe(true);
+    expect(output.runs.map((run) => run.matchedTestCount)).toEqual([2, 1]);
+    expect(output.summary.direction).toBe('improving');
+  });
+
+  it('rejects runs that have no matching records after target filtering', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    const run1 = await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    const run2 = await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'beta',
+        score: 0.7,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+
+    expect(() =>
+      analyzeTrend({
+        sourcePaths: [run1.indexPath, run2.indexPath],
+        dataset: 'suite',
+        target: 'alpha',
+        slopeThreshold: 0.01,
+        allowMissingTests: false,
+        failOnDegrading: false,
+      }),
+    ).toThrow('Run has no matching records');
+  });
+
+  it('rejects legacy flat jsonl inputs', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    const flatFile = path.join(cwd, 'results.jsonl');
+    await writeFile(flatFile, `${JSON.stringify({ test_id: 't1', score: 0.9 })}\n`, 'utf8');
+
+    expect(() => resolveTrendSources(cwd, [flatFile])).toThrow(
+      'Unsupported result source for trend',
+    );
+  });
+
+  it('discovers canonical run workspaces with --last ordering oldest to newest', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      { test_id: 't1', score: 0.8, timestamp: '2026-03-01T10:00:00.000Z' },
+    ]);
+    await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      { test_id: 't1', score: 0.85, timestamp: '2026-03-08T10:00:00.000Z' },
+    ]);
+    await createRunWorkspace(cwd, '2026-03-15T10-00-00-000Z', [
+      { test_id: 't1', score: 0.9, timestamp: '2026-03-15T10:00:00.000Z' },
+    ]);
+
+    const sources = resolveTrendSources(cwd, [], 2);
+    expect(sources).toHaveLength(2);
+    expect(sources[0]).toContain('2026-03-08T10-00-00-000Z');
+    expect(sources[1]).toContain('2026-03-15T10-00-00-000Z');
+  });
+
+  it('classifies direction and exit code using the slope threshold', () => {
+    const stats = computeRegressionStats([0.9, 0.8, 0.7]);
+    const direction = classifyTrendDirection(stats.slope, 0.01);
+
+    expect(direction).toBe('degrading');
+    expect(determineTrendExitCode(direction, false)).toBe(0);
+    expect(determineTrendExitCode(direction, true)).toBe(1);
+  });
+
+  it('emits JSON output for explicit run inputs', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    const run1 = await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.9,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    const run2 = await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.7,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+    const run3 = await createRunWorkspace(cwd, '2026-03-15T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.7,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.6,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+    ]);
+
+    const result = await execa(
+      'bun',
+      [
+        '--no-env-file',
+        CLI_ENTRY,
+        'trend',
+        run1.runDir,
+        run2.indexPath,
+        run3.runDir,
+        '--dataset',
+        'suite',
+        '--target',
+        'alpha',
+        '--json',
+      ],
+      { cwd, reject: false },
+    );
+
+    expect(result.exitCode).toBe(0);
+    const parsed = JSON.parse(result.stdout) as Record<string, unknown>;
+    expect(parsed.filters).toEqual({
+      dataset: 'suite',
+      target: 'alpha',
+      allow_missing_tests: false,
+    });
+    expect((parsed.summary as Record<string, unknown>).direction).toBe('degrading');
+    expect((parsed.summary as Record<string, unknown>).matched_test_count).toBe(2);
+  });
+
+  it('normalizes explicit run inputs to chronological order before analysis', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    const run1 = await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.9,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    const run2 = await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.7,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+    const run3 = await createRunWorkspace(cwd, '2026-03-15T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.7,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.6,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+    ]);
+
+    const output = analyzeTrend({
+      sourcePaths: [run3.runDir, run1.indexPath, run2.runDir],
+      dataset: 'suite',
+      target: 'alpha',
+      slopeThreshold: 0.01,
+      allowMissingTests: false,
+      failOnDegrading: false,
+    });
+
+    expect(output.runs.map((run) => run.timestamp)).toEqual([
+      '2026-03-01T10:00:00.000Z',
+      '2026-03-08T10:00:00.000Z',
+      '2026-03-15T10:00:00.000Z',
+    ]);
+    expect(output.summary.dateRange).toEqual({
+      start: '2026-03-01T10:00:00.000Z',
+      end: '2026-03-15T10:00:00.000Z',
+    });
+    expect(output.summary.direction).toBe('degrading');
+  });
+
+  it('uses --last discovery and fails CI gating on sustained degradation', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.95,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.85,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.85,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.75,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+    await createRunWorkspace(cwd, '2026-03-15T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.75,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+      {
+        test_id: 't2',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.65,
+        timestamp: '2026-03-15T10:00:00.000Z',
+      },
+    ]);
+
+    const result = await execa(
+      'bun',
+      [
+        '--no-env-file',
+        CLI_ENTRY,
+        'trend',
+        '--last',
+        '3',
+        '--dataset',
+        'suite',
+        '--target',
+        'alpha',
+        '--fail-on-degrading',
+        '--slope-threshold',
+        '0.01',
+      ],
+      { cwd, reject: false },
+    );
+
+    expect(result.exitCode).toBe(1);
+    expect(result.stdout).toContain('Trend Analysis');
+    expect(result.stdout).toContain('degrading');
+  });
+
+  it('errors when target filtering leaves a selected run empty in CLI mode', async () => {
+    const cwd = await createTempDir();
+    cleanupDirs.push(cwd);
+
+    await createRunWorkspace(cwd, '2026-03-01T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'alpha',
+        score: 0.8,
+        timestamp: '2026-03-01T10:00:00.000Z',
+      },
+    ]);
+    await createRunWorkspace(cwd, '2026-03-08T10-00-00-000Z', [
+      {
+        test_id: 't1',
+        dataset: 'suite',
+        target: 'beta',
+        score: 0.7,
+        timestamp: '2026-03-08T10:00:00.000Z',
+      },
+    ]);
+
+    const result = await execa(
+      'bun',
+      [
+        '--no-env-file',
+        CLI_ENTRY,
+        'trend',
+        '--last',
+        '2',
+        '--dataset',
+        'suite',
+        '--target',
+        'alpha',
+      ],
+      { cwd, reject: false },
+    );
+
+    expect(result.exitCode).toBe(1);
+    expect(result.stderr).toContain('Run has no matching records');
+  });
+});
diff --git a/apps/web/src/content/docs/docs/tools/trend.mdx b/apps/web/src/content/docs/docs/tools/trend.mdx
new file mode 100644
index 000000000..0d832ece4
--- /dev/null
+++ b/apps/web/src/content/docs/docs/tools/trend.mdx
@@ -0,0 +1,158 @@
+---
+title: Trend
+description: Analyze score drift across multiple historical eval runs
+sidebar:
+  order: 2
+---
+
+The `trend` command analyzes score movement across multiple historical run manifests and reports whether quality is improving, degrading, or stable over time.
+
+Use it when pairwise `compare` is too narrow and you want to detect gradual drift across a sequence of runs.
+
+## Usage
+
+Analyze the last 8 canonical runs in the current workspace:
+
+```bash
+agentv trend --last 8
+```
+
+This is the primary day-to-day workflow. In most cases, users should start with `--last`.
+
+Filter to one dataset and target:
+
+```bash
+agentv trend --last 8 --dataset code-review --target claude-sonnet
+```
+
+Point directly at run workspaces or `index.jsonl` manifests when you need a specific historical slice or want a reproducible example:
+
+```bash
+agentv trend \
+  .agentv/results/runs/2026-03-01T10-00-00-000Z/ \
+  .agentv/results/runs/2026-03-08T10-00-00-000Z/index.jsonl \
+  .agentv/results/runs/2026-03-15T10-00-00-000Z/
+```
+
+Concrete regression-gating example:
+
+```bash
+agentv trend --last 8 --dataset code-review --target claude-sonnet \
+  --fail-on-degrading --slope-threshold 0.01
+```
+
+## Supported Inputs
+
+`trend` only accepts canonical run workspaces:
+
+- `.agentv/results/runs/<run-id>/`
+- `.agentv/results/runs/<run-id>/index.jsonl`
+
+Legacy flat `results.jsonl` files are rejected. The command reads only the lightweight `index.jsonl` manifests and does not require per-test artifact hydration.
+
+## Options
+
+| Option | Description |
+|--------|-------------|
+| `--last <n>` | Use the most recent `n` runs from `.agentv/results/runs/` |
+| `--dataset <name>` | Filter records to one dataset |
+| `--target <name>` | Filter records to one target inside each run |
+| `--slope-threshold <n>` | Minimum absolute slope required to classify the trend as improving or degrading (default: `0.01`) |
+| `--fail-on-degrading` | Exit non-zero when the detected trend is degrading beyond the threshold |
+| `--allow-missing-tests` | Aggregate each run independently instead of intersecting test IDs across runs |
+| `--format`, `-f` | Output format: `table` (default) or `json` |
+| `--json` | Shorthand for `--format=json` |
+
+## How It Works
+
+1. Loads each selected `index.jsonl` manifest.
+2. Applies `dataset` and `target` filters per record.
+3. By default, reduces every run to the intersection of test IDs present in all selected runs.
+4. Computes one mean score per run.
+5. Fits a simple linear regression over run index `0..N-1`.
+6. Classifies the slope as `improving`, `degrading`, or `stable`.
+
+Strict matched-test analysis is the default because changing test composition across runs can create false drift signals.
+
+## Worked Example
+
+Suppose three historical runs for `dataset=code-review` and `target=claude-sonnet` produce matched mean scores of `0.92`, `0.86`, and `0.80`.
+
+- The fitted slope is `-0.06` per run, so it is negative.
+- The command reports `direction=degrading`.
+- With `--fail-on-degrading --slope-threshold 0.01`, the command exits with code `1`.
+
+This is the intended CI workflow for detecting slow drift that a single pairwise comparison can miss.
+
+## Output
+
+### Table format
+
+```text
+Trend Analysis
+
+Runs: 3 | Range: 2026-03-01T10:00:00.000Z → 2026-03-15T10:00:00.000Z
+Filters: dataset=code-review target=claude-sonnet mode=matched-tests
+Matched Tests: 42 | Verdict: degrading
+
+  Run                           Tests  Mean Score
+  ----------------------------  -----  ----------
+  2026-03-01T10:00:00.000Z         42       0.920
+  2026-03-08T10:00:00.000Z         42       0.905
+  2026-03-15T10:00:00.000Z         42       0.892
+
+Summary: slope=-0.014 intercept=0.920 r²=0.998
+Regression Gate: threshold=0.010 fail_on_degrading=true triggered=true
+```
+
+### JSON format
+
+```json
+{
+  "runs": [
+    {
+      "label": "2026-03-01T10:00:00.000Z",
+      "path": "/repo/.agentv/results/runs/2026-03-01T10-00-00-000Z/index.jsonl",
+      "timestamp": "2026-03-01T10:00:00.000Z",
+      "matched_test_count": 42,
+      "mean_score": 0.92
+    }
+  ],
+  "filters": {
+    "dataset": "code-review",
+    "target": "claude-sonnet",
+    "allow_missing_tests": false
+  },
+  "summary": {
+    "run_count": 3,
+    "matched_test_count": 42,
+    "date_range": {
+      "start": "2026-03-01T10:00:00.000Z",
+      "end": "2026-03-15T10:00:00.000Z"
+    },
+    "slope": -0.014,
+    "intercept": 0.92,
+    "r_squared": 0.998,
+    "direction": "degrading"
+  },
+  "regression": {
+    "slope_threshold": 0.01,
+    "fail_on_degrading": true,
+    "triggered": true
+  }
+}
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| `0` | Informational mode, or no degrading trend triggered |
+| `1` | Invalid input, analysis error, or `--fail-on-degrading` detected a degrading trend |
+
+## Compare vs Trend
+
+- `compare` answers: "Did this run beat that run?"
+- `trend` answers: "Across many runs, are scores drifting up or down?"
+
+Use `compare` for pairwise regressions. Use `trend` for longitudinal drift detection.
diff --git a/examples/features/trend/README.md b/examples/features/trend/README.md
new file mode 100644
index 000000000..83c70afe2
--- /dev/null
+++ b/examples/features/trend/README.md
@@ -0,0 +1,95 @@
+# Trend Analysis Example
+
+This example demonstrates `agentv trend` on three historical runs for the same dataset and target.
+
+Scenario:
+
+- Dataset: `code-review`
+- Target: `claude-sonnet`
+- Test IDs tracked across runs: `summary-accuracy`, `tool-selection`
+- Outcome: the per-run mean score degrades steadily from `0.92` to `0.86` to `0.80`
+
+## Files
+
+Tracked sample runs live in:
+
+```text
+sample-runs/
+  2026-03-01T10-00-00-000Z/index.jsonl
+  2026-03-08T10-00-00-000Z/index.jsonl
+  2026-03-15T10-00-00-000Z/index.jsonl
+```
+
+These are canonical run directories with `index.jsonl`.
+
+## End-User Flow
+
+Most real users will run `trend` against their latest eval history with `--last`.
+
+To reproduce that flow from this example directory, first copy the sample runs into the normal runtime layout:
+
+```bash
+mkdir -p .agentv/results/runs
+cp -R sample-runs/* .agentv/results/runs/
+```
+
+Then run:
+
+```bash
+bun ../../../apps/cli/src/cli.ts trend --last 3 --dataset code-review --target claude-sonnet
+```
+
+Expected output:
+
+```text
+Trend Analysis
+
+Runs: 3 | Range: 2026-03-01T10:00:00.000Z → 2026-03-15T10:00:00.000Z
+Filters: dataset=code-review target=claude-sonnet mode=matched-tests
+Matched Tests: 2 | Verdict: degrading
+
+  Run                         Tests  Mean Score
+  ────────────────────────  ───────  ──────────
+  2026-03-01T10:00:00.000Z        2       0.920
+  2026-03-08T10:00:00.000Z        2       0.860
+  2026-03-15T10:00:00.000Z        2       0.800
+
+Summary: slope=-0.060 intercept=0.920 r²=1.000
+Regression Gate: threshold=0.010 fail_on_degrading=false triggered=false
+```
+
+Interpretation:
+
+- The command auto-discovers the most recent three runs.
+- It filters to `dataset=code-review` and `target=claude-sonnet`.
+- It intersects matched test IDs across runs and detects a steady downward score trend.
+
+## Explicit Inputs
+
+If you want to see the same analysis without copying files into `.agentv/results/runs/`, point `trend` at the sample runs directly:
+
+```bash
+bun ../../../apps/cli/src/cli.ts trend \
+  sample-runs/2026-03-01T10-00-00-000Z \
+  sample-runs/2026-03-08T10-00-00-000Z \
+  sample-runs/2026-03-15T10-00-00-000Z \
+  --dataset code-review \
+  --target claude-sonnet
+```
+
+## CI Gate Example
+
+To turn the same analysis into a failure signal:
+
+```bash
+bun ../../../apps/cli/src/cli.ts trend \
+  sample-runs/2026-03-01T10-00-00-000Z \
+  sample-runs/2026-03-08T10-00-00-000Z \
+  sample-runs/2026-03-15T10-00-00-000Z \
+  --dataset code-review \
+  --target claude-sonnet \
+  --fail-on-degrading \
+  --slope-threshold 0.01
+```
+
+This exits with code `1` because the degrading slope magnitude exceeds `0.01`.
diff --git a/examples/features/trend/sample-runs/2026-03-01T10-00-00-000Z/index.jsonl b/examples/features/trend/sample-runs/2026-03-01T10-00-00-000Z/index.jsonl
new file mode 100644
index 000000000..8379d80e6
--- /dev/null
+++ b/examples/features/trend/sample-runs/2026-03-01T10-00-00-000Z/index.jsonl
@@ -0,0 +1,3 @@
+{"timestamp":"2026-03-01T10:00:00.000Z","test_id":"summary-accuracy","dataset":"code-review","target":"claude-sonnet","score":0.94}
+{"timestamp":"2026-03-01T10:00:00.000Z","test_id":"tool-selection","dataset":"code-review","target":"claude-sonnet","score":0.90}
+{"timestamp":"2026-03-01T10:00:00.000Z","test_id":"summary-accuracy","dataset":"code-review","target":"gpt-5","score":0.88}
diff --git a/examples/features/trend/sample-runs/2026-03-08T10-00-00-000Z/index.jsonl b/examples/features/trend/sample-runs/2026-03-08T10-00-00-000Z/index.jsonl
new file mode 100644
index 000000000..3a41da3b0
--- /dev/null
+++ b/examples/features/trend/sample-runs/2026-03-08T10-00-00-000Z/index.jsonl
@@ -0,0 +1,3 @@
+{"timestamp":"2026-03-08T10:00:00.000Z","test_id":"summary-accuracy","dataset":"code-review","target":"claude-sonnet","score":0.88}
+{"timestamp":"2026-03-08T10:00:00.000Z","test_id":"tool-selection","dataset":"code-review","target":"claude-sonnet","score":0.84}
+{"timestamp":"2026-03-08T10:00:00.000Z","test_id":"summary-accuracy","dataset":"code-review","target":"gpt-5","score":0.90}
diff --git a/examples/features/trend/sample-runs/2026-03-15T10-00-00-000Z/index.jsonl b/examples/features/trend/sample-runs/2026-03-15T10-00-00-000Z/index.jsonl
new file mode 100644
index 000000000..75dc05a21
--- /dev/null
+++ b/examples/features/trend/sample-runs/2026-03-15T10-00-00-000Z/index.jsonl
@@ -0,0 +1,3 @@
+{"timestamp":"2026-03-15T10:00:00.000Z","test_id":"summary-accuracy","dataset":"code-review","target":"claude-sonnet","score":0.82}
+{"timestamp":"2026-03-15T10:00:00.000Z","test_id":"tool-selection","dataset":"code-review","target":"claude-sonnet","score":0.78}
+{"timestamp":"2026-03-15T10:00:00.000Z","test_id":"summary-accuracy","dataset":"code-review","target":"gpt-5","score":0.91}
diff --git a/packages/core/test/evaluation/providers/cli-schema.test.ts b/packages/core/test/evaluation/providers/cli-schema.test.ts
index acff93e1e..a2646b7d4 100644
--- a/packages/core/test/evaluation/providers/cli-schema.test.ts
+++ b/packages/core/test/evaluation/providers/cli-schema.test.ts
@@ -101,7 +101,6 @@ describe('CliTargetInputSchema', () => {
     const input = { provider: 'cli', command: 'agent run {PROMPT}' };
     expect(CliTargetInputSchema.safeParse(input).success).toBe(false);
   });
-
 });
 
 describe('CliHealthcheckSchema (strict)', () => {