gregpriday · gregpriday · Feb 26, 2026 · Feb 26, 2026
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -0,0 +1,115 @@
+name: Performance Benchmarks
+
+on:
+  # Scheduled run: nightly at 02:00 UTC on the default branch
+  schedule:
+    - cron: '0 2 * * *'
+
+  # On-demand run, e.g. for pre-release validation or ad-hoc testing
+  workflow_dispatch:
+    inputs:
+      ref:
+        default: ''  # left empty, the checkout step falls back to github.ref
+        required: false
+        description: 'Git ref to benchmark (branch, tag, or SHA). Defaults to the workflow ref (usually the default branch).'
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    name: Performance Budget Check
+
+    # The budget tests build file trees of 10k+ files; 30 minutes leaves ample room
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.ref || github.ref }}  # dispatch input if set, else the triggering ref
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          cache: 'npm'
+          node-version: '20'
+
+      - name: Install dependencies
+        run: npm ci
+
+      # ------------------------------------------------------------------ #
+      # Step 1: Jest budget assertions                                      #
+      # Any duration or memory budget overrun fails this step and the job.  #
+      # ------------------------------------------------------------------ #
+      - name: Run performance budget tests
+        id: budget_tests
+        env:
+          # Plain, colour-free output keeps the CI log readable
+          FORCE_COLOR: '0'
+          NO_COLOR: '1'
+        run: npm run test:performance
+
+      # ------------------------------------------------------------------ #
+      # Step 2: Standalone file-discovery benchmark                         #
+      # Writes a detailed JSON report for trend analysis.                   #
+      # Runs even when the budget tests failed, so data is still captured.  #
+      # ------------------------------------------------------------------ #
+      - name: Run file-discovery benchmark
+        id: bench_discovery
+        if: always()
+        continue-on-error: true
+        env:
+          NO_COLOR: '1'
+        # --quick: reduced run (1k files, per the original note) to keep total CI time manageable
+        run: |
+          mkdir -p tests/performance/results
+          node tests/performance/file-discovery.bench.js --quick --out tests/performance/results/discovery-ci.json
+
+      # ------------------------------------------------------------------ #
+      # Step 3: Emit GitHub Actions step summary                            #
+      # ------------------------------------------------------------------ #
+      - name: Write CI summary
+        if: always()
+        run: |
+          RESULT_DIR="tests/performance/results"
+          # Report the commit that was actually checked out: github.sha is the commit the
+          # workflow ran from, which differs when the ref input selects another ref.
+          COMMIT=$(git rev-parse HEAD 2>/dev/null || echo "${{ github.sha }}")
+          {
+            echo "## Performance Benchmark Results"
+            echo ""
+            echo "| Metric | Value |"
+            echo "|--------|-------|"
+            echo "| Node.js | $(node --version) |"
+            echo "| Runner OS | ${{ runner.os }} |"
+            echo "| Ref | $COMMIT |"
+            echo ""
+
+            # Budget results: any JSON other than the discovery report (-q keeps file names out of the log)
+            if ls "$RESULT_DIR"/*.json 2>/dev/null | grep -qv discovery-ci; then
+              printf '### Budget Test Results\n\n```json\n'
+              # Newest result file for the large fixture, by modification time
+              LATEST=$(ls -t "$RESULT_DIR"/budget-large-*.json 2>/dev/null | head -1)
+              if [ -n "$LATEST" ]; then
+                cat "$LATEST"
+              fi
+              echo '```'
+            fi
+
+            if [ -f "$RESULT_DIR/discovery-ci.json" ]; then
+              printf '\n### File-Discovery Benchmark\n\n```json\n'
+              cat "$RESULT_DIR/discovery-ci.json"
+              echo '```'
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      # ------------------------------------------------------------------ #
+      # Step 4: Publish every result JSON file as a workflow artifact       #
+      # ------------------------------------------------------------------ #
+      - name: Upload benchmark results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results-${{ github.sha }}
+          path: tests/performance/results/
+          if-no-files-found: ignore
+          retention-days: 90
diff --git a/.gitignore b/.gitignore
@@ -51,3 +51,6 @@ output/
 # Performance profiles (generated by --profile and npm run profile:*)
 .profiles/
 !.profiles/baseline/
+
+# Performance benchmark result artifacts (generated by npm run test:performance)
+tests/performance/results/*.json
diff --git a/jest.config.js b/jest.config.js
@@ -59,6 +59,7 @@ const mockedProject = {
     '**/tests/e2e/**/*.test.js',
     '**/tests/e2e/**/*.spec.js',
     '**/tests/performance/**/*.test.js',
+    '!**/tests/performance/budget.test.js', // Requires real fs-extra — run via jest.performance.config.js
     '!**/tests/real/**/*.test.js', // Exclude real tests
     '!**/tests/integration/fileDiscoveryStage.parallel.test.js', // Requires real fs-extra and ConfigManager
     '!**/tests/integration/concurrent-operations.test.js', // Requires real ConfigManager

diff --git a/jest.performance.config.js b/jest.performance.config.js
@@ -0,0 +1,50 @@
+/**
+ * Jest configuration dedicated to the performance budget tests.
+ *
+ * Unlike the main config, this one leaves fs-extra and ConfigManager unmocked:
+ * the tests have to hit the real filesystem for their timings to mean
+ * anything.
+ *
+ * Usage:
+ *   npm run test:performance
+ */
+
+export default {
+  verbose: true,
+  testEnvironment: 'node',
+
+  // Restrict discovery to test files under tests/performance/
+  testMatch: ['**/tests/performance/**/*.test.js'],
+
+  // Minimal setup file with no global module mocks (budget.test.js notes that it does stub console.log)
+  setupFiles: ['<rootDir>/tests/setup-env.js'],
+
+  // Babel handles the ESM sources, as in the main config
+  transform: {
+    '^.+\\.(js|jsx)$': 'babel-jest',
+  },
+
+  // An empty list means node_modules is transformed too, so ESM packages work
+  transformIgnorePatterns: [],
+
+  // Path aliases and lightweight stand-ins
+  moduleNameMapper: {
+    // Resolve the @/ shorthand used by some src imports
+    '^@/(.*)$': '<rootDir>/src/$1.js',
+    // UI mocks: the performance tests never use them, but they prevent import
+    // errors when a transitive dependency pulls one in.
+    '^chalk$': '<rootDir>/tests/mocks/chalk.js',
+    '^ora$': '<rootDir>/tests/mocks/ora.js',
+    '^.*/utils/logger\\.js$': '<rootDir>/tests/mocks/logger.js',
+    '^.*/config\\.js$': '<rootDir>/tests/mocks/config.js',
+  },
+
+  // Fixture generation plus discovery is slow: allow 2 minutes per test
+  testTimeout: 120_000,
+
+  // A single worker keeps memory measurements free of concurrent test noise
+  maxWorkers: 1,
+
+  // Reset mock state between tests (a no-op while no global mocks exist)
+  clearMocks: true,
+};
diff --git a/package.json b/package.json
@@ -30,6 +30,8 @@
     "profile:heap": "node scripts/profile.js --type heap",
     "profile:compare": "node scripts/profile-compare.js",
     "benchmark": "node tests/performance/benchmark.js",
+    "benchmark:discovery": "node tests/performance/file-discovery.bench.js",
+    "test:performance": "jest --config jest.performance.config.js --runInBand",
     "lint": "eslint src/ bin/",
     "lint:fix": "eslint src/ bin/ --fix",
     "format": "prettier --write \"src/**/*.{js,jsx}\" \"bin/**/*.js\" \"tests/**/*.{js,jsx}\"",

diff --git a/tests/performance/budget.test.js b/tests/performance/budget.test.js
@@ -0,0 +1,165 @@
+/**
+ * Performance Budget Tests
+ *
+ * Enforces documented performance targets from CLAUDE.md:
+ *   - Process 10,000 files in < 30 seconds
+ *   - Memory usage < 500MB for large projects
+ *
+ * These tests use the real filesystem and real walkers — they MUST NOT
+ * run under jest.config.js's "mocked" project because fs-extra is mocked
+ * there. Use jest.performance.config.js instead.
+ *
+ * Run with:
+ *   npm run test:performance
+ */
+
+import { performance } from 'node:perf_hooks';
+import os from 'os';
+import path from 'path';
+import { readFileSync } from 'fs';
+import fs from 'fs-extra';
+import { fileURLToPath } from 'url';
+import { walkWithIgnore } from '../../src/utils/ignoreWalker.js';
+import { generateFixture, cleanupFixtures } from '../helpers/fixtureGenerator.js';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const BUDGETS_PATH = path.join(__dirname, 'budgets.json');
+const RESULTS_DIR = path.join(__dirname, 'results');
+
+// Load budgets synchronously — top-level await is not supported by babel-jest
+const budgets = JSON.parse(readFileSync(BUDGETS_PATH, 'utf8'));
+
+/**
+ * Walk a fixture with walkWithIgnore, recording elapsed time and RSS growth.
+ *
+ * Memory is reported as the rise of peak RSS over the pre-walk baseline, not as
+ * an absolute figure, so the footprint of the Node.js/Jest process itself
+ * (typically 150–200 MB) cannot cause a false budget failure.
+ *
+ * @param {string} fixturePath - Root of the synthetic project to walk
+ * @returns {Promise<{duration: number, fileCount: number, deltaRssMb: number, baselineRssMb: number}>}
+ */
+async function measureDiscovery(fixturePath) {
+  const BYTES_PER_MB = 1024 * 1024;
+  const readRss = () => process.memoryUsage().rss;
+  // global.gc only exists when Node was started with --expose-gc
+  if (typeof global.gc === 'function') {
+    global.gc();
+  }
+
+  const baselineRss = readRss();
+  const startTime = performance.now();
+  let highWaterRss = baselineRss;
+  let fileCount = 0;
+
+  const walker = walkWithIgnore(fixturePath, {
+    ignoreFileName: '.copytreeignore',
+    includeDirectories: false,
+    followSymlinks: false,
+  });
+  for await (const _entry of walker) {
+    fileCount += 1;
+    // Sampling on every 100th file keeps the measurement overhead low
+    if (fileCount % 100 === 0) {
+      highWaterRss = Math.max(highWaterRss, readRss());
+    }
+  }
+
+  const duration = performance.now() - startTime;
+  // Closing sample, so the end-of-walk RSS always counts towards the peak
+  highWaterRss = Math.max(highWaterRss, readRss());
+
+  return {
+    duration,
+    fileCount,
+    deltaRssMb: (highWaterRss - baselineRss) / BYTES_PER_MB,
+    baselineRssMb: baselineRss / BYTES_PER_MB,
+  };
+}
+
+/**
+ * Write one run's metrics to RESULTS_DIR as budget-<label>-<timestamp>.json.
+ * `label` is a key of budgets.fileDiscovery; `metrics` comes from measureDiscovery().
+ */
+async function saveResults(label, metrics) {
+  await fs.ensureDir(RESULTS_DIR);
+  const timestamp = new Date().toISOString(); // one clock reading, so file name and payload agree
+  const filename = `budget-${label}-${timestamp.replace(/[:.]/g, '-')}.json`;
+  const payload = {
+    label,
+    timestamp,
+    platform: os.platform(),
+    arch: os.arch(),
+    nodeVersion: process.version,
+    metrics,
+    budgets: budgets.fileDiscovery[label],
+  };
+  await fs.writeJson(path.join(RESULTS_DIR, filename), payload, { spaces: 2 });
+}
+
+// Fixture descriptors keyed by size label; the tests read their .path and .fileCount.
+const fixtures = {};
+
+beforeAll(async () => {
+  // Every tree is built up front (small is quick, medium takes a few seconds,
+  // large up to ~20s) so that generation cost never lands on an individual test.
+  //
+  // withIgnoreFiles: false keeps random .copytreeignore files out of the tree.
+  // Those can make the generator stop early, so leaving them out improves the
+  // consistency of the generated file count.
+  const build = (name, fileCount) => generateFixture({ fileCount, name, withIgnoreFiles: false });
+  const [small, medium, large] = await Promise.all([
+    build('budget-small', 100),
+    build('budget-medium', 1000),
+    build('budget-large', 10000),
+  ]);
+  Object.assign(fixtures, { small, medium, large });
+}, 120_000); // Fixture generation may take up to 2 minutes
+
+afterAll(async () => {
+  await cleanupFixtures();
+});
+
+describe('Performance Budgets — File Discovery', () => {
+  // Walk the fixture registered under `label`, persist the numbers, and return them.
+  const measureAndSave = async (label) => {
+    const metrics = await measureDiscovery(fixtures[label].path);
+    await saveResults(label, metrics);
+    return metrics;
+  };
+
+  // Assertions shared by every size: a file-count sanity check, then both budgets.
+  const expectWithinBudget = (label, metrics) => {
+    const budget = budgets.fileDiscovery[label];
+    // More than 50% of generated files must be discovered (guards against degenerate fixture)
+    expect(metrics.fileCount).toBeGreaterThan(fixtures[label].fileCount * 0.5);
+    expect(metrics.duration).toBeLessThan(budget.maxDurationMs);
+    expect(metrics.deltaRssMb).toBeLessThan(budget.maxMemoryDeltaMb);
+  };
+
+  test('small project (100 files) completes within budget', async () => {
+    const metrics = await measureAndSave('small');
+    expectWithinBudget('small', metrics);
+  }, 30_000);
+
+  test('medium project (1k files) completes within budget', async () => {
+    const metrics = await measureAndSave('medium');
+    expectWithinBudget('medium', metrics);
+  }, 30_000);
+
+  test('large project (10k files) completes within budget', async () => {
+    const metrics = await measureAndSave('large');
+
+    // console.log is replaced with jest.fn() by setup-env.js and would swallow this
+    // line, so write to stdout directly to get the metrics into the CI log.
+    // The line is emitted before the assertions so it appears even when one fails.
+    const generated = fixtures.large.fileCount;
+    process.stdout.write(
+      `[perf] large discovery: ${metrics.duration.toFixed(0)}ms | ` +
+        `${metrics.fileCount} files (of ${generated} generated) | ` +
+        `+${metrics.deltaRssMb.toFixed(1)} MB RSS delta\n`,
+    );
+
+    expectWithinBudget('large', metrics);
+  }, 60_000);
+});
diff --git a/tests/performance/budgets.json b/tests/performance/budgets.json
@@ -0,0 +1,29 @@
+{
+  "fileDiscovery": {
+    "small": {
+      "fileCount": 100,
+      "maxDurationMs": 500,
+      "maxMemoryDeltaMb": 30,
+      "description": "Small project (100 files) should complete in < 500ms with < 30 MB RSS growth"
+    },
+    "medium": {
+      "fileCount": 1000,
+      "maxDurationMs": 3000,
+      "maxMemoryDeltaMb": 80,
+      "description": "Medium project (1k files) should complete in < 3s with < 80 MB RSS growth"
+    },
+    "large": {
+      "fileCount": 10000,
+      "maxDurationMs": 30000,
+      "maxMemoryDeltaMb": 350,
+      "description": "Large project (10k files) should complete in < 30s with < 350 MB RSS growth (total < 500 MB)"
+    }
+  },
+  "regressionThresholdPercent": 10,
+  "notes": {
+    "memoryMeasurement": "Delta RSS (peak RSS minus baseline before walk) in MB — excludes Node.js/Jest process overhead",
+    "durationMeasurement": "Wall-clock time from walk start to last file yielded (ms)",
+    "regressionPolicy": "budget.test.js fails the build as soon as a measured metric is not below its budget; regressionThresholdPercent is not applied by those assertions",
+    "totalMemoryTarget": "Process target from CLAUDE.md is < 500 MB total RSS; delta budget of 350 MB leaves headroom for the ~150 MB baseline"
+  }
+}