From 02d3b3548d3d1b580f81639c220bdd0c38a35f8b Mon Sep 17 00:00:00 2001 From: titanh3art <18174614+titanh3art@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:10:42 +0530 Subject: [PATCH 1/5] perf test initial commit --- .gitignore | 5 +- .mocharc.json | 1 + package.json | 2 + test/performance/.mocharc.performance.json | 7 + .../PERFORMANCE-TESTING-STRATEGY.md | 178 +++ .../performance/ProcessingPerformance.test.ts | 1002 +++++++++++++++++ test/performance/performance-baseline.ci.json | 62 + test/tsconfig.json | 2 +- 8 files changed, 1257 insertions(+), 2 deletions(-) create mode 100644 test/performance/.mocharc.performance.json create mode 100644 test/performance/PERFORMANCE-TESTING-STRATEGY.md create mode 100644 test/performance/ProcessingPerformance.test.ts create mode 100644 test/performance/performance-baseline.ci.json diff --git a/.gitignore b/.gitignore index e026a18..27cc349 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,7 @@ mta_archives/ # Tests coverage/ TEST-mocha.xml -.nyc_output/ \ No newline at end of file +.nyc_output/ + +# Performance baselines (machine-specific) +test/performance/performance-baseline.json diff --git a/.mocharc.json b/.mocharc.json index 647522c..6306198 100644 --- a/.mocharc.json +++ b/.mocharc.json @@ -1,5 +1,6 @@ { "spec": "test/**/*.test.ts", + "ignore": ["test/performance/**"], "recursive": true, "timeout": 600000, "require": ["ts-node/register/transpile-only"], diff --git a/package.json b/package.json index 38faeb3..ae9c8cf 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,8 @@ "tar": "npm run build && cd gen && npm pack", "watch-data-inspector-ui": "cds watch --open data-inspector-ui/webapp/index.html?sap-ui-xx-viewCache=false", "test": "cross-env CDS_TYPESCRIPT=true mocha", + "test:performance": "cross-env CDS_TYPESCRIPT=true PERF_TESTS=1 mocha --config test/performance/.mocharc.performance.json", + "test:performance:update-baseline": "cross-env CDS_TYPESCRIPT=true PERF_TESTS=1 PERF_UPDATE_BASELINE=1 mocha 
--config test/performance/.mocharc.performance.json", "coverage": "cross-env CDS_TYPESCRIPT=true c8 mocha" }, "peerDependencies": { diff --git a/test/performance/.mocharc.performance.json b/test/performance/.mocharc.performance.json new file mode 100644 index 0000000..d0f5895 --- /dev/null +++ b/test/performance/.mocharc.performance.json @@ -0,0 +1,7 @@ +{ + "spec": "test/performance/**/*.test.ts", + "recursive": true, + "timeout": 600000, + "require": ["ts-node/register/transpile-only"], + "exit": true +} diff --git a/test/performance/PERFORMANCE-TESTING-STRATEGY.md b/test/performance/PERFORMANCE-TESTING-STRATEGY.md new file mode 100644 index 0000000..fd60c54 --- /dev/null +++ b/test/performance/PERFORMANCE-TESTING-STRATEGY.md @@ -0,0 +1,178 @@ +# Performance Testing Strategy — @cap-js/data-inspector + +## 1. Overview + +This document describes the performance testing strategy for the `@cap-js/data-inspector` CAP plugin. The strategy focuses on **local processing benchmarks** — measuring the CPU/memory cost of in-process data transformations performed by the plugin's core classes, with external I/O (database, network) stubbed out. + +### Why not end-to-end? + +`data-inspector` is a CDS plugin that is consumed by host CAP applications. End-to-end latency depends heavily on the host application's database, network, and authentication stack — none of which are under this plugin's control. Testing at the class/method level isolates the plugin's own computational work and produces **stable, reproducible, CI-friendly** measurements. + +## 2. 
Product Standards Coverage + +This testing strategy addresses the following SAP Performance Product Standards: + +| Standard | Title | How Addressed | +| ----------- | ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **PERF-01** | Prohibit quadratic or worse scaling | Slope-ratio analysis across 5 input sizes (10→1000) detects O(n²) growth patterns. R² coefficient verifies linearity. | +| **PERF-03** | Monitor for performance regressions | Baseline comparison with configurable regression threshold (default 30%). CI workflow runs on PRs to `main` that touch `srv/`, `lib/`, or `test/performance/` (warn-only). | +| **PERF-05** | Avoid hidden allocations | Memory delta tracking (heap usage before/after) per benchmark identifies unexpected allocation growth. | +| **PERF-11** | Use caching where appropriate | Caching effectiveness is indirectly validated via EntityDefinitionReader benchmarks — repeated entity reads exercise the WeakMap cache in CsnRuntimeExtensions; per-item cost should remain flat. | + +### Standards not applicable to first release + +| Standard | Title | Reason | +| ----------- | ------------------------------- | ------------------------------------------------------------------------------------------------------- | +| **PERF-07** | Network round-trip optimization | Plugin does not make outbound network calls; DB access is delegated to the CAP runtime. | +| **PERF-09** | Concurrent request handling | As a CDS service handler plugin, concurrency is managed by the CAP Node.js runtime, not by this plugin. | + +## 3. 
Architecture + +### 3.1 Test location + +``` +test/performance/ +├── .mocharc.performance.json # Mocha config (perf tests only) +├── ProcessingPerformance.test.ts # All benchmarks +├── performance-baseline.json # Local developer baseline (gitignored) +├── performance-baseline.ci.json # CI baseline (committed to repo) +└── PERFORMANCE-TESTING-STRATEGY.md # This file +``` + +### 3.2 What is benchmarked + +| Group | Benchmark | What it measures | +| ------ | ------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| **A1** | `EntityDefinitionReader.read (collection)` | Full collection read: iterate entities, build element metadata, filter hidden entities, paginate, sort, construct response | +| **A2** | `EntityDefinitionReader.read (filtered)` | Same as A1 but with `$filter=contains(name, ...)` to measure filter parsing overhead | +| **A3** | `EntityDefinitionReader._getEntityElements (via read)` | Element extraction scaling: one entity with N elements (N = 10→1000) | +| **B1** | `DataReader.read (response construction, DB stubbed)` | Response loop after DB query: entity resolution, key construction, record transformation. DB returns pre-built synthetic records. | +| **B2** | `DataReader._emitAuditlogs (stubbed audit-log)` | Audit log emission with sensitive data fields. Audit-log service is stubbed; measures per-record processing overhead. | + +### 3.3 Measurement methodology + +For each benchmark, measurements are taken across 5 input sizes: **10, 50, 100, 500, 1000**. + +For each size: +1. **Warmup** — 10 runs (configurable) to stabilize JIT +2. **Measurement** — 30 total runs (20 kept + 10 extra for outlier trimming) +3. **Outlier removal** — All 30 runs are ranked by distance from their preliminary mean; the 10 runs furthest from it (the extra runs, 50% of the measurement count by default) are discarded and the 20 closest are kept +4. 
**Statistics** — Median, mean, standard deviation, 95% confidence interval, CV% + +### 3.4 Scaling analysis + +Three complementary metrics detect non-linear scaling: + +| Metric | What it detects | Threshold | +| ------------------------------------- | ---------------------------------------------------------- | ------------------------------------ | +| **Slope ratio** | Ratio of last slope segment to first. O(n) = ~1.0 | 🟢 ≤ 2.0 / 🟡 2.0–4.0 / 🔴 > 4.0 | +| **R² (coefficient of determination)** | How well medians fit a straight line. 1.0 = perfect linear | 🟢 ≥ 0.995 / 🟡 0.98–0.995 / 🔴 < 0.98 | +| **Per-item time** | Time per item at max size; detects absolute overhead | Compared to baseline (30% tolerance) | + +### 3.5 Baseline management + +Two baselines are maintained, following the same pattern as `ai-log-analyzer`: + +| File | Git status | Purpose | +| ------------------------------ | -------------- | ------------------------------------------- | +| `performance-baseline.json` | **gitignored** | Local developer baseline (machine-specific) | +| `performance-baseline.ci.json` | **committed** | CI baseline (shared, versioned reference) | + +- **Local**: Run `npm run test:performance:update-baseline` to create `performance-baseline.json` for your machine +- **CI**: The rebaseline workflow (`performance-rebaseline.yml`) runs benchmarks on CI hardware and commits `performance-baseline.ci.json` back to the repo. The PR workflow reads this committed file via `PERF_BASELINE_FILE=performance-baseline.ci.json` +- Local baselines are **machine-specific** (gitignored) because absolute timings vary by hardware +- The CI baseline is **committed** so it is reproducible, auditable via `git log`, and immune to cache eviction +- The first run without a baseline gracefully skips (no failure) + +### 3.6 Regression detection + +When a baseline exists, each benchmark result is compared: + +1. 
**Per-item time** at maximum size must not exceed `baseline × (1 + MAX_REGRESSION)` (default: +30%) +2. **Slope ratio** must not exceed `baseline × (1 + MAX_SLOPE_VARIANCE)` (default: +30%) + +#### Warn-only behavior (by design) + +Regressions are surfaced via `console.warn` — **they do not fail the test**. The test only fails if no benchmarks run at all. This is intentional and consistent with [ai-log-analyzer](https://github.tools.sap/erp4sme/ai-log-analyzer)'s approach, for the following reasons: + +- **CI hardware variance**: GitHub Actions shared runners have noisy neighbors, variable CPU clock speeds, and occasional GC pauses. Even with a 30% threshold and outlier trimming, hard failures would produce flaky CI. +- **Primary value is scaling detection**: The slope ratio and R² metrics detect O(n²) bugs, which produce dramatic regressions (10x+). These are obvious even in warn-only mode. +- **Per-item regression is informational**: Absolute timing depends on hardware; a 30% regression on CI may not reproduce locally. + +Warnings appear in the CI console output and in the performance report files (`coverage/performance-report.md`), so PR reviewers can investigate if they see them. + +#### Evolving to a hard gate (future) + +If a hard gate is desired in the future: +1. Change `console.warn` to `expect` assertions in the regression checks +2. Consider increasing the threshold to 50% for CI to absorb more noise +3. Alternatively, add a separate CI job with `continue-on-error: true` so it shows as a yellow check (not a red X) — signaling "review needed" without blocking merge + +## 4. 
Running the Tests + +### Local development + +```bash +# First time: create your machine's baseline +npm run test:performance:update-baseline + +# Subsequent runs: compare against baseline +npm run test:performance +``` + +### Environment variables + +| Variable | Default | Description | +| --------------------------- | --------------------------- | ---------------------------------------------- | +| `PERF_TESTS` | `0` | Set to `1` to enable performance tests | +| `PERF_UPDATE_BASELINE` | `0` | Set to `1` to write new baseline after run | +| `PERF_MAX_REGRESSION` | `0.3` | Maximum allowed per-item time regression (30%) | +| `PERF_MAX_SLOPE_VARIANCE` | `0.3` | Maximum allowed slope ratio increase (30%) | +| `PERF_WARMUP_RUNS` | `10` | Warmup iterations before measurement | +| `PERF_MEASUREMENT_RUNS` | `20` | Measurement iterations (kept after trimming) | +| `PERF_OUTLIER_TRIM_PERCENT` | `0.5` | Extra runs as fraction of measurement runs | +| `PERF_BASELINE_FILE` | `performance-baseline.json` | Baseline filename | + +### CI workflows + +| Workflow | Trigger | Purpose | +| ---------------------------- | -------------------------------------------------------- | -------------------------------------------------------------- | +| `performance-tests.yml` | PR to `main` (when srv/, lib/, test/performance/ change) | Run benchmarks, compare to committed CI baseline, log warnings | +| `performance-rebaseline.yml` | Manual dispatch | Run benchmarks on CI and commit `performance-baseline.ci.json` | + +## 5. 
Reports + +After each run, two report files are generated in `coverage/`: + +- **`performance-report.json`** — Machine-readable full results +- **`performance-report.md`** — Human-readable markdown with emoji indicators + +The markdown report includes: +- Environment details (Node version, CPU, memory, load) +- Test configuration (warmup, measurement, trim settings) +- System warnings (high CPU load, memory pressure) +- Results table with timing medians, CV%, per-item times, memory deltas, slope ratios, R², and baseline comparisons +- Legend explaining all indicators + +## 6. Synthetic Data Design + +All benchmarks use **synthetic data** rather than real CDS models: + +- **Entities**: Generated with configurable element counts, including keys, typed fields, hidden elements, associations, and various annotations (`@HideFromDataInspector`, `@PersonalData.IsPotentiallySensitive`, `@Core.Computed`) +- **Records**: Generated with configurable field counts, simulating realistic DB query results with UUIDs, strings, integers, and booleans +- **CDS Runtime**: `cds.model.all()`, `cds.services.db.run()`, `cds.parse.expr()`, and `cds.connect.to()` are monkey-patched per benchmark to return synthetic data, isolating the plugin's processing from actual CDS bootstrapping + +This approach ensures: +- No dependency on database state +- Deterministic, reproducible inputs +- Configurable scaling (the `sizes` array can be adjusted) +- Fast execution (no CDS server boot required) + +## 7. Future Enhancements + +As the plugin evolves, consider adding: + +1. **Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks (PERF-05 deeper coverage) +2. **Concurrent simulation** — If the plugin adds stateful processing, add benchmarks that simulate concurrent request patterns +3. **Larger scale tests** — Extend the sizes array to [100, 500, 1000, 5000, 10000] if real-world deployments involve very large CDS models +4. 
**UI rendering benchmarks** — If the SAPUI5 frontend becomes a performance concern, add browser-based benchmarks using Puppeteer \ No newline at end of file diff --git a/test/performance/ProcessingPerformance.test.ts b/test/performance/ProcessingPerformance.test.ts new file mode 100644 index 0000000..76a42db --- /dev/null +++ b/test/performance/ProcessingPerformance.test.ts @@ -0,0 +1,1002 @@ +import cds from "@sap/cds"; +import { performance } from "perf_hooks"; +import fs from "fs"; +import path from "path"; +import os from "os"; +import { expect } from "chai"; + +import { EntityDefinitionReader } from "../../srv/EntityDefinitionReader"; +import { DataReader } from "../../srv/DataReader"; + +// --------------------------------------------------------------------------- +// Configuration (env-overridable) +// --------------------------------------------------------------------------- +const PERF_ENABLED = process.env.PERF_TESTS === "1"; +const UPDATE_BASELINE = process.env.PERF_UPDATE_BASELINE === "1"; +const MAX_REGRESSION = Number(process.env.PERF_MAX_REGRESSION ?? "0.3"); +const MAX_SLOPE_VARIANCE = Number(process.env.PERF_MAX_SLOPE_VARIANCE ?? "0.3"); +const WARMUP_RUNS = Number(process.env.PERF_WARMUP_RUNS ?? "10"); +const MEASUREMENT_RUNS = Number(process.env.PERF_MEASUREMENT_RUNS ?? "20"); +const OUTLIER_TRIM_PERCENT = Number(process.env.PERF_OUTLIER_TRIM_PERCENT ?? "0.5"); + +const BASELINE_FILENAME = process.env.PERF_BASELINE_FILE ?? "performance-baseline.json"; +const BASELINE_PATH = path.resolve(__dirname, BASELINE_FILENAME); +const REPORT_PATH = path.resolve(__dirname, "..", "..", "coverage", "performance-report.json"); +const REPORT_MD_PATH = path.resolve(__dirname, "..", "..", "coverage", "performance-report.md"); + +const describePerf = PERF_ENABLED ? 
describe : describe.skip; + +// Sizes: number of synthetic entities for EntityDefinitionReader, +// number of synthetic records for DataReader +const sizes = [10, 50, 100, 500, 1000]; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- +type MeasurementStats = { + median: number; + mean: number; + stdDev: number; + min: number; + max: number; + confidenceInterval: number; +}; + +type BenchmarkResult = { + name: string; + sizes: number[]; + timingsMs: number[]; + timingStats: MeasurementStats[]; + perItemMs: number[]; + slopes: number[]; + slopeRatio: number; + r2: number; + memoryDeltaMB: number[]; +}; + +type BaselineEntry = { + sizes: number[]; + perItemMsAtMax: number; + slopeRatio: number; + r2?: number; +}; + +type BaselineData = Record<string, BaselineEntry>; + +type TestConfig = { + warmupRuns: number; + measurementRuns: number; + outlierTrimPercent: number; + totalRunsPerSize: number; +}; + +type Report = { + timestamp: string; + sizes: number[]; + results: BenchmarkResult[]; + baseline?: BaselineData; + regressionThreshold: number; + slopeVarianceThreshold: number; + testConfig: TestConfig; + environment: { + node: string; + platform: string; + cpus: string; + totalMemoryGB: number; + cpuLoad: number[]; + }; + systemWarnings: string[]; +}; + +// --------------------------------------------------------------------------- +// Statistics helpers +// --------------------------------------------------------------------------- +const median = (values: number[]): number => { + const sorted = [...values].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 === 0 ? 
(sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; +}; + +const mean = (values: number[]): number => + values.reduce((sum, val) => sum + val, 0) / values.length; + +const stdDev = (values: number[]): number => { + const avg = mean(values); + const squareDiffs = values.map((value) => Math.pow(value - avg, 2)); + return Math.sqrt(mean(squareDiffs)); +}; + +const calculateStats = (values: number[]): MeasurementStats => { + const sorted = [...values].sort((a, b) => a - b); + const avg = mean(values); + const sd = stdDev(values); + const ci = 1.96 * (sd / Math.sqrt(values.length)); + return { + median: median(values), + mean: avg, + stdDev: sd, + min: sorted[0], + max: sorted[sorted.length - 1], + confidenceInterval: ci, + }; +}; + +const checkSystemState = (): string[] => { + const warnings: string[] = []; + const loadAvg = os.loadavg(); + const cpuCount = os.cpus().length; + if (loadAvg[0] > cpuCount * 0.7) { + warnings.push( + `High CPU load detected: ${loadAvg[0].toFixed(2)} (${cpuCount} CPUs). 
Results may be unreliable.` + ); + } + const freeMemGB = os.freemem() / 1024 ** 3; + const totalMemGB = os.totalmem() / 1024 ** 3; + const memUsagePercent = ((totalMemGB - freeMemGB) / totalMemGB) * 100; + if (memUsagePercent > 85) { + warnings.push( + `High memory usage: ${memUsagePercent.toFixed(1)}% (${freeMemGB.toFixed(1)}GB free of ${totalMemGB.toFixed(1)}GB).` + ); + } + return warnings; +}; + +// --------------------------------------------------------------------------- +// Measurement +// --------------------------------------------------------------------------- +const measureAsync = async ( + fn: () => Promise<unknown>, + runs: number +): Promise<{ timings: number[]; stats: MeasurementStats; memoryDeltaMB: number }> => { + const extraRuns = Math.ceil(runs * OUTLIER_TRIM_PERCENT); + const totalRuns = runs + extraRuns; + const allTimings: number[] = []; + const memBefore = process.memoryUsage(); + + for (let i = 0; i < totalRuns; i++) { + const start = performance.now(); + await fn(); + const end = performance.now(); + allTimings.push(end - start); + } + + const memAfter = process.memoryUsage(); + const memoryDeltaMB = (memAfter.heapUsed - memBefore.heapUsed) / (1024 * 1024); + + const preliminaryMean = mean(allTimings); + const timingsWithDistance = allTimings.map((timing) => ({ + timing, + distance: Math.abs(timing - preliminaryMean), + })); + timingsWithDistance.sort((a, b) => a.distance - b.distance); + const trimmedTimings = timingsWithDistance + .slice(0, runs) + .map((t) => t.timing) + .sort((a, b) => a - b); + + return { timings: trimmedTimings, stats: calculateStats(trimmedTimings), memoryDeltaMB }; +}; + +const measureSync = ( + fn: () => void, + runs: number +): { timings: number[]; stats: MeasurementStats; memoryDeltaMB: number } => { + const extraRuns = Math.ceil(runs * OUTLIER_TRIM_PERCENT); + const totalRuns = runs + extraRuns; + const allTimings: number[] = []; + const memBefore = process.memoryUsage(); + + for (let i = 0; i < totalRuns; i++) { + 
const start = performance.now(); + fn(); + const end = performance.now(); + allTimings.push(end - start); + } + + const memAfter = process.memoryUsage(); + const memoryDeltaMB = (memAfter.heapUsed - memBefore.heapUsed) / (1024 * 1024); + + const preliminaryMean = mean(allTimings); + const timingsWithDistance = allTimings.map((timing) => ({ + timing, + distance: Math.abs(timing - preliminaryMean), + })); + timingsWithDistance.sort((a, b) => a.distance - b.distance); + const trimmedTimings = timingsWithDistance + .slice(0, runs) + .map((t) => t.timing) + .sort((a, b) => a - b); + + return { timings: trimmedTimings, stats: calculateStats(trimmedTimings), memoryDeltaMB }; +}; + +const computeSlopes = (times: number[], sizeValues: number[]): number[] => { + const slopes: number[] = []; + for (let i = 1; i < times.length; i++) { + const deltaT = times[i] - times[i - 1]; + const deltaN = sizeValues[i] - sizeValues[i - 1]; + slopes.push(deltaT / deltaN); + } + return slopes; +}; + +const computeR2 = (times: number[], sizeValues: number[]): number => { + const n = times.length; + if (n < 2) return 1; + const meanX = sizeValues.reduce((sum, x) => sum + x, 0) / n; + const meanY = times.reduce((sum, y) => sum + y, 0) / n; + let numerator = 0; + let denominator = 0; + for (let i = 0; i < n; i++) { + const dx = sizeValues[i] - meanX; + numerator += dx * (times[i] - meanY); + denominator += dx * dx; + } + const slope = denominator === 0 ? 0 : numerator / denominator; + const intercept = meanY - slope * meanX; + let ssRes = 0; + let ssTot = 0; + for (let i = 0; i < n; i++) { + const predicted = slope * sizeValues[i] + intercept; + ssRes += (times[i] - predicted) ** 2; + ssTot += (times[i] - meanY) ** 2; + } + return ssTot === 0 ? 
1 : 1 - ssRes / ssTot; +}; + +// --------------------------------------------------------------------------- +// Report building +// --------------------------------------------------------------------------- +const slopeRatioEmoji = (ratio: number): string => { + if (ratio <= 2.0) return "🟢"; + if (ratio <= 4.0) return "🟡"; + return "🔴"; +}; + +const r2Emoji = (r2: number): string => { + if (r2 >= 0.995) return "🟢"; + if (r2 >= 0.98) return "🟡"; + return "🔴"; +}; + +const cvEmoji = (cv: number): string => { + if (cv <= 5) return "🟢"; + if (cv <= 15) return "🟡"; + return "🔴"; +}; + +const formatNumber = (value: number, digits: number): string => value.toFixed(digits); +const formatList = (values: number[], digits: number): string => + values.map((v) => formatNumber(v, digits)).join(", "); + +const buildMarkdownReport = (report: Report): string => { + const lines: string[] = []; + lines.push(`# Performance Report (${report.timestamp})`); + lines.push(""); + lines.push("## Environment"); + lines.push(""); + lines.push(`- Node: ${report.environment.node}`); + lines.push(`- Platform: ${report.environment.platform}`); + lines.push(`- CPU: ${report.environment.cpus}`); + lines.push(`- Memory: ${report.environment.totalMemoryGB.toFixed(1)} GB`); + lines.push(`- CPU Load: ${report.environment.cpuLoad.map((l) => l.toFixed(2)).join(", ")}`); + lines.push(""); + lines.push("## Test Configuration"); + lines.push(""); + lines.push(`- Warmup runs: ${report.testConfig.warmupRuns}`); + lines.push(`- Measurement runs: ${report.testConfig.measurementRuns}`); + lines.push( + `- Outlier trim: ${(report.testConfig.outlierTrimPercent * 100).toFixed(0)}% extra (${report.testConfig.totalRunsPerSize - report.testConfig.measurementRuns} trimmed)` + ); + lines.push(`- Total runs per size: ${report.testConfig.totalRunsPerSize}`); + + if (report.systemWarnings.length > 0) { + lines.push(""); + lines.push("### System Warnings"); + lines.push(""); + report.systemWarnings.forEach((w) => 
lines.push(`- ${w}`)); + } + + lines.push(""); + lines.push("## Results"); + lines.push(""); + lines.push( + "| Benchmark | Timings ms (median) | Variance (CV%) | Per-item ms | Memory ΔMB | Slope ratio | R² | Baseline per-item max | Baseline slope ratio | Baseline R² |" + ); + lines.push("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"); + + for (const result of report.results) { + const baseline = report.baseline?.[result.name]; + const baselinePerItem = baseline ? formatNumber(baseline.perItemMsAtMax, 7) : "n/a"; + const baselineSlope = baseline ? formatNumber(baseline.slopeRatio, 4) : "n/a"; + const baselineR2 = baseline?.r2 !== undefined ? formatNumber(baseline.r2, 4) : "n/a"; + const avgCV = + result.timingStats.map((s) => (s.stdDev / s.mean) * 100).reduce((sum, cv) => sum + cv, 0) / + result.timingStats.length; + + lines.push( + [ + result.name, + formatList(result.timingsMs, 2), + formatNumber(avgCV, 1) + "% " + cvEmoji(avgCV), + formatList(result.perItemMs, 7), + formatList(result.memoryDeltaMB, 2), + formatNumber(result.slopeRatio, 4) + " " + slopeRatioEmoji(result.slopeRatio), + formatNumber(result.r2, 4) + " " + r2Emoji(result.r2), + baselinePerItem, + baselineSlope, + baselineR2, + ].join(" | ") + ); + } + + lines.push(""); + lines.push("## Legend"); + lines.push(""); + lines.push("### Slope ratio"); + lines.push(""); + lines.push( + "Ratio of the last slope segment to the first. A perfectly linear O(n) function scores 1.0." + ); + lines.push(""); + lines.push("| Indicator | Range | Meaning |"); + lines.push("| --- | --- | --- |"); + lines.push("| 🟢 | ≤ 2.0 | Consistent with O(n) linear scaling |"); + lines.push("| 🟡 | 2.0 – 4.0 | Suspicious — possible mild super-linear growth |"); + lines.push("| 🔴 | > 4.0 | Clearly non-linear (O(n²) or worse) |"); + lines.push(""); + lines.push("### CV% (Coefficient of Variation)"); + lines.push(""); + lines.push("Average CV across all measured sizes. 
Measures measurement stability."); + lines.push(""); + lines.push("| Indicator | Range | Meaning |"); + lines.push("| --- | --- | --- |"); + lines.push("| 🟢 | ≤ 5% | Stable — measurements are repeatable |"); + lines.push("| 🟡 | 5% – 15% | Acceptable for Node.js |"); + lines.push("| 🔴 | > 15% | High noise — results unreliable |"); + lines.push(""); + lines.push("### R² (Coefficient of Determination)"); + lines.push(""); + lines.push("1.0 = medians fall perfectly on a straight line."); + lines.push(""); + lines.push("| Indicator | Range | Meaning |"); + lines.push("| --- | --- | --- |"); + lines.push("| 🟢 | ≥ 0.995 | Excellent linear fit |"); + lines.push("| 🟡 | 0.980 – 0.995 | Minor deviation from linearity |"); + lines.push("| 🔴 | < 0.980 | Clearly non-linear scaling |"); + + return lines.join("\n"); +}; + +// --------------------------------------------------------------------------- +// Synthetic data generators +// --------------------------------------------------------------------------- + +/** + * Builds an array of synthetic CDS-like entity definitions for EntityDefinitionReader benchmarks. + * Each entity has a configurable number of elements (default 10) to simulate realistic CDS models. + */ +function buildSyntheticEntities(count: number, elementsPerEntity: number = 10): any[] { + const entities: any[] = []; + for (let i = 0; i < count; i++) { + const elements: Record<string, any> = {}; + // First element is always the key + elements[`id_${i}`] = { + type: "cds.UUID", + key: true, + "@HideFromDataInspector": false, + }; + for (let j = 1; j < elementsPerEntity; j++) { + elements[`field_${i}_${j}`] = { + type: j % 3 === 0 ? "cds.Integer" : j % 3 === 1 ? "cds.String" : "cds.Boolean", + key: false, + length: j % 3 === 1 ? 255 : undefined, + default: j % 5 === 0 ? 
{ val: "default" } : undefined, + notNull: j % 4 === 0, + "@PersonalData.IsPotentiallySensitive": j % 7 === 0, + "@Core.Computed": j % 9 === 0, + "@HideFromDataInspector": false, + }; + } + // Add a hidden element (should be filtered out) + elements[`hidden_${i}`] = { + type: "cds.String", + "@HideFromDataInspector": true, + }; + // Add an association (should be filtered out) + elements[`assoc_${i}`] = { + type: "cds.Association", + }; + + entities.push({ + name: `perf.test.Entity_${i}`, + "@title": i % 3 === 0 ? `Entity ${i} Title` : undefined, + "@HideFromDataInspector": false, + elements, + // Simulate the CsnRuntimeExtensions properties + get dataSource4DataInspector() { + return i % 2 === 0 ? "db" : "service"; + }, + get keyElements4DataInspector() { + return [`id_${i}`]; + }, + }); + } + return entities; +} + +/** + * Builds an array of synthetic database records for DataReader response-transformation benchmarks. + */ +function buildSyntheticRecords(count: number, fieldsPerRecord: number = 10): any[] { + const records: any[] = []; + for (let i = 0; i < count; i++) { + const record: Record<string, any> = { id: `uuid-${i}` }; + for (let j = 1; j < fieldsPerRecord; j++) { + record[`field_${j}`] = j % 3 === 0 ? i * j : j % 3 === 1 ? `value_${i}_${j}` : i % 2 === 0; + } + records.push(record); + } + // Simulate the CDS $count property on the array + (records as any).$count = count; + return records; +} + +/** + * Creates a mock cds.Request object for EntityDefinitionReader.read() benchmarks. + * Simulates a collection GET with $select=* and optional $filter. + */ +function buildEntityDefinitionRequest(options?: { + filter?: string; + orderby?: string; + skip?: number; + top?: number; +}): any { + const columns = ["*"]; + const req: any = { + params: [], + query: { + SELECT: { + columns, + count: true, + orderBy: options?.orderby + ? 
[{ ref: [options.orderby.split(" ")[0]], sort: options.orderby.split(" ")[1] || "asc" }] + : undefined, + }, + }, + req: { + query: { + $filter: options?.filter, + $orderby: options?.orderby, + $skip: options?.skip !== undefined ? String(options.skip) : undefined, + $top: options?.top !== undefined ? String(options.top) : undefined, + }, + }, + reject: (code: number, msg: string) => { + throw new Error(`Request rejected: ${code} ${msg}`); + }, + }; + return req; +} + +/** + * Creates a mock cds.Request object for DataReader.read() response-construction benchmarks. + */ +function buildDataReadRequest(entityName: string): any { + const columns = ["*"]; + return { + params: [], + query: { + SELECT: { + columns, + count: true, + }, + }, + req: { + query: { + $filter: `entityName = '${entityName}'`, + $skip: "0", + $top: "1000", + }, + }, + reject: (code: number, msg: string) => { + throw new Error(`Request rejected: ${code} ${msg}`); + }, + }; +} + +// --------------------------------------------------------------------------- +// Benchmark runner +// --------------------------------------------------------------------------- +const benchmarkSync = (name: string, runFn: (size: number) => void): BenchmarkResult => { + const timingsMs: number[] = []; + const timingStats: MeasurementStats[] = []; + const memoryDeltaMB: number[] = []; + + console.log(` Benchmarking ${name}...`); + for (const size of sizes) { + const totalRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT); + process.stdout.write(` Size ${size}: warmup (${WARMUP_RUNS} runs)...`); + + for (let w = 0; w < WARMUP_RUNS; w++) { + runFn(size); + } + + process.stdout.write(` measuring (${totalRuns} runs)...`); + const measurement = measureSync(() => runFn(size), MEASUREMENT_RUNS); + + timingsMs.push(measurement.stats.median); + timingStats.push(measurement.stats); + memoryDeltaMB.push(measurement.memoryDeltaMB); + + const cv = (measurement.stats.stdDev / measurement.stats.mean) * 100; + 
const cvWarning = cv > 20 ? " ! HIGH VARIANCE" : ""; + console.log( + ` ✓ (${measurement.stats.median.toFixed(2)}ms ±${measurement.stats.confidenceInterval.toFixed(2)}ms, CV: ${cv.toFixed(1)}%${cvWarning})` + ); + } + + const perItemMs = timingsMs.map((time, index) => time / sizes[index]); + const slopes = computeSlopes(timingsMs, sizes); + const slopeRatio = slopes.length >= 2 ? slopes[slopes.length - 1] / slopes[0] : 1; + const r2 = computeR2(timingsMs, sizes); + + return { + name, + sizes: [...sizes], + timingsMs, + timingStats, + perItemMs, + slopes, + slopeRatio, + r2, + memoryDeltaMB, + }; +}; + +const benchmarkAsync = async ( + name: string, + runFn: (size: number) => Promise +): Promise => { + const timingsMs: number[] = []; + const timingStats: MeasurementStats[] = []; + const memoryDeltaMB: number[] = []; + + console.log(` Benchmarking ${name}...`); + for (const size of sizes) { + const totalRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT); + process.stdout.write(` Size ${size}: warmup (${WARMUP_RUNS} runs)...`); + + for (let w = 0; w < WARMUP_RUNS; w++) { + await runFn(size); + } + + process.stdout.write(` measuring (${totalRuns} runs)...`); + const measurement = await measureAsync(() => runFn(size), MEASUREMENT_RUNS); + + timingsMs.push(measurement.stats.median); + timingStats.push(measurement.stats); + memoryDeltaMB.push(measurement.memoryDeltaMB); + + const cv = (measurement.stats.stdDev / measurement.stats.mean) * 100; + const cvWarning = cv > 20 ? " ! HIGH VARIANCE" : ""; + console.log( + ` ✓ (${measurement.stats.median.toFixed(2)}ms ±${measurement.stats.confidenceInterval.toFixed(2)}ms, CV: ${cv.toFixed(1)}%${cvWarning})` + ); + } + + const perItemMs = timingsMs.map((time, index) => time / sizes[index]); + const slopes = computeSlopes(timingsMs, sizes); + const slopeRatio = slopes.length >= 2 ? 
slopes[slopes.length - 1] / slopes[0] : 1; + const r2 = computeR2(timingsMs, sizes); + + return { + name, + sizes: [...sizes], + timingsMs, + timingStats, + perItemMs, + slopes, + slopeRatio, + r2, + memoryDeltaMB, + }; +}; + +// --------------------------------------------------------------------------- +// Test suite +// --------------------------------------------------------------------------- +describePerf("Performance - Data Inspector Processing", function () { + this.timeout(300000); // 5 minutes + + // Pre-built synthetic data per size + const entitiesBySize = new Map(); + const recordsBySize = new Map(); + + let report: Report; + + // Load CDS model from the test project so cds.model, cds.parse, cds.ql are available + before(async function () { + const csn = await cds.load(path.resolve(__dirname, "..", "..")); + cds.model = cds.compile.for.nodejs(csn); + if (!UPDATE_BASELINE && !fs.existsSync(BASELINE_PATH)) { + const isCI = process.env.CI === "true" || !!process.env.GITHUB_ACTIONS; + const message = isCI + ? 
`Performance baseline not found at ${BASELINE_FILENAME}.\n` + + " To establish the CI baseline, run the 'Update CI Performance Baseline' workflow.\n" + + " See: .github/workflows/performance-rebaseline.yml" + : `Performance baseline not found at ${BASELINE_FILENAME}.\n` + + " Run 'npm run test:performance:update-baseline' to create a baseline for your machine."; + console.log(`\n ⚠️ Skipping performance tests: ${message}\n`); + this.skip(); + } + }); + + before(() => { + // Pre-generate synthetic data for all sizes + for (const size of sizes) { + entitiesBySize.set(size, buildSyntheticEntities(size)); + recordsBySize.set(size, buildSyntheticRecords(size)); + } + }); + + after(() => { + if (!report) return; + + const reportDir = path.dirname(REPORT_PATH); + fs.mkdirSync(reportDir, { recursive: true }); + fs.writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2), "utf8"); + fs.writeFileSync(REPORT_MD_PATH, buildMarkdownReport(report), "utf8"); + + if (UPDATE_BASELINE) { + fs.writeFileSync( + BASELINE_PATH, + JSON.stringify( + report.results.reduce((acc, result) => { + acc[result.name] = { + sizes: result.sizes, + perItemMsAtMax: result.perItemMs[result.perItemMs.length - 1], + slopeRatio: result.slopeRatio, + r2: result.r2, + }; + return acc; + }, {} as BaselineData), + null, + 2 + ), + "utf8" + ); + } + }); + + it("should keep local processing roughly linear", async () => { + const results: BenchmarkResult[] = []; + + // ----------------------------------------------------------------------- + // Group A: EntityDefinitionReader — pure in-memory, no DB + // ----------------------------------------------------------------------- + + // A1: EntityDefinitionReader.read() — collection request (filter + sort + paginate + build response) + // We mock cds.model.all() to return our synthetic entities. 
+ results.push( + benchmarkSync("EntityDefinitionReader.read (collection)", (size) => { + const entities = entitiesBySize.get(size)!; + const originalAll = cds.model.all; + cds.model.all = ((kind: string) => { + if (kind === "entity") return entities; + if (kind === "service") return []; + return originalAll.call(cds.model, kind); + }) as any; + + try { + const reader = new EntityDefinitionReader(); + const req = buildEntityDefinitionRequest({ top: size }); + reader.read(req as any); + } finally { + cds.model.all = originalAll; + } + }) + ); + + // A2: EntityDefinitionReader.read() — collection request with $filter contains + results.push( + benchmarkSync("EntityDefinitionReader.read (filtered)", (size) => { + const entities = entitiesBySize.get(size)!; + const originalAll = cds.model.all; + cds.model.all = ((kind: string) => { + if (kind === "entity") return entities; + if (kind === "service") return []; + return originalAll.call(cds.model, kind); + }) as any; + + try { + const reader = new EntityDefinitionReader(); + const req = buildEntityDefinitionRequest({ + filter: `contains(name, 'Entity')`, + top: size, + }); + reader.read(req as any); + } finally { + cds.model.all = originalAll; + } + }) + ); + + // A3: EntityDefinitionReader._getEntityElements() — isolated element extraction + // We call the reader with a single entity request to measure per-entity element processing + results.push( + benchmarkSync("EntityDefinitionReader._getEntityElements (via read)", (size) => { + // Build one entity with 'size' elements to measure element iteration scaling + const entity = buildSyntheticEntities(1, size)[0]; + const entities = [entity]; + const originalAll = cds.model.all; + cds.model.all = ((kind: string) => { + if (kind === "entity") return entities; + if (kind === "service") return []; + return originalAll.call(cds.model, kind); + }) as any; + + try { + const reader = new EntityDefinitionReader(); + const req: any = { + params: [{ name: entity.name }], + query: { + 
SELECT: { + columns: ["*"], + }, + }, + req: { query: {} }, + reject: (code: number, msg: string) => { + throw new Error(`${code} ${msg}`); + }, + }; + reader.read(req as any); + } finally { + cds.model.all = originalAll; + } + }) + ); + + // ----------------------------------------------------------------------- + // Group B: DataReader — response construction (DB stubbed) + // ----------------------------------------------------------------------- + + // B1: DataReader response construction — _constructRecordKey + response loop + // We isolate the response-building portion by directly invoking the private methods + // through a controlled flow. We stub dataSource.run() to return pre-built records. + results.push( + await benchmarkAsync("DataReader.read (response construction, DB stubbed)", async (size) => { + const records = recordsBySize.get(size)!; + const entityName = "perf.test.Entity_0"; + + // Build a synthetic entity definition + const syntheticEntity: any = { + name: entityName, + "@HideFromDataInspector": false, + "@cds.query.limit.default": 1000, + "@cds.query.limit.max": 1000, + elements: { + id: { type: "cds.UUID", key: true }, + ...Object.fromEntries( + Array.from({ length: 9 }, (_, j) => [ + `field_${j + 1}`, + { type: "cds.String", key: false }, + ]) + ), + }, + get keyElements4DataInspector() { + return ["id"]; + }, + get dataSource4DataInspector() { + return "db"; + }, + }; + + // Mock cds.model.all to return our synthetic entity + const originalAll = cds.model.all; + cds.model.all = ((kind: string) => { + if (kind === "entity") { + return [syntheticEntity]; + } + if (kind === "service") return []; + return originalAll.call(cds.model, kind); + }) as any; + + // Mock cds.services.db.run to return our synthetic records + const originalDb = cds.services.db; + const mockDb = { + run: async () => { + const result = [...records]; + (result as any).$count = records.length; + return result; + }, + }; + (cds.services as any).db = mockDb; + + // Mock 
cds.ql.SELECT to return a chainable builder + const originalQL = cds.ql; + const mockSelect = { + from: () => { + const builder: any = { + columns: () => builder, + where: () => builder, + orderBy: () => builder, + limit: (l: number, o: number) => { + builder.SELECT = { limit: { offset: { val: o } }, count: true }; + return builder; + }, + SELECT: { limit: { offset: { val: 0 } }, count: true }, + }; + return builder; + }, + }; + (cds as any).ql = { ...originalQL, SELECT: mockSelect }; + + // Mock cds.parse.expr + const originalParse = cds.parse; + (cds as any).parse = { + ...originalParse, + expr: (expr: string) => ({ + xpr: [{ ref: ["entityName"] }, "=", { val: entityName }], + }), + }; + + // Mock audit-log: cds.env.requires does not include audit-log by default + const originalEnv = cds.env; + + try { + const reader = new DataReader(); + const req = buildDataReadRequest(entityName); + await reader.read(req as any); + } finally { + cds.model.all = originalAll; + (cds.services as any).db = originalDb; + (cds as any).ql = originalQL; + (cds as any).parse = originalParse; + } + }) + ); + + // B2: DataReader._emitAuditlogs — audit log emission with stubbed service + results.push( + await benchmarkAsync("DataReader._emitAuditlogs (stubbed audit-log)", async (size) => { + const records = recordsBySize.get(size)!; + + // Build a synthetic entity with sensitive elements + const syntheticEntity: any = { + name: "perf.test.SensitiveEntity", + "@PersonalData.DataSubjectRole": "Customer", + elements: { + id: { type: "cds.UUID", key: true }, + email: { + type: "cds.String", + key: false, + "@PersonalData.IsPotentiallySensitive": true, + }, + phone: { + type: "cds.String", + key: false, + "@PersonalData.IsPotentiallySensitive": true, + }, + name: { type: "cds.String", key: false }, + }, + get keyElements4DataInspector() { + return ["id"]; + }, + // _service is undefined for db entities => audit logging is triggered + }; + + // Build records that include sensitive fields + 
const sensitiveRecords = records.map((r: any, recordIndex: number) => ({ + ...r, + email: `user_${r.id}@example.com`, + phone: `+1-555-${String(recordIndex).padStart(4, "0")}`, + name: `User ${r.id}`, + })); + + // Mock cds.env.requires to include audit-log; keep the original object so it can be restored in place + const originalRequires = cds.env.requires; + (cds.env as any).requires = { + ...originalRequires, + "audit-log": { kind: "audit-log-to-console" }, + }; + + // Mock cds.connect.to to return a stubbed audit-log service + const originalConnect = cds.connect; + const stubbedAuditLog = { log: async () => {} }; + (cds as any).connect = { + ...originalConnect, + to: async (serviceName: string) => { + if (serviceName === "audit-log") return stubbedAuditLog; + return originalConnect.to(serviceName); + }, + }; + + try { + // Call the private _emitAuditlogs directly on a reader instance (cast to any to bypass visibility) + const reader = new DataReader(); + await (reader as any)._emitAuditlogs(syntheticEntity, sensitiveRecords); + } finally { + (cds.env as any).requires = originalRequires; // restore in place; cds.env itself is never replaced + (cds as any).connect = originalConnect; + } + }) + ); + + // ----------------------------------------------------------------------- + // Build report + // ----------------------------------------------------------------------- + report = { + timestamp: new Date().toISOString(), + sizes: [...sizes], + results, + regressionThreshold: MAX_REGRESSION, + slopeVarianceThreshold: MAX_SLOPE_VARIANCE, + testConfig: { + warmupRuns: WARMUP_RUNS, + measurementRuns: MEASUREMENT_RUNS, + outlierTrimPercent: OUTLIER_TRIM_PERCENT, + totalRunsPerSize: MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT), + }, + environment: { + node: process.version, + platform: `${process.platform} ${os.release()}`, + cpus: os.cpus()[0].model, + totalMemoryGB: os.totalmem() / 1024 ** 3, + cpuLoad: os.loadavg(), + }, + systemWarnings: checkSystemState(), + }; + + // Log system warnings + if (report.systemWarnings.length > 0) { + console.log("\n System Warnings:"); + report.systemWarnings.forEach((w) => console.log(` ! 
${w}`)); + console.log(""); + } + + // Regression check against baseline + let baseline: BaselineData | undefined; + if (fs.existsSync(BASELINE_PATH)) { + baseline = JSON.parse(fs.readFileSync(BASELINE_PATH, "utf8")) as BaselineData; + report.baseline = baseline; + } + + expect(results).to.have.length.greaterThan(0); + + for (const result of results) { + if (baseline && !UPDATE_BASELINE) { + const entry = baseline[result.name]; + if (!entry) { + console.warn( + ` ⚠️ WARNING: ${result.name} baseline entry missing — skipping regression check` + ); + continue; + } + + // Slope ratio regression check + if (entry.slopeRatio > 0.5 && result.slopeRatio > 0) { + const slopeAllowed = entry.slopeRatio * (1 + MAX_SLOPE_VARIANCE); + if (result.slopeRatio > slopeAllowed) { + console.warn( + ` ⚠️ WARNING: ${result.name} slope ratio regression: ` + + `${result.slopeRatio.toFixed(4)} > allowed ${slopeAllowed.toFixed(4)} ` + + `(baseline: ${entry.slopeRatio.toFixed(4)}, threshold: +${(MAX_SLOPE_VARIANCE * 100).toFixed(0)}%)` + ); + } + } + + // Per-item time regression check + const currentPerItem = result.perItemMs[result.perItemMs.length - 1]; + const allowed = entry.perItemMsAtMax * (1 + MAX_REGRESSION); + if (currentPerItem > allowed) { + console.warn( + ` ⚠️ WARNING: ${result.name} per-item time regression: ` + + `${currentPerItem.toFixed(7)}ms > allowed ${allowed.toFixed(7)}ms ` + + `(baseline: ${entry.perItemMsAtMax.toFixed(7)}ms, threshold: +${(MAX_REGRESSION * 100).toFixed(0)}%)` + ); + } + } + } + }); +}); diff --git a/test/performance/performance-baseline.ci.json b/test/performance/performance-baseline.ci.json new file mode 100644 index 0000000..e7855f9 --- /dev/null +++ b/test/performance/performance-baseline.ci.json @@ -0,0 +1,62 @@ +{ + "EntityDefinitionReader.read (collection)": { + "sizes": [ + 10, + 50, + 100, + 500, + 1000 + ], + "perItemMsAtMax": 0.0008664794999999686, + "slopeRatio": 1.0563725392233307, + "r2": 0.9991454243695702 + }, + 
"EntityDefinitionReader.read (filtered)": { + "sizes": [ + 10, + 50, + 100, + 500, + 1000 + ], + "perItemMsAtMax": 0.000915937500000041, + "slopeRatio": 0.8465581902233116, + "r2": 0.9997151636234601 + }, + "EntityDefinitionReader._getEntityElements (via read)": { + "sizes": [ + 10, + 50, + 100, + 500, + 1000 + ], + "perItemMsAtMax": 0.00030279100000001334, + "slopeRatio": 0.9524803034682242, + "r2": 0.997822499293361 + }, + "DataReader.read (response construction, DB stubbed)": { + "sizes": [ + 10, + 50, + 100, + 500, + 1000 + ], + "perItemMsAtMax": 0.00016691650000001345, + "slopeRatio": 0.5997224934519662, + "r2": 0.9979562674639578 + }, + "DataReader._emitAuditlogs (stubbed audit-log)": { + "sizes": [ + 10, + 50, + 100, + 500, + 1000 + ], + "perItemMsAtMax": 0.0007755829999999832, + "slopeRatio": 0.40819046558119343, + "r2": 0.9743144804610471 + } +} \ No newline at end of file diff --git a/test/tsconfig.json b/test/tsconfig.json index 93e467b..c833933 100644 --- a/test/tsconfig.json +++ b/test/tsconfig.json @@ -3,6 +3,6 @@ "include": ["**/*.ts"], "compilerOptions": { "noEmit": true, - "rootDir": "." + "rootDir": ".." 
} } From cca79b6c009a4f3303d379a81469a32b36908458 Mon Sep 17 00:00:00 2001 From: titanh3art <18174614+titanh3art@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:11:25 +0530 Subject: [PATCH 2/5] refactored --- package.json | 1 + .../PERFORMANCE-TESTING-STRATEGY.md | 460 ++++++++- .../performance/ProcessingPerformance.test.ts | 895 ++++-------------- test/performance/check-baseline-drift.js | 270 ++++++ test/performance/helpers/index.ts | 12 + test/performance/helpers/measurement.ts | 259 +++++ test/performance/helpers/reporting.ts | 161 ++++ test/performance/helpers/statistics.ts | 70 ++ test/performance/helpers/synthetic-data.ts | 178 ++++ test/performance/helpers/types.ts | 75 ++ 10 files changed, 1653 insertions(+), 728 deletions(-) create mode 100644 test/performance/check-baseline-drift.js create mode 100644 test/performance/helpers/index.ts create mode 100644 test/performance/helpers/measurement.ts create mode 100644 test/performance/helpers/reporting.ts create mode 100644 test/performance/helpers/statistics.ts create mode 100644 test/performance/helpers/synthetic-data.ts create mode 100644 test/performance/helpers/types.ts diff --git a/package.json b/package.json index ae9c8cf..97d2d46 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "test": "cross-env CDS_TYPESCRIPT=true mocha", "test:performance": "cross-env CDS_TYPESCRIPT=true PERF_TESTS=1 mocha --config test/performance/.mocharc.performance.json", "test:performance:update-baseline": "cross-env CDS_TYPESCRIPT=true PERF_TESTS=1 PERF_UPDATE_BASELINE=1 mocha --config test/performance/.mocharc.performance.json", + "test:performance:check-drift": "node test/performance/check-baseline-drift.js", "coverage": "cross-env CDS_TYPESCRIPT=true c8 mocha" }, "peerDependencies": { diff --git a/test/performance/PERFORMANCE-TESTING-STRATEGY.md b/test/performance/PERFORMANCE-TESTING-STRATEGY.md index fd60c54..9bdc1a0 100644 --- a/test/performance/PERFORMANCE-TESTING-STRATEGY.md +++ 
b/test/performance/PERFORMANCE-TESTING-STRATEGY.md @@ -34,6 +34,7 @@ This testing strategy addresses the following SAP Performance Product Standards: test/performance/ ├── .mocharc.performance.json # Mocha config (perf tests only) ├── ProcessingPerformance.test.ts # All benchmarks +├── check-baseline-drift.js # Long-term drift detection across git history ├── performance-baseline.json # Local developer baseline (gitignored) ├── performance-baseline.ci.json # CI baseline (committed to repo) └── PERFORMANCE-TESTING-STRATEGY.md # This file @@ -118,6 +119,9 @@ npm run test:performance:update-baseline # Subsequent runs: compare against baseline npm run test:performance + +# Check for gradual drift across CI baseline git history +npm run test:performance:check-drift ``` ### Environment variables @@ -168,11 +172,463 @@ This approach ensures: - Configurable scaling (the `sizes` array can be adjusted) - Fast execution (no CDS server boot required) -## 7. Future Enhancements +## 7. Baseline Drift Detection + +The `check-baseline-drift.js` script detects **gradual performance degradation** that no single run would catch. It reads the git history of `performance-baseline.ci.json` and analyzes how `perItemMsAtMax` values have changed across commits. + +### What it detects + +| Condition | Default Threshold | Severity | +| ------------------------------------------------------- | -------------------------------- | ------------------- | +| Total per-item cost increase across the examined window | 20% (`DRIFT_MAX_TOTAL_INCREASE`) | **FAIL** | +| Consecutive per-item cost increases | 3 (`DRIFT_CONSECUTIVE_WARN`) | **WARN** (advisory) | + +### How it works + +1. Queries `git log` for commits that touched `performance-baseline.ci.json` +2. Loads up to 10 historical snapshots (configurable via `DRIFT_WINDOW`) +3. For each benchmark, computes total increase, consecutive-increase streak, and OLS trend slope +4. 
Outputs a history table and per-benchmark analysis + +### Configuration (env vars) + +| Variable | Default | Description | +| -------------------------- | ----------------------------------------------- | --------------------------------------------- | +| `DRIFT_BASELINE_FILE` | `test/performance/performance-baseline.ci.json` | Git path of the baseline file to inspect | +| `DRIFT_WINDOW` | `10` | Number of recent commits to examine | +| `DRIFT_MAX_TOTAL_INCREASE` | `0.20` | Max allowed total increase (fraction) | +| `DRIFT_CONSECUTIVE_WARN` | `3` | Consecutive increases before advisory warning | + +### When to use + +- After accumulating 2+ CI baseline snapshots in git history (requires running the rebaseline workflow at least twice) +- As part of periodic performance health checks +- Before major releases, to verify no gradual cost drift has occurred + +## 8. Future Enhancements As the plugin evolves, consider adding: 1. **Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks (PERF-05 deeper coverage) 2. **Concurrent simulation** — If the plugin adds stateful processing, add benchmarks that simulate concurrent request patterns 3. **Larger scale tests** — Extend the sizes array to [100, 500, 1000, 5000, 10000] if real-world deployments involve very large CDS models -4. **UI rendering benchmarks** — If the SAPUI5 frontend becomes a performance concern, add browser-based benchmarks using Puppeteer \ No newline at end of file +4. **UI rendering benchmarks** — If the SAPUI5 frontend becomes a performance concern, add browser-based benchmarks using Puppeteer + +--- + +## Appendix: Performance Testing 101 — Concepts & KPIs Explained + +This appendix explains every statistical concept and KPI used in this testing strategy from first principles. If you've never done performance benchmarking before, start here. + +--- + +### A.1 Why do we measure performance at all? 
+ +Software can be "correct" (produces the right answer) yet still unusable if it's too slow. Performance testing answers two questions: + +1. **Does it scale?** — If the input doubles, does the time roughly double (good) or quadruple (bad)? +2. **Did it get slower?** — Compared to last week's version, is the same operation taking longer? + +Question 1 is about **algorithmic complexity**. Question 2 is about **regression detection**. + +--- + +### A.2 Big-O Notation + +Big-O describes how an algorithm's cost grows as input size *n* increases: + +| Notation | Name | Example | Doubling *n* does what? | +| -------------- | ---------- | -------------------------------- | --------------------------------- | +| **O(1)** | Constant | Hash table lookup | Time stays the same | +| **O(n)** | Linear | Scanning every item in a list | Time doubles | +| **O(n²)** | Quadratic | Nested loop over all pairs | Time quadruples (4×) | +| **O(n³)** | Cubic | Triple nested loop | Time increases 8× | +| **O(n·log n)** | Log-linear | Good sort algorithms (mergesort) | Time roughly doubles (a bit more) | + +**Our goal**: every operation in data-inspector should be **O(n)** or better. If we accidentally introduce an O(n²) algorithm (e.g., a nested loop that compares every entity to every other entity), the benchmarks will catch it. + +--- + +### A.3 Median vs. Mean — Which "average" to use? + +Both are measures of central tendency, but they behave differently with outliers: + +- **Mean** (arithmetic average): Sum all values, divide by count. Sensitive to outliers — one very slow run pulls the mean up dramatically. +- **Median**: Sort all values, pick the middle one. Robust to outliers — even if one run was 100× slower, the median barely moves. + +**Why we use the median for benchmark reporting**: In benchmarking, you occasionally get "hiccup" runs where the garbage collector fires, the OS scheduler intervenes, or the CPU thermal-throttles. 
The median naturally ignores these glitches without requiring you to manually identify and remove them. + +We still report the mean (and use it internally for outlier detection), but the **median is the primary metric** in our results. + +--- + +### A.4 Standard Deviation (σ) and Coefficient of Variation (CV%) + +Imagine you time a function 20 times and get these results (in ms): + +``` +Run 1: 5.1 Run 2: 4.9 Run 3: 5.0 Run 4: 5.2 Run 5: 5.0 ... +``` + +The **mean** is 5.04ms. But how *consistent* are these numbers? That's what standard deviation tells you. + +#### Standard Deviation (σ) — "How spread out are my measurements?" + +Think of σ as the "average distance from the mean." Here's the intuition: + +1. Take each measurement and ask: "How far is this from the mean?" + - Run 1: |5.1 - 5.04| = 0.06 + - Run 2: |4.9 - 5.04| = 0.14 + - Run 3: |5.0 - 5.04| = 0.04 + - ...and so on for all 20 runs +2. Square those distances (so negative and positive don't cancel out) +3. Average the squared distances +4. Take the square root (to get back to the original units — milliseconds) + +The result is σ. A small σ (say 0.08ms when the mean is 5ms) means your measurements are very consistent. A large σ (say 2.5ms when the mean is 5ms) means they're all over the place. + +#### Coefficient of Variation (CV%) — "Is that spread *relatively* big or small?" + +Here's the problem with σ alone: is σ = 2ms "good" or "bad"? It depends on context: + +- If the mean is **1000ms**, then σ = 2ms is tiny (0.2% of the mean) → very stable +- If the mean is **5ms**, then σ = 2ms is huge (40% of the mean) → extremely noisy + +CV% solves this by expressing σ as a percentage of the mean: + +``` +CV% = (σ / mean) × 100 +``` + +This lets you compare the stability of a 5ms benchmark to a 500ms benchmark on equal footing. 
+ +**Real-world example from our tests**: +- Benchmark A: mean = 0.04ms, σ = 0.008ms → CV = 20% 🔴 (noisy — the function is so fast that GC jitter dominates) +- Benchmark B: mean = 3.85ms, σ = 0.12ms → CV = 3.1% 🟢 (stable — the function takes long enough that noise is negligible) + +**Our thresholds**: + +| CV% | Indicator | Meaning | +| ----- | --------- | ------------------------------------------------------ | +| ≤ 5% | 🟢 | Stable — measurements are repeatable | +| 5–15% | 🟡 | Acceptable for Node.js (GC pauses cause some variance) | +| > 15% | 🔴 | High noise — consider more warmup or runs | + +--- + +### A.5 Confidence Interval (CI) + +Imagine you measured a function 20 times and got a median of 5.23ms. If you ran those 20 measurements again tomorrow, would you get exactly 5.23ms again? Probably not — maybe 5.18ms, or 5.31ms. The **confidence interval** tells you the range where the "true" value most likely lives. + +#### The analogy + +Think of it like measuring your height with a wobbly ruler. You measure yourself 5 times and get: 175.2cm, 174.8cm, 175.1cm, 175.5cm, 174.9cm. You're probably not exactly 175.1cm tall, but you're pretty confident you're somewhere between 174.8cm and 175.5cm. That range is your confidence interval. + +#### The math (simplified) + +``` +CI = ±1.96 × (σ / √n) +``` + +Breaking this down: +- **σ** = standard deviation (how noisy your measurements are — see A.4) +- **√n** = square root of the number of runs (more runs = narrower interval, because more data = more certainty) +- **1.96** = a magic number from statistics that gives you 95% confidence (you can think of it as "about 2") + +So the formula says: *"Take the noise level (σ), shrink it by how many measurements you took (√n), and multiply by ~2."* + +#### A worked example + +- You measured 20 runs. Median = 5.23ms. σ = 0.22ms. 
+- CI = ±1.96 × (0.22 / √20) = ±1.96 × (0.22 / 4.47) = ±1.96 × 0.049 = **±0.097ms** +- So we report: **5.23ms ±0.10ms** +- Meaning: "We're 95% confident the true typical time is between 5.13ms and 5.33ms." + +#### Why it matters for us + +When comparing two benchmark results (e.g., before vs. after a code change), if their confidence intervals overlap, the difference is probably just measurement noise — not a real performance change. For example: +- Before: 5.23ms ±0.10ms → range [5.13, 5.33] +- After: 5.28ms ±0.12ms → range [5.16, 5.40] +- The ranges overlap heavily → **no meaningful difference** (don't panic!) + +But if: +- Before: 5.23ms ±0.10ms → range [5.13, 5.33] +- After: 6.80ms ±0.15ms → range [6.65, 6.95] +- No overlap at all → **real regression** (investigate!) + +--- + +### A.6 Outlier Trimming + +Raw benchmark timings often contain outliers — unusually slow (or fast) runs caused by GC pauses, OS scheduling, background processes, etc. + +**Our approach** (mean-distance trimming): +1. Run 30 iterations (20 to keep + 10 extra) +2. Compute the preliminary mean of all 30 +3. For each run, compute its distance from the mean +4. Sort by distance (closest to mean first) +5. Keep the 20 closest; discard the 10 furthest + +This is more nuanced than simple "remove top/bottom 10%" trimming, because it removes outliers on *both* ends that are far from the central tendency, regardless of which direction they're in. + +--- + +### A.7 Warmup Runs + +JavaScript engines (V8 in Node.js) use **Just-In-Time (JIT) compilation**. The first few calls to a function are interpreted (slow), then V8 compiles them to optimized machine code (fast). This process is called "warming up." + +If you measure the first 5 runs, you're measuring the interpreter, not the optimized code that will run in production. That's why we run 10 warmup iterations (discarded) before starting measurements. 
+ +**Think of it like warming up a car engine** — you don't measure fuel efficiency during the first 30 seconds after a cold start. + +--- + +### A.8 Slope and Slope Ratio + +These are the core metrics for detecting whether an algorithm is O(n) or worse. The key idea is surprisingly simple: **if adding more items always costs the same amount of extra time, the algorithm is linear. If adding more items costs *increasingly* more time, it's not.** + +#### Slope — "How much extra time does each additional item cost?" + +Imagine you're timing a function with different input sizes and you get: + +``` +Size 10 → took 1ms +Size 50 → took 5ms +Size 100 → took 10ms +Size 500 → took 50ms +Size 1000 → took 100ms +``` + +The **slope** between any two points is the "price per additional item": + +``` +slope = (time₂ - time₁) / (size₂ - size₁) +``` + +For the data above: +- Between size 10→50: slope = (5 - 1) / (50 - 10) = 4 / 40 = **0.1ms per item** +- Between size 500→1000: slope = (100 - 50) / (1000 - 500) = 50 / 500 = **0.1ms per item** + +The slope is the same! Each additional item always costs 0.1ms, regardless of whether you have 10 items or 1000. This is classic **O(n) linear** behavior. + +Now imagine a *bad* function: + +``` +Size 10 → took 1ms +Size 50 → took 5ms +Size 100 → took 20ms +Size 500 → took 250ms +Size 1000 → took 1000ms +``` + +- Between size 10→50: slope = (5 - 1) / 40 = **0.1ms per item** +- Between size 500→1000: slope = (1000 - 250) / 500 = **1.5ms per item** + +The slope grew 15× ! Adding items at large scale is much more expensive than at small scale. This screams **O(n²)**. + +#### Slope Ratio — "Did the slope stay the same or grow?" 
+ +Instead of eyeballing slopes, we compute a single number: + +``` +slope_ratio = last_slope / first_slope +``` + +Using the examples above: +- Good function: 0.1 / 0.1 = **1.0** (perfect — the cost per item never changed) +- Bad function: 1.5 / 0.1 = **15.0** (terrible — the cost per item grew 15×) + +**Think of it like a road trip**: If driving the first 100km takes 1 hour, and the last 100km also takes 1 hour, the "slope" (time per km) is constant — that's a straight highway (linear). If the last 100km takes 5 hours, the road got progressively worse — that's like a quadratic algorithm bogging down as data grows. + +**Interpretation**: + +| Slope ratio | What it means | Big-O | +| ----------- | ------------------------------------------------------ | --------------- | +| ~1.0 | Each additional item costs the same regardless of size | **O(n)** | +| ~2.0 | Cost per item roughly doubles at larger scale | **~O(n·log n)** | +| ~4.0+ | Cost per item grows dramatically — likely quadratic | **O(n²)** | +| ~10.0+ | Severe super-linear scaling | **O(n²)+** | + +**Our thresholds**: + +| Range | Indicator | Assessment | +| ------- | --------- | ------------------------------------------------ | +| ≤ 2.0 | 🟢 | Consistent with O(n) linear scaling | +| 2.0–4.0 | 🟡 | Suspicious — investigate for hidden nested loops | +| > 4.0 | 🔴 | Clearly non-linear (O(n²) or worse) | + +--- + +### A.9 R² — Coefficient of Determination + +R² answers a simple question: **"If I draw the best possible straight line through my data, how well does it fit?"** + +#### The school analogy + +Imagine you're a teacher plotting students' study hours (x-axis) vs. exam scores (y-axis). If every student who studied twice as long scored exactly twice as high, all the dots would fall on a perfect straight line — R² = 1.0. + +In reality, some students score higher or lower than the line predicts. R² tells you what fraction of the pattern is explained by the straight line vs. what fraction is "random scatter." 
+ +#### Visually + +``` +R² ≈ 1.0 (linear) R² ≈ 0.7 (curved/noisy) + +Time ↑ Time ↑ + | • | • + | • | • + | • | • + | • | • + | • | • + +----------→ Size +----------→ Size + Points hug the line Points curve away from the line +``` + +#### How it works (no math degree needed) + +1. **Draw the best straight line** through your 5 data points (the computer finds the line that minimizes the total squared vertical distance from all points) +2. **Measure the "misses"**: For each point, how far is it from the line? Square those distances and add them up. Call this **"unexplained scatter."** +3. **Measure the "baseline scatter"**: How far is each point from the simple average (a flat horizontal line)? Square and sum. Call this **"total scatter."** +4. **Compute R²**: + +``` +R² = 1 - (unexplained scatter / total scatter) +``` + +- If the line explains everything → unexplained scatter = 0 → R² = 1.0 +- If the line explains nothing (data is random) → unexplained = total → R² = 0.0 + +#### What R² values mean for our benchmarks + +| R² | Meaning | +| ---------- | --------------------------------------------------------------------------- | +| 1.000 | All points fall exactly on a straight line — perfectly linear | +| 0.995+ | Excellent linear fit — minor measurement noise only | +| 0.98–0.995 | Mostly linear with some deviation — could be noise or mild non-linearity | +| < 0.98 | Clearly not linear — the relationship curves (quadratic, exponential, etc.) | + +#### Why do we need BOTH slope ratio and R²? + +They catch **different types of problems**: + +**Slope ratio** only looks at the first and last segments — like checking the start and end of a road trip. **R²** looks at every point along the way. + +Consider this scenario: +``` +Size: 10 50 100 500 1000 +Time: 1ms 5ms 30ms 50ms 100ms +``` + +- Slope ratio = [(100-50)/(1000-500)] ÷ [(5-1)/(50-10)] = 0.1 / 0.1 = **1.0** → looks perfect! +- But R² ≈ **0.96**, below our 0.98 threshold → wait, something's off! + +What happened? 
The function has a "hump" at size 100 (30ms is way above the straight line). The slope ratio missed it because it only compared the first and last segments, but R² caught it because it checks every point. + +That's why we use both: **slope ratio catches endpoint divergence, R² catches mid-range curvature.** + +--- + +### A.10 Per-Item Time + +This is the simplest metric — just divide total time by input size: + +``` +per_item_ms = median_time_ms / size +``` + +For a truly O(n) algorithm, per-item time should be roughly constant regardless of size. If per-item time grows with size, you have a scaling problem. + +**Per-item time at max size** (the value stored in the baseline) is the most important data point because it amplifies any scaling issues. At size 10, even an O(n²) algorithm might only add 0.001ms overhead. At size 1000, that same O(n²) adds 10ms (100× the input, 10,000× the cost) — visible and measurable. + +--- + +### A.11 Memory Delta (Heap ΔMB) + +We measure `process.memoryUsage().heapUsed` before and after each benchmark: + +``` +ΔMB = (heapAfter - heapBefore) / (1024 × 1024) +``` + +This catches: +- **Hidden allocations** — Creating intermediate arrays, string concatenations, or object copies that scale with input size +- **Memory leaks** — Objects that survive garbage collection because they're accidentally retained + +**Note**: JavaScript GC is non-deterministic, so memory deltas are noisier than timing measurements. They're included as an advisory signal, not a hard gate. + +--- + +### A.12 Baseline and Regression Detection + +A **baseline** is a snapshot of your benchmark results at a known-good point in time. 
It records, for each benchmark: +- `perItemMsAtMax` — per-item time at maximum size +- `slopeRatio` — scaling behavior +- `r2` — linearity score + +**Regression detection** compares current results to the baseline: + +``` +allowed = baseline_value × (1 + threshold) + +# Example with 30% threshold: +# If baseline per-item time = 0.005ms +# allowed = 0.005 × 1.30 = 0.0065ms +# If current = 0.007ms → REGRESSION WARNING +``` + +**Why 30% threshold?** Benchmark noise on shared CI runners (GitHub Actions) typically causes 5–15% variance. A 30% threshold means only genuine code-level regressions trigger warnings, not hardware noise. + +--- + +### A.13 Putting It All Together — Reading a Result Row + +Here's how to read a line from the performance report: + +``` +| EntityDefReader.read | 0.05, 0.19, 0.38, 1.92, 3.85 | 4.2% 🟢 | 0.0050, 0.0038, 0.0038, 0.0038, 0.0039 | 0.12, 0.15, 0.18, 0.22, 0.25 | 1.0234 🟢 | 0.9998 🟢 | 0.0040 | 1.0100 | 0.9995 | +``` + +Reading left to right: +1. **Timings** [0.05→3.85ms]: Time grows ~77× as input grows 100× → slightly sub-linear (good) +2. **CV% 4.2% 🟢**: Low variance — stable measurements +3. **Per-item** [0.005→0.0039ms]: Cost per item stays flat → O(n) confirmed +4. **Memory** [0.12→0.25MB]: Slight growth — proportional to input (expected) +5. **Slope ratio 1.0234 🟢**: Almost exactly 1.0 → perfectly linear +6. **R² 0.9998 🟢**: Nearly perfect straight line +7. **Baseline columns**: Previous per-item=0.004ms, slope=1.01, R²=0.9995 — no regression + +**Verdict**: This benchmark is healthy — linear scaling, stable measurements, no regression. 
+ +--- + +### A.14 Quick Reference: All Emoji Indicators + +| Metric | 🟢 Good | 🟡 Watch | 🔴 Problem | +| ----------- | ------- | ------------- | --------- | +| Slope ratio | ≤ 2.0 | 2.0 – 4.0 | > 4.0 | +| R² | ≥ 0.995 | 0.980 – 0.995 | < 0.980 | +| CV% | ≤ 5% | 5% – 15% | > 15% | + +### A.15 Glossary + +| Term | Definition | +| ----------------------- | ------------------------------------------------------------------------------------------------------ | +| **Benchmark** | A controlled, repeatable experiment measuring one specific operation | +| **Warmup** | Discarded initial runs that let the JIT compiler optimize the code path | +| **Outlier** | A measurement far from the typical value, usually caused by GC/OS interference | +| **Trimming** | Removing outlier measurements before computing statistics | +| **Median** | The middle value when measurements are sorted; our primary metric | +| **Mean** | The arithmetic average of all measurements | +| **Standard deviation** | How spread out measurements are from the mean | +| **CV%** | Standard deviation as a percentage of the mean — normalized measure of noise | +| **Confidence interval** | Range within which the true value likely falls (95% probability) | +| **Slope** | Rate of time change per unit of input size between two measurement points | +| **Slope ratio** | Last slope ÷ first slope; 1.0 = perfectly linear growth | +| **R²** | Coefficient of determination; 1.0 = data falls perfectly on a straight line | +| **Per-item time** | Total time ÷ input size; should stay constant for O(n) algorithms | +| **Baseline** | Stored snapshot of benchmark results used as the reference for regression detection | +| **Regression** | A statistically significant increase in cost compared to the baseline | +| **Drift** | Gradual, incremental performance degradation across many commits (no single commit triggers a warning) | +| **Heap delta** | Change in V8 heap memory usage during a benchmark run | +| **JIT** | Just-In-Time 
compilation — V8's process of compiling JavaScript to machine code at runtime | +| **GC** | Garbage Collection — V8's automatic memory reclamation process | diff --git a/test/performance/ProcessingPerformance.test.ts b/test/performance/ProcessingPerformance.test.ts index 76a42db..a1cff54 100644 --- a/test/performance/ProcessingPerformance.test.ts +++ b/test/performance/ProcessingPerformance.test.ts @@ -1,5 +1,18 @@ +/** + * Performance benchmarks for @cap-js/data-inspector. + * + * Measures local processing cost of EntityDefinitionReader and DataReader + * across multiple input sizes (10→1000) to detect non-linear scaling and + * regressions against a stored baseline. + * + * Run: + * npm run test:performance # compare against baseline + * npm run test:performance:update-baseline # create/update baseline + * + * See PERFORMANCE-TESTING-STRATEGY.md for full documentation. + */ + import cds from "@sap/cds"; -import { performance } from "perf_hooks"; import fs from "fs"; import path from "path"; import os from "os"; @@ -8,6 +21,21 @@ import { expect } from "chai"; import { EntityDefinitionReader } from "../../srv/EntityDefinitionReader"; import { DataReader } from "../../srv/DataReader"; +import { + type BenchmarkResult, + type BaselineData, + type Report, + sizes, + checkSystemState, + benchmarkSync, + benchmarkAsync, + buildMarkdownReport, + buildSyntheticEntities, + buildSyntheticRecords, + buildEntityDefinitionRequest, + buildDataReadRequest, +} from "./helpers"; + // --------------------------------------------------------------------------- // Configuration (env-overridable) // --------------------------------------------------------------------------- @@ -26,599 +54,23 @@ const REPORT_MD_PATH = path.resolve(__dirname, "..", "..", "coverage", "performa const describePerf = PERF_ENABLED ? 
describe : describe.skip; -// Sizes: number of synthetic entities for EntityDefinitionReader, -// number of synthetic records for DataReader -const sizes = [10, 50, 100, 500, 1000]; - -// --------------------------------------------------------------------------- -// Types -// --------------------------------------------------------------------------- -type MeasurementStats = { - median: number; - mean: number; - stdDev: number; - min: number; - max: number; - confidenceInterval: number; -}; - -type BenchmarkResult = { - name: string; - sizes: number[]; - timingsMs: number[]; - timingStats: MeasurementStats[]; - perItemMs: number[]; - slopes: number[]; - slopeRatio: number; - r2: number; - memoryDeltaMB: number[]; -}; - -type BaselineEntry = { - sizes: number[]; - perItemMsAtMax: number; - slopeRatio: number; - r2?: number; -}; - -type BaselineData = Record; - -type TestConfig = { - warmupRuns: number; - measurementRuns: number; - outlierTrimPercent: number; - totalRunsPerSize: number; -}; - -type Report = { - timestamp: string; - sizes: number[]; - results: BenchmarkResult[]; - baseline?: BaselineData; - regressionThreshold: number; - slopeVarianceThreshold: number; - testConfig: TestConfig; - environment: { - node: string; - platform: string; - cpus: string; - totalMemoryGB: number; - cpuLoad: number[]; - }; - systemWarnings: string[]; -}; - -// --------------------------------------------------------------------------- -// Statistics helpers -// --------------------------------------------------------------------------- -const median = (values: number[]): number => { - const sorted = [...values].sort((a, b) => a - b); - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 === 0 ? 
(sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; -}; - -const mean = (values: number[]): number => - values.reduce((sum, val) => sum + val, 0) / values.length; - -const stdDev = (values: number[]): number => { - const avg = mean(values); - const squareDiffs = values.map((value) => Math.pow(value - avg, 2)); - return Math.sqrt(mean(squareDiffs)); -}; - -const calculateStats = (values: number[]): MeasurementStats => { - const sorted = [...values].sort((a, b) => a - b); - const avg = mean(values); - const sd = stdDev(values); - const ci = 1.96 * (sd / Math.sqrt(values.length)); - return { - median: median(values), - mean: avg, - stdDev: sd, - min: sorted[0], - max: sorted[sorted.length - 1], - confidenceInterval: ci, - }; -}; - -const checkSystemState = (): string[] => { - const warnings: string[] = []; - const loadAvg = os.loadavg(); - const cpuCount = os.cpus().length; - if (loadAvg[0] > cpuCount * 0.7) { - warnings.push( - `High CPU load detected: ${loadAvg[0].toFixed(2)} (${cpuCount} CPUs). 
Results may be unreliable.` - ); - } - const freeMemGB = os.freemem() / 1024 ** 3; - const totalMemGB = os.totalmem() / 1024 ** 3; - const memUsagePercent = ((totalMemGB - freeMemGB) / totalMemGB) * 100; - if (memUsagePercent > 85) { - warnings.push( - `High memory usage: ${memUsagePercent.toFixed(1)}% (${freeMemGB.toFixed(1)}GB free of ${totalMemGB.toFixed(1)}GB).` - ); - } - return warnings; -}; - -// --------------------------------------------------------------------------- -// Measurement -// --------------------------------------------------------------------------- -const measureAsync = async ( - fn: () => Promise, - runs: number -): Promise<{ timings: number[]; stats: MeasurementStats; memoryDeltaMB: number }> => { - const extraRuns = Math.ceil(runs * OUTLIER_TRIM_PERCENT); - const totalRuns = runs + extraRuns; - const allTimings: number[] = []; - const memBefore = process.memoryUsage(); - - for (let i = 0; i < totalRuns; i++) { - const start = performance.now(); - await fn(); - const end = performance.now(); - allTimings.push(end - start); - } - - const memAfter = process.memoryUsage(); - const memoryDeltaMB = (memAfter.heapUsed - memBefore.heapUsed) / (1024 * 1024); - - const preliminaryMean = mean(allTimings); - const timingsWithDistance = allTimings.map((timing) => ({ - timing, - distance: Math.abs(timing - preliminaryMean), - })); - timingsWithDistance.sort((a, b) => a.distance - b.distance); - const trimmedTimings = timingsWithDistance - .slice(0, runs) - .map((t) => t.timing) - .sort((a, b) => a - b); - - return { timings: trimmedTimings, stats: calculateStats(trimmedTimings), memoryDeltaMB }; -}; - -const measureSync = ( - fn: () => void, - runs: number -): { timings: number[]; stats: MeasurementStats; memoryDeltaMB: number } => { - const extraRuns = Math.ceil(runs * OUTLIER_TRIM_PERCENT); - const totalRuns = runs + extraRuns; - const allTimings: number[] = []; - const memBefore = process.memoryUsage(); - - for (let i = 0; i < totalRuns; i++) { - 
const start = performance.now(); - fn(); - const end = performance.now(); - allTimings.push(end - start); - } - - const memAfter = process.memoryUsage(); - const memoryDeltaMB = (memAfter.heapUsed - memBefore.heapUsed) / (1024 * 1024); - - const preliminaryMean = mean(allTimings); - const timingsWithDistance = allTimings.map((timing) => ({ - timing, - distance: Math.abs(timing - preliminaryMean), - })); - timingsWithDistance.sort((a, b) => a.distance - b.distance); - const trimmedTimings = timingsWithDistance - .slice(0, runs) - .map((t) => t.timing) - .sort((a, b) => a - b); - - return { timings: trimmedTimings, stats: calculateStats(trimmedTimings), memoryDeltaMB }; -}; - -const computeSlopes = (times: number[], sizeValues: number[]): number[] => { - const slopes: number[] = []; - for (let i = 1; i < times.length; i++) { - const deltaT = times[i] - times[i - 1]; - const deltaN = sizeValues[i] - sizeValues[i - 1]; - slopes.push(deltaT / deltaN); - } - return slopes; -}; - -const computeR2 = (times: number[], sizeValues: number[]): number => { - const n = times.length; - if (n < 2) return 1; - const meanX = sizeValues.reduce((sum, x) => sum + x, 0) / n; - const meanY = times.reduce((sum, y) => sum + y, 0) / n; - let numerator = 0; - let denominator = 0; - for (let i = 0; i < n; i++) { - const dx = sizeValues[i] - meanX; - numerator += dx * (times[i] - meanY); - denominator += dx * dx; - } - const slope = denominator === 0 ? 0 : numerator / denominator; - const intercept = meanY - slope * meanX; - let ssRes = 0; - let ssTot = 0; - for (let i = 0; i < n; i++) { - const predicted = slope * sizeValues[i] + intercept; - ssRes += (times[i] - predicted) ** 2; - ssTot += (times[i] - meanY) ** 2; - } - return ssTot === 0 ? 
1 : 1 - ssRes / ssTot; -}; - -// --------------------------------------------------------------------------- -// Report building -// --------------------------------------------------------------------------- -const slopeRatioEmoji = (ratio: number): string => { - if (ratio <= 2.0) return "🟢"; - if (ratio <= 4.0) return "🟡"; - return "🔴"; -}; - -const r2Emoji = (r2: number): string => { - if (r2 >= 0.995) return "🟢"; - if (r2 >= 0.98) return "🟡"; - return "🔴"; -}; - -const cvEmoji = (cv: number): string => { - if (cv <= 5) return "🟢"; - if (cv <= 15) return "🟡"; - return "🔴"; -}; - -const formatNumber = (value: number, digits: number): string => value.toFixed(digits); -const formatList = (values: number[], digits: number): string => - values.map((v) => formatNumber(v, digits)).join(", "); - -const buildMarkdownReport = (report: Report): string => { - const lines: string[] = []; - lines.push(`# Performance Report (${report.timestamp})`); - lines.push(""); - lines.push("## Environment"); - lines.push(""); - lines.push(`- Node: ${report.environment.node}`); - lines.push(`- Platform: ${report.environment.platform}`); - lines.push(`- CPU: ${report.environment.cpus}`); - lines.push(`- Memory: ${report.environment.totalMemoryGB.toFixed(1)} GB`); - lines.push(`- CPU Load: ${report.environment.cpuLoad.map((l) => l.toFixed(2)).join(", ")}`); - lines.push(""); - lines.push("## Test Configuration"); - lines.push(""); - lines.push(`- Warmup runs: ${report.testConfig.warmupRuns}`); - lines.push(`- Measurement runs: ${report.testConfig.measurementRuns}`); - lines.push( - `- Outlier trim: ${(report.testConfig.outlierTrimPercent * 100).toFixed(0)}% extra (${report.testConfig.totalRunsPerSize - report.testConfig.measurementRuns} trimmed)` - ); - lines.push(`- Total runs per size: ${report.testConfig.totalRunsPerSize}`); - - if (report.systemWarnings.length > 0) { - lines.push(""); - lines.push("### System Warnings"); - lines.push(""); - report.systemWarnings.forEach((w) => 
lines.push(`- ${w}`)); - } - - lines.push(""); - lines.push("## Results"); - lines.push(""); - lines.push( - "| Benchmark | Timings ms (median) | Variance (CV%) | Per-item ms | Memory ΔMB | Slope ratio | R² | Baseline per-item max | Baseline slope ratio | Baseline R² |" - ); - lines.push("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"); - - for (const result of report.results) { - const baseline = report.baseline?.[result.name]; - const baselinePerItem = baseline ? formatNumber(baseline.perItemMsAtMax, 7) : "n/a"; - const baselineSlope = baseline ? formatNumber(baseline.slopeRatio, 4) : "n/a"; - const baselineR2 = baseline?.r2 !== undefined ? formatNumber(baseline.r2, 4) : "n/a"; - const avgCV = - result.timingStats.map((s) => (s.stdDev / s.mean) * 100).reduce((sum, cv) => sum + cv, 0) / - result.timingStats.length; - - lines.push( - [ - result.name, - formatList(result.timingsMs, 2), - formatNumber(avgCV, 1) + "% " + cvEmoji(avgCV), - formatList(result.perItemMs, 7), - formatList(result.memoryDeltaMB, 2), - formatNumber(result.slopeRatio, 4) + " " + slopeRatioEmoji(result.slopeRatio), - formatNumber(result.r2, 4) + " " + r2Emoji(result.r2), - baselinePerItem, - baselineSlope, - baselineR2, - ].join(" | ") - ); - } - - lines.push(""); - lines.push("## Legend"); - lines.push(""); - lines.push("### Slope ratio"); - lines.push(""); - lines.push( - "Ratio of the last slope segment to the first. A perfectly linear O(n) function scores 1.0." - ); - lines.push(""); - lines.push("| Indicator | Range | Meaning |"); - lines.push("| --- | --- | --- |"); - lines.push("| 🟢 | ≤ 2.0 | Consistent with O(n) linear scaling |"); - lines.push("| 🟡 | 2.0 – 4.0 | Suspicious — possible mild super-linear growth |"); - lines.push("| 🔴 | > 4.0 | Clearly non-linear (O(n²) or worse) |"); - lines.push(""); - lines.push("### CV% (Coefficient of Variation)"); - lines.push(""); - lines.push("Average CV across all measured sizes. 
Measures measurement stability."); - lines.push(""); - lines.push("| Indicator | Range | Meaning |"); - lines.push("| --- | --- | --- |"); - lines.push("| 🟢 | ≤ 5% | Stable — measurements are repeatable |"); - lines.push("| 🟡 | 5% – 15% | Acceptable for Node.js |"); - lines.push("| 🔴 | > 15% | High noise — results unreliable |"); - lines.push(""); - lines.push("### R² (Coefficient of Determination)"); - lines.push(""); - lines.push("1.0 = medians fall perfectly on a straight line."); - lines.push(""); - lines.push("| Indicator | Range | Meaning |"); - lines.push("| --- | --- | --- |"); - lines.push("| 🟢 | ≥ 0.995 | Excellent linear fit |"); - lines.push("| 🟡 | 0.980 – 0.995 | Minor deviation from linearity |"); - lines.push("| 🔴 | < 0.980 | Clearly non-linear scaling |"); - - return lines.join("\n"); -}; - -// --------------------------------------------------------------------------- -// Synthetic data generators -// --------------------------------------------------------------------------- - -/** - * Builds an array of synthetic CDS-like entity definitions for EntityDefinitionReader benchmarks. - * Each entity has a configurable number of elements (default 10) to simulate realistic CDS models. - */ -function buildSyntheticEntities(count: number, elementsPerEntity: number = 10): any[] { - const entities: any[] = []; - for (let i = 0; i < count; i++) { - const elements: Record = {}; - // First element is always the key - elements[`id_${i}`] = { - type: "cds.UUID", - key: true, - "@HideFromDataInspector": false, - }; - for (let j = 1; j < elementsPerEntity; j++) { - elements[`field_${i}_${j}`] = { - type: j % 3 === 0 ? "cds.Integer" : j % 3 === 1 ? "cds.String" : "cds.Boolean", - key: false, - length: j % 3 === 1 ? 255 : undefined, - default: j % 5 === 0 ? 
{ val: "default" } : undefined, - notNull: j % 4 === 0, - "@PersonalData.IsPotentiallySensitive": j % 7 === 0, - "@Core.Computed": j % 9 === 0, - "@HideFromDataInspector": false, - }; - } - // Add a hidden element (should be filtered out) - elements[`hidden_${i}`] = { - type: "cds.String", - "@HideFromDataInspector": true, - }; - // Add an association (should be filtered out) - elements[`assoc_${i}`] = { - type: "cds.Association", - }; - - entities.push({ - name: `perf.test.Entity_${i}`, - "@title": i % 3 === 0 ? `Entity ${i} Title` : undefined, - "@HideFromDataInspector": false, - elements, - // Simulate the CsnRuntimeExtensions properties - get dataSource4DataInspector() { - return i % 2 === 0 ? "db" : "service"; - }, - get keyElements4DataInspector() { - return [`id_${i}`]; - }, - }); - } - return entities; -} - -/** - * Builds an array of synthetic database records for DataReader response-transformation benchmarks. - */ -function buildSyntheticRecords(count: number, fieldsPerRecord: number = 10): any[] { - const records: any[] = []; - for (let i = 0; i < count; i++) { - const record: Record = { id: `uuid-${i}` }; - for (let j = 1; j < fieldsPerRecord; j++) { - record[`field_${j}`] = j % 3 === 0 ? i * j : j % 3 === 1 ? `value_${i}_${j}` : i % 2 === 0; - } - records.push(record); - } - // Simulate the CDS $count property on the array - (records as any).$count = count; - return records; -} - -/** - * Creates a mock cds.Request object for EntityDefinitionReader.read() benchmarks. - * Simulates a collection GET with $select=* and optional $filter. - */ -function buildEntityDefinitionRequest(options?: { - filter?: string; - orderby?: string; - skip?: number; - top?: number; -}): any { - const columns = ["*"]; - const req: any = { - params: [], - query: { - SELECT: { - columns, - count: true, - orderBy: options?.orderby - ? 
[{ ref: [options.orderby.split(" ")[0]], sort: options.orderby.split(" ")[1] || "asc" }] - : undefined, - }, - }, - req: { - query: { - $filter: options?.filter, - $orderby: options?.orderby, - $skip: options?.skip !== undefined ? String(options.skip) : undefined, - $top: options?.top !== undefined ? String(options.top) : undefined, - }, - }, - reject: (code: number, msg: string) => { - throw new Error(`Request rejected: ${code} ${msg}`); - }, - }; - return req; -} - -/** - * Creates a mock cds.Request object for DataReader.read() response-construction benchmarks. - */ -function buildDataReadRequest(entityName: string): any { - const columns = ["*"]; - return { - params: [], - query: { - SELECT: { - columns, - count: true, - }, - }, - req: { - query: { - $filter: `entityName = '${entityName}'`, - $skip: "0", - $top: "1000", - }, - }, - reject: (code: number, msg: string) => { - throw new Error(`Request rejected: ${code} ${msg}`); - }, - }; -} - -// --------------------------------------------------------------------------- -// Benchmark runner -// --------------------------------------------------------------------------- -const benchmarkSync = (name: string, runFn: (size: number) => void): BenchmarkResult => { - const timingsMs: number[] = []; - const timingStats: MeasurementStats[] = []; - const memoryDeltaMB: number[] = []; - - console.log(` Benchmarking ${name}...`); - for (const size of sizes) { - const totalRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT); - process.stdout.write(` Size ${size}: warmup (${WARMUP_RUNS} runs)...`); - - for (let w = 0; w < WARMUP_RUNS; w++) { - runFn(size); - } - - process.stdout.write(` measuring (${totalRuns} runs)...`); - const measurement = measureSync(() => runFn(size), MEASUREMENT_RUNS); - - timingsMs.push(measurement.stats.median); - timingStats.push(measurement.stats); - memoryDeltaMB.push(measurement.memoryDeltaMB); - - const cv = (measurement.stats.stdDev / measurement.stats.mean) * 100; - 
const cvWarning = cv > 20 ? " ! HIGH VARIANCE" : ""; - console.log( - ` ✓ (${measurement.stats.median.toFixed(2)}ms ±${measurement.stats.confidenceInterval.toFixed(2)}ms, CV: ${cv.toFixed(1)}%${cvWarning})` - ); - } - - const perItemMs = timingsMs.map((time, index) => time / sizes[index]); - const slopes = computeSlopes(timingsMs, sizes); - const slopeRatio = slopes.length >= 2 ? slopes[slopes.length - 1] / slopes[0] : 1; - const r2 = computeR2(timingsMs, sizes); - - return { - name, - sizes: [...sizes], - timingsMs, - timingStats, - perItemMs, - slopes, - slopeRatio, - r2, - memoryDeltaMB, - }; -}; - -const benchmarkAsync = async ( - name: string, - runFn: (size: number) => Promise -): Promise => { - const timingsMs: number[] = []; - const timingStats: MeasurementStats[] = []; - const memoryDeltaMB: number[] = []; - - console.log(` Benchmarking ${name}...`); - for (const size of sizes) { - const totalRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT); - process.stdout.write(` Size ${size}: warmup (${WARMUP_RUNS} runs)...`); - - for (let w = 0; w < WARMUP_RUNS; w++) { - await runFn(size); - } - - process.stdout.write(` measuring (${totalRuns} runs)...`); - const measurement = await measureAsync(() => runFn(size), MEASUREMENT_RUNS); - - timingsMs.push(measurement.stats.median); - timingStats.push(measurement.stats); - memoryDeltaMB.push(measurement.memoryDeltaMB); - - const cv = (measurement.stats.stdDev / measurement.stats.mean) * 100; - const cvWarning = cv > 20 ? " ! HIGH VARIANCE" : ""; - console.log( - ` ✓ (${measurement.stats.median.toFixed(2)}ms ±${measurement.stats.confidenceInterval.toFixed(2)}ms, CV: ${cv.toFixed(1)}%${cvWarning})` - ); - } - - const perItemMs = timingsMs.map((time, index) => time / sizes[index]); - const slopes = computeSlopes(timingsMs, sizes); - const slopeRatio = slopes.length >= 2 ? 
slopes[slopes.length - 1] / slopes[0] : 1; - const r2 = computeR2(timingsMs, sizes); - - return { - name, - sizes: [...sizes], - timingsMs, - timingStats, - perItemMs, - slopes, - slopeRatio, - r2, - memoryDeltaMB, - }; -}; - // --------------------------------------------------------------------------- // Test suite // --------------------------------------------------------------------------- describePerf("Performance - Data Inspector Processing", function () { this.timeout(300000); // 5 minutes - // Pre-built synthetic data per size + /** Pre-built synthetic data per size (populated in before hook). */ const entitiesBySize = new Map(); const recordsBySize = new Map(); let report: Report; - // Load CDS model from the test project so cds.model, cds.parse, cds.ql are available + // Load CDS model so cds.model, cds.parse, cds.ql are available before(async function () { const csn = await cds.load(path.resolve(__dirname, "..", "..")); cds.model = cds.compile.for.nodejs(csn); + if (!UPDATE_BASELINE && !fs.existsSync(BASELINE_PATH)) { const isCI = process.env.CI === "true" || !!process.env.GITHUB_ACTIONS; const message = isCI @@ -632,14 +84,15 @@ describePerf("Performance - Data Inspector Processing", function () { } }); + // Pre-generate synthetic data for all sizes before(() => { - // Pre-generate synthetic data for all sizes for (const size of sizes) { entitiesBySize.set(size, buildSyntheticEntities(size)); recordsBySize.set(size, buildSyntheticRecords(size)); } }); + // Write reports and optionally update baseline after all benchmarks after(() => { if (!report) return; @@ -672,12 +125,11 @@ describePerf("Performance - Data Inspector Processing", function () { it("should keep local processing roughly linear", async () => { const results: BenchmarkResult[] = []; - // ----------------------------------------------------------------------- + // ------------------------------------------------------------------- // Group A: EntityDefinitionReader — pure in-memory, no DB 
- // ----------------------------------------------------------------------- + // ------------------------------------------------------------------- - // A1: EntityDefinitionReader.read() — collection request (filter + sort + paginate + build response) - // We mock cds.model.all() to return our synthetic entities. + // A1: Collection read — iterate entities, build metadata, paginate, sort results.push( benchmarkSync("EntityDefinitionReader.read (collection)", (size) => { const entities = entitiesBySize.get(size)!; @@ -698,7 +150,7 @@ describePerf("Performance - Data Inspector Processing", function () { }) ); - // A2: EntityDefinitionReader.read() — collection request with $filter contains + // A2: Collection read with $filter — measures filter parsing overhead results.push( benchmarkSync("EntityDefinitionReader.read (filtered)", (size) => { const entities = entitiesBySize.get(size)!; @@ -722,11 +174,9 @@ describePerf("Performance - Data Inspector Processing", function () { }) ); - // A3: EntityDefinitionReader._getEntityElements() — isolated element extraction - // We call the reader with a single entity request to measure per-entity element processing + // A3: Element extraction — one entity with N elements (N = 10→1000) results.push( benchmarkSync("EntityDefinitionReader._getEntityElements (via read)", (size) => { - // Build one entity with 'size' elements to measure element iteration scaling const entity = buildSyntheticEntities(1, size)[0]; const entities = [entity]; const originalAll = cds.model.all; @@ -740,11 +190,7 @@ describePerf("Performance - Data Inspector Processing", function () { const reader = new EntityDefinitionReader(); const req: any = { params: [{ name: entity.name }], - query: { - SELECT: { - columns: ["*"], - }, - }, + query: { SELECT: { columns: ["*"] } }, req: { query: {} }, reject: (code: number, msg: string) => { throw new Error(`${code} ${msg}`); @@ -757,93 +203,66 @@ describePerf("Performance - Data Inspector Processing", function () { 
}) ); - // ----------------------------------------------------------------------- + // ------------------------------------------------------------------- // Group B: DataReader — response construction (DB stubbed) - // ----------------------------------------------------------------------- + // ------------------------------------------------------------------- - // B1: DataReader response construction — _constructRecordKey + response loop - // We isolate the response-building portion by directly invoking the private methods - // through a controlled flow. We stub dataSource.run() to return pre-built records. + // B1: Response loop — entity resolution, key construction, record transformation results.push( await benchmarkAsync("DataReader.read (response construction, DB stubbed)", async (size) => { const records = recordsBySize.get(size)!; const entityName = "perf.test.Entity_0"; - // Build a synthetic entity definition - const syntheticEntity: any = { - name: entityName, - "@HideFromDataInspector": false, - "@cds.query.limit.default": 1000, - "@cds.query.limit.max": 1000, - elements: { - id: { type: "cds.UUID", key: true }, - ...Object.fromEntries( - Array.from({ length: 9 }, (_, j) => [ - `field_${j + 1}`, - { type: "cds.String", key: false }, - ]) - ), - }, - get keyElements4DataInspector() { - return ["id"]; - }, - get dataSource4DataInspector() { - return "db"; - }, - }; + const syntheticEntity = buildSyntheticEntityForDataReader(entityName); - // Mock cds.model.all to return our synthetic entity + // Stub cds.model.all const originalAll = cds.model.all; cds.model.all = ((kind: string) => { - if (kind === "entity") { - return [syntheticEntity]; - } + if (kind === "entity") return [syntheticEntity]; if (kind === "service") return []; return originalAll.call(cds.model, kind); }) as any; - // Mock cds.services.db.run to return our synthetic records + // Stub cds.services.db.run → return synthetic records const originalDb = cds.services.db; - const mockDb = { + 
(cds.services as any).db = { run: async () => { const result = [...records]; (result as any).$count = records.length; return result; }, }; - (cds.services as any).db = mockDb; - // Mock cds.ql.SELECT to return a chainable builder + // Stub cds.ql.SELECT → chainable builder const originalQL = cds.ql; - const mockSelect = { - from: () => { - const builder: any = { - columns: () => builder, - where: () => builder, - orderBy: () => builder, - limit: (l: number, o: number) => { - builder.SELECT = { limit: { offset: { val: o } }, count: true }; - return builder; - }, - SELECT: { limit: { offset: { val: 0 } }, count: true }, - }; - return builder; + (cds as any).ql = { + ...originalQL, + SELECT: { + from: () => { + const builder: any = { + columns: () => builder, + where: () => builder, + orderBy: () => builder, + limit: (l: number, o: number) => { + builder.SELECT = { limit: { offset: { val: o } }, count: true }; + return builder; + }, + SELECT: { limit: { offset: { val: 0 } }, count: true }, + }; + return builder; + }, }, }; - (cds as any).ql = { ...originalQL, SELECT: mockSelect }; - // Mock cds.parse.expr + // Stub cds.parse.expr const originalParse = cds.parse; (cds as any).parse = { ...originalParse, - expr: (expr: string) => ({ + expr: () => ({ xpr: [{ ref: ["entityName"] }, "=", { val: entityName }], }), }; - // Mock audit-log: cds.env.requires does not include audit-log by default - const originalEnv = cds.env; - try { const reader = new DataReader(); const req = buildDataReadRequest(entityName); @@ -857,36 +276,25 @@ describePerf("Performance - Data Inspector Processing", function () { }) ); - // B2: DataReader._emitAuditlogs — audit log emission with stubbed service + // B2: Audit log emission — sensitive data fields, stubbed audit-log service results.push( await benchmarkAsync("DataReader._emitAuditlogs (stubbed audit-log)", async (size) => { const records = recordsBySize.get(size)!; - // Build a synthetic entity with sensitive elements const syntheticEntity: 
any = { name: "perf.test.SensitiveEntity", "@PersonalData.DataSubjectRole": "Customer", elements: { id: { type: "cds.UUID", key: true }, - email: { - type: "cds.String", - key: false, - "@PersonalData.IsPotentiallySensitive": true, - }, - phone: { - type: "cds.String", - key: false, - "@PersonalData.IsPotentiallySensitive": true, - }, + email: { type: "cds.String", key: false, "@PersonalData.IsPotentiallySensitive": true }, + phone: { type: "cds.String", key: false, "@PersonalData.IsPotentiallySensitive": true }, name: { type: "cds.String", key: false }, }, get keyElements4DataInspector() { return ["id"]; }, - // _service is undefined for db entities => audit logging is triggered }; - // Build records that include sensitive fields const sensitiveRecords = records.map((r: any) => ({ ...r, email: `user_${r.id}@example.com`, @@ -894,26 +302,24 @@ describePerf("Performance - Data Inspector Processing", function () { name: `User ${r.id}`, })); - // Mock cds.env.requires to include audit-log + // Stub cds.env.requires to include audit-log const originalEnv = { ...cds.env }; (cds.env as any).requires = { ...cds.env.requires, "audit-log": { kind: "audit-log-to-console" }, }; - // Mock cds.connect.to to return a stubbed audit-log service + // Stub cds.connect.to → return stubbed audit-log service const originalConnect = cds.connect; - const stubbedAuditLog = { log: async () => {} }; (cds as any).connect = { ...originalConnect, to: async (serviceName: string) => { - if (serviceName === "audit-log") return stubbedAuditLog; + if (serviceName === "audit-log") return { log: async () => {} }; return originalConnect.to(serviceName); }, }; try { - // Call _emitAuditlogs directly via prototype const reader = new DataReader(); await (reader as any)._emitAuditlogs(syntheticEntity, sensitiveRecords); } finally { @@ -923,39 +329,18 @@ describePerf("Performance - Data Inspector Processing", function () { }) ); - // ----------------------------------------------------------------------- - 
// Build report - // ----------------------------------------------------------------------- - report = { - timestamp: new Date().toISOString(), - sizes: [...sizes], - results, - regressionThreshold: MAX_REGRESSION, - slopeVarianceThreshold: MAX_SLOPE_VARIANCE, - testConfig: { - warmupRuns: WARMUP_RUNS, - measurementRuns: MEASUREMENT_RUNS, - outlierTrimPercent: OUTLIER_TRIM_PERCENT, - totalRunsPerSize: MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT), - }, - environment: { - node: process.version, - platform: `${process.platform} ${os.release()}`, - cpus: os.cpus()[0].model, - totalMemoryGB: os.totalmem() / 1024 ** 3, - cpuLoad: os.loadavg(), - }, - systemWarnings: checkSystemState(), - }; - - // Log system warnings + // ------------------------------------------------------------------- + // Build report and check regressions + // ------------------------------------------------------------------- + report = buildReport(results); + if (report.systemWarnings.length > 0) { console.log("\n System Warnings:"); report.systemWarnings.forEach((w) => console.log(` ! 
${w}`)); console.log(""); } - // Regression check against baseline + // Load baseline and check for regressions let baseline: BaselineData | undefined; if (fs.existsSync(BASELINE_PATH)) { baseline = JSON.parse(fs.readFileSync(BASELINE_PATH, "utf8")) as BaselineData; @@ -963,40 +348,98 @@ describePerf("Performance - Data Inspector Processing", function () { } expect(results).to.have.length.greaterThan(0); + checkRegressions(results, baseline); + }); +}); - for (const result of results) { - if (baseline && !UPDATE_BASELINE) { - const entry = baseline[result.name]; - if (!entry) { - console.warn( - ` ⚠️ WARNING: ${result.name} baseline entry missing — skipping regression check` - ); - continue; - } +// --------------------------------------------------------------------------- +// Helpers (test-specific, not reusable across projects) +// --------------------------------------------------------------------------- - // Slope ratio regression check - if (entry.slopeRatio > 0.5 && result.slopeRatio > 0) { - const slopeAllowed = entry.slopeRatio * (1 + MAX_SLOPE_VARIANCE); - if (result.slopeRatio > slopeAllowed) { - console.warn( - ` ⚠️ WARNING: ${result.name} slope ratio regression: ` + - `${result.slopeRatio.toFixed(4)} > allowed ${slopeAllowed.toFixed(4)} ` + - `(baseline: ${entry.slopeRatio.toFixed(4)}, threshold: +${(MAX_SLOPE_VARIANCE * 100).toFixed(0)}%)` - ); - } - } +/** Builds the Report object from benchmark results and current environment. 
*/ +function buildReport(results: BenchmarkResult[]): Report { + return { + timestamp: new Date().toISOString(), + sizes: [...sizes], + results, + regressionThreshold: MAX_REGRESSION, + slopeVarianceThreshold: MAX_SLOPE_VARIANCE, + testConfig: { + warmupRuns: WARMUP_RUNS, + measurementRuns: MEASUREMENT_RUNS, + outlierTrimPercent: OUTLIER_TRIM_PERCENT, + totalRunsPerSize: MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT), + }, + environment: { + node: process.version, + platform: `${process.platform} ${os.release()}`, + cpus: os.cpus()[0].model, + totalMemoryGB: os.totalmem() / 1024 ** 3, + cpuLoad: os.loadavg(), + }, + systemWarnings: checkSystemState(), + }; +} - // Per-item time regression check - const currentPerItem = result.perItemMs[result.perItemMs.length - 1]; - const allowed = entry.perItemMsAtMax * (1 + MAX_REGRESSION); - if (currentPerItem > allowed) { - console.warn( - ` ⚠️ WARNING: ${result.name} per-item time regression: ` + - `${currentPerItem.toFixed(7)}ms > allowed ${allowed.toFixed(7)}ms ` + - `(baseline: ${entry.perItemMsAtMax.toFixed(7)}ms, threshold: +${(MAX_REGRESSION * 100).toFixed(0)}%)` - ); - } +/** + * Checks each result against the baseline and emits warnings for regressions. + * Warnings are advisory only — they do not fail the test (see strategy doc §6). 
+ */ +function checkRegressions(results: BenchmarkResult[], baseline?: BaselineData): void { + if (!baseline || UPDATE_BASELINE) return; + + for (const result of results) { + const entry = baseline[result.name]; + if (!entry) { + console.warn( + ` ⚠️ WARNING: ${result.name} baseline entry missing — skipping regression check` + ); + continue; + } + + // Slope ratio regression + if (entry.slopeRatio > 0.5 && result.slopeRatio > 0) { + const slopeAllowed = entry.slopeRatio * (1 + MAX_SLOPE_VARIANCE); + if (result.slopeRatio > slopeAllowed) { + console.warn( + ` ⚠️ WARNING: ${result.name} slope ratio regression: ` + + `${result.slopeRatio.toFixed(4)} > allowed ${slopeAllowed.toFixed(4)} ` + + `(baseline: ${entry.slopeRatio.toFixed(4)}, threshold: +${(MAX_SLOPE_VARIANCE * 100).toFixed(0)}%)` + ); } } - }); -}); + + // Per-item time regression + const currentPerItem = result.perItemMs[result.perItemMs.length - 1]; + const allowed = entry.perItemMsAtMax * (1 + MAX_REGRESSION); + if (currentPerItem > allowed) { + console.warn( + ` ⚠️ WARNING: ${result.name} per-item time regression: ` + + `${currentPerItem.toFixed(7)}ms > allowed ${allowed.toFixed(7)}ms ` + + `(baseline: ${entry.perItemMsAtMax.toFixed(7)}ms, threshold: +${(MAX_REGRESSION * 100).toFixed(0)}%)` + ); + } + } +} + +/** Builds a synthetic entity definition for DataReader benchmarks (B1). 
*/ +function buildSyntheticEntityForDataReader(entityName: string): any { + return { + name: entityName, + "@HideFromDataInspector": false, + "@cds.query.limit.default": 1000, + "@cds.query.limit.max": 1000, + elements: { + id: { type: "cds.UUID", key: true }, + ...Object.fromEntries( + Array.from({ length: 9 }, (_, j) => [`field_${j + 1}`, { type: "cds.String", key: false }]) + ), + }, + get keyElements4DataInspector() { + return ["id"]; + }, + get dataSource4DataInspector() { + return "db"; + }, + }; +} diff --git a/test/performance/check-baseline-drift.js b/test/performance/check-baseline-drift.js new file mode 100644 index 0000000..bab4107 --- /dev/null +++ b/test/performance/check-baseline-drift.js @@ -0,0 +1,270 @@ +#!/usr/bin/env node +// check-baseline-drift.js +// +// Detects gradual drift in the CI performance baseline across git commits. +// +// Background: each developer keeps a local `performance-baseline.json` (gitignored) +// calibrated to their own machine. The CI-managed baseline is +// `performance-baseline.ci.json`, which is committed and updated only via the +// manual `performance-rebaseline` GitHub Actions workflow. Because it lives in git, +// its history captures every time the CI environment was re-baselined, making it +// possible to detect gradual cost drift even when no single update exceeded the +// single-run regression threshold. +// +// This script reads those commits and warns when: +// - The total per-item cost increase across the examined window exceeds +// DRIFT_MAX_TOTAL_INCREASE (default 20%). +// - There are DRIFT_CONSECUTIVE_WARN (default 3) consecutive increases. +// +// Run: +// npm run test:performance:check-drift (uses CI baseline history) +// node test/performance/check-baseline-drift.js +// +// Options (env vars): +// DRIFT_BASELINE_FILE (default "test/performance/performance-baseline.ci.json"): +// git path of the baseline file to inspect. +// DRIFT_WINDOW (default 10): number of recent commits to examine. 
+// DRIFT_MAX_TOTAL_INCREASE (default 0.20): max allowed total increase across +// the window as a fraction (0.20 = 20%). +// DRIFT_CONSECUTIVE_WARN (default 3): number of consecutive per-item cost +// increases before emitting a warning. + +/* eslint-disable no-console */ +"use strict"; + +const { execSync } = require("child_process"); + +const BASELINE_GIT_PATH = + process.env.DRIFT_BASELINE_FILE ?? "test/performance/performance-baseline.ci.json"; +const DRIFT_WINDOW = Number(process.env.DRIFT_WINDOW ?? "10"); +const DRIFT_MAX_TOTAL = Number(process.env.DRIFT_MAX_TOTAL_INCREASE ?? "0.20"); +const DRIFT_CONSECUTIVE = Number(process.env.DRIFT_CONSECUTIVE_WARN ?? "3"); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function run(cmd) { + try { + return execSync(cmd, { + encoding: "utf8", + stdio: ["pipe", "pipe", "pipe"], + }).trim(); + } catch { + return null; + } +} + +/** + * Returns commits that touched the baseline file, most recent first. + * Each entry: { hash: string, date: string } + */ +function getCommitHistory() { + const raw = run(`git log --follow --format="%H %aI" -- ${BASELINE_GIT_PATH}`); + if (!raw) return []; + return raw + .split("\n") + .map((line) => { + const cleaned = line.replace(/"/g, ""); + const spaceIdx = cleaned.indexOf(" "); + if (spaceIdx === -1) return null; + return { + hash: cleaned.slice(0, spaceIdx), + date: cleaned.slice(spaceIdx + 1), + }; + }) + .filter((c) => c && c.hash && c.date); +} + +/** + * Reads and parses performance-baseline.ci.json at the given commit hash. + */ +function readBaselineAtCommit(hash) { + const raw = run(`git show ${hash}:${BASELINE_GIT_PATH}`); + if (!raw) return null; + try { + return JSON.parse(raw); + } catch { + return null; + } +} + +/** + * Returns the Ordinary Least Squares slope for `values` indexed 0..n-1. 
+ */ +function olsSlope(values) { + const n = values.length; + if (n < 2) return 0; + const meanX = (n - 1) / 2; + const meanY = values.reduce((a, b) => a + b, 0) / n; + let num = 0, + den = 0; + for (let i = 0; i < n; i++) { + const dx = i - meanX; + num += dx * (values[i] - meanY); + den += dx * dx; + } + return den === 0 ? 0 : num / den; +} + +/** + * Returns the length of the trailing run of strictly increasing values. + * E.g. [1, 2, 1, 3, 4, 5] → 3 (last three entries form an increasing run) + */ +function trailingIncreaseStreak(values) { + let count = 0; + for (let i = values.length - 1; i > 0; i--) { + if (values[i] > values[i - 1]) count++; + else break; + } + return count; +} + +/** Left-pad / right-pad helpers for table formatting. */ +const rpad = (s, w) => String(s).slice(0, w).padEnd(w); +const lpad = (s, w) => String(s).slice(0, w).padStart(w); + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +function main() { + console.log("=== Performance Baseline Drift Check ===\n"); + + const commits = getCommitHistory(); + if (commits.length === 0) { + console.log(`No git history found for: ${BASELINE_GIT_PATH}`); + console.log( + "Tip: the CI baseline is created by the `performance-rebaseline` workflow (manual trigger in GitHub Actions)." + ); + console.log( + " Run it at least twice to accumulate history. Until then, drift detection is not possible." + ); + process.exit(0); + } + + const window = commits.slice(0, DRIFT_WINDOW); // most recent first + console.log( + `Examining ${window.length} most recent commit(s) (${commits.length} total). DRIFT_WINDOW=${DRIFT_WINDOW}\n` + ); + + // Load snapshots in chronological order (oldest first) for trend analysis. 
+ const snapshots = []; + for (const commit of [...window].reverse()) { + const data = readBaselineAtCommit(commit.hash); + if (data) snapshots.push({ ...commit, data }); + } + + if (snapshots.length < 2) { + console.log(`Only ${snapshots.length} readable snapshot(s) — need at least 2 to detect drift.`); + console.log( + "Trigger the `performance-rebaseline` workflow again to accumulate a second snapshot." + ); + process.exit(0); + } + + // Collect all known benchmark names across all snapshots. + const benchmarkNames = [...new Set(snapshots.flatMap((s) => Object.keys(s.data)))]; + + // ------------------------------------------------------------------------- + // History table: perItemMsAtMax per benchmark per commit date + // ------------------------------------------------------------------------- + const dateHeaders = snapshots.map((s) => s.date.slice(0, 10)); + const nameWidth = 36; + const colWidth = 14; + + const headerRow = + rpad("Benchmark (perItemMsAtMax)", nameWidth) + + dateHeaders.map((d) => lpad(d, colWidth)).join(""); + console.log(headerRow); + console.log("-".repeat(headerRow.length)); + + for (const name of benchmarkNames) { + const cells = snapshots.map((s) => { + const v = s.data[name]?.perItemMsAtMax; + return typeof v === "number" ? v.toExponential(3) : "n/a"; + }); + console.log(rpad(name, nameWidth) + cells.map((c) => lpad(c, colWidth)).join("")); + } + + // ------------------------------------------------------------------------- + // Drift analysis per benchmark + // ------------------------------------------------------------------------- + console.log("\n=== Drift Analysis ===\n"); + let hasViolation = false; + + for (const name of benchmarkNames) { + const values = snapshots + .map((s) => s.data[name]?.perItemMsAtMax) + .filter((v) => typeof v === "number"); + + if (values.length < 2) continue; + + const oldest = values[0]; + const latest = values[values.length - 1]; + const totalIncrease = oldest > 0 ? 
(latest - oldest) / oldest : 0; + const streak = trailingIncreaseStreak(values); + const slope = olsSlope(values); + // Normalized slope: fraction of oldest value per commit step. + const slopeNorm = oldest > 0 ? slope / oldest : 0; + + const issues = []; + if (totalIncrease > DRIFT_MAX_TOTAL) { + issues.push( + `total increase ${(totalIncrease * 100).toFixed(1)}% exceeds DRIFT_MAX_TOTAL_INCREASE=${(DRIFT_MAX_TOTAL * 100).toFixed(0)}%` + ); + hasViolation = true; + } + if (streak >= DRIFT_CONSECUTIVE) { + // Streak warnings are advisory only — not violations (could be noise). + issues.push( + `${streak} consecutive increases (DRIFT_CONSECUTIVE_WARN=${DRIFT_CONSECUTIVE}) — investigate, may be noise` + ); + } + + const tag = + issues.length > 0 && totalIncrease > DRIFT_MAX_TOTAL + ? "FAIL" + : issues.length > 0 + ? "WARN" + : slopeNorm > 0 + ? "info" + : "ok "; + + console.log( + `[${tag}] ${rpad(name, nameWidth - 7)}` + + ` total=${lpad((totalIncrease * 100).toFixed(1) + "%", 7)}` + + ` streak=${streak}` + + ` slope=${slopeNorm >= 0 ? "+" : ""}${(slopeNorm * 100).toFixed(2)}%/commit` + ); + for (const issue of issues) { + console.log(` └─ ${issue}`); + } + } + + console.log("\n--- Thresholds ---"); + console.log( + ` DRIFT_MAX_TOTAL_INCREASE = ${(DRIFT_MAX_TOTAL * 100).toFixed(0)}% (set via env var)` + ); + console.log( + ` DRIFT_CONSECUTIVE_WARN = ${DRIFT_CONSECUTIVE} consecutive increases (advisory, not a violation)` + ); + console.log( + "\nTo re-baseline after an intentional performance change: trigger the `performance-rebaseline` workflow in GitHub Actions." + ); + console.log( + " Developers: keep your local `performance-baseline.json` up to date with `npm run test:performance:update-baseline`." + ); + + if (hasViolation) { + console.log( + "\n[WARN] Baseline drift exceeds threshold(s). Either optimize the affected code path" + + " and update the baseline, or raise DRIFT_MAX_TOTAL_INCREASE if the increase is intentional." 
+ ); + } else { + console.log("\n[PASS] No significant drift detected."); + } + process.exit(0); +} + +main(); diff --git a/test/performance/helpers/index.ts b/test/performance/helpers/index.ts new file mode 100644 index 0000000..ba43bd6 --- /dev/null +++ b/test/performance/helpers/index.ts @@ -0,0 +1,12 @@ +/** + * Barrel export for performance test helpers. + * + * Usage in test files: + * import { benchmarkSync, buildSyntheticEntities, ... } from "./helpers"; + */ + +export * from "./types"; +export * from "./statistics"; +export * from "./measurement"; +export * from "./reporting"; +export * from "./synthetic-data"; diff --git a/test/performance/helpers/measurement.ts b/test/performance/helpers/measurement.ts new file mode 100644 index 0000000..f74acd8 --- /dev/null +++ b/test/performance/helpers/measurement.ts @@ -0,0 +1,259 @@ +/** + * Benchmark measurement infrastructure. + * + * Provides timing functions (sync/async), outlier trimming, slope computation, + * R² linear fit analysis, and high-level benchmark runners that orchestrate + * warmup → measure → analyze across multiple input sizes. + */ + +import { performance } from "perf_hooks"; +import type { MeasurementStats, BenchmarkResult } from "./types"; +import { mean, calculateStats } from "./statistics"; + +// --------------------------------------------------------------------------- +// Configuration (env-overridable defaults) +// --------------------------------------------------------------------------- +const MEASUREMENT_RUNS = Number(process.env.PERF_MEASUREMENT_RUNS ?? "20"); +const OUTLIER_TRIM_PERCENT = Number(process.env.PERF_OUTLIER_TRIM_PERCENT ?? "0.5"); +const WARMUP_RUNS = Number(process.env.PERF_WARMUP_RUNS ?? "10"); + +/** Default input sizes used across all benchmarks. 
*/ +export const sizes = [10, 50, 100, 500, 1000]; + +// --------------------------------------------------------------------------- +// Low-level timing +// --------------------------------------------------------------------------- + +/** Result of a single measurement pass (one input size). */ +type MeasurementResult = { + timings: number[]; + stats: MeasurementStats; + /** Heap delta in MB across all runs. */ + memoryDeltaMB: number; +}; + +/** + * Times an async function `runs` times (plus extra runs for outlier trimming). + * Returns trimmed timings, descriptive stats, and heap memory delta. + */ +export const measureAsync = async ( + fn: () => Promise, + runs: number = MEASUREMENT_RUNS +): Promise => { + const extraRuns = Math.ceil(runs * OUTLIER_TRIM_PERCENT); + const totalRuns = runs + extraRuns; + const allTimings: number[] = []; + const memBefore = process.memoryUsage(); + + for (let i = 0; i < totalRuns; i++) { + const start = performance.now(); + await fn(); + const end = performance.now(); + allTimings.push(end - start); + } + + const memAfter = process.memoryUsage(); + const memoryDeltaMB = (memAfter.heapUsed - memBefore.heapUsed) / (1024 * 1024); + + return { ...trimOutliers(allTimings, runs), memoryDeltaMB }; +}; + +/** + * Times a synchronous function `runs` times (plus extra runs for outlier trimming). + * Returns trimmed timings, descriptive stats, and heap memory delta. 
+ */ +export const measureSync = (fn: () => void, runs: number = MEASUREMENT_RUNS): MeasurementResult => { + const extraRuns = Math.ceil(runs * OUTLIER_TRIM_PERCENT); + const totalRuns = runs + extraRuns; + const allTimings: number[] = []; + const memBefore = process.memoryUsage(); + + for (let i = 0; i < totalRuns; i++) { + const start = performance.now(); + fn(); + const end = performance.now(); + allTimings.push(end - start); + } + + const memAfter = process.memoryUsage(); + const memoryDeltaMB = (memAfter.heapUsed - memBefore.heapUsed) / (1024 * 1024); + + return { ...trimOutliers(allTimings, runs), memoryDeltaMB }; +}; + +/** + * Removes outliers by keeping the `keep` values closest to the preliminary mean. + * Returns the trimmed, sorted timings and their stats. + */ +function trimOutliers( + allTimings: number[], + keep: number +): { timings: number[]; stats: MeasurementStats } { + const preliminaryMean = mean(allTimings); + const timingsWithDistance = allTimings.map((timing) => ({ + timing, + distance: Math.abs(timing - preliminaryMean), + })); + timingsWithDistance.sort((a, b) => a.distance - b.distance); + const trimmedTimings = timingsWithDistance + .slice(0, keep) + .map((t) => t.timing) + .sort((a, b) => a - b); + + return { timings: trimmedTimings, stats: calculateStats(trimmedTimings) }; +} + +// --------------------------------------------------------------------------- +// Scaling analysis +// --------------------------------------------------------------------------- + +/** + * Computes the slope (Δtime / Δsize) between each consecutive pair of sizes. + * Returns an array of length `times.length - 1`. 
+ */ +export const computeSlopes = (times: number[], sizeValues: number[]): number[] => { + const slopes: number[] = []; + for (let i = 1; i < times.length; i++) { + const deltaT = times[i] - times[i - 1]; + const deltaN = sizeValues[i] - sizeValues[i - 1]; + slopes.push(deltaT / deltaN); + } + return slopes; +}; + +/** + * Computes R² (coefficient of determination) for a linear least-squares fit + * of `times` vs `sizeValues`. Returns 1.0 for a perfect straight line. + */ +export const computeR2 = (times: number[], sizeValues: number[]): number => { + const n = times.length; + if (n < 2) return 1; + const meanX = sizeValues.reduce((sum, x) => sum + x, 0) / n; + const meanY = times.reduce((sum, y) => sum + y, 0) / n; + let numerator = 0; + let denominator = 0; + for (let i = 0; i < n; i++) { + const dx = sizeValues[i] - meanX; + numerator += dx * (times[i] - meanY); + denominator += dx * dx; + } + const slope = denominator === 0 ? 0 : numerator / denominator; + const intercept = meanY - slope * meanX; + let ssRes = 0; + let ssTot = 0; + for (let i = 0; i < n; i++) { + const predicted = slope * sizeValues[i] + intercept; + ssRes += (times[i] - predicted) ** 2; + ssTot += (times[i] - meanY) ** 2; + } + return ssTot === 0 ? 1 : 1 - ssRes / ssTot; +}; + +// --------------------------------------------------------------------------- +// High-level benchmark runners +// --------------------------------------------------------------------------- + +/** + * Runs a synchronous benchmark across all input sizes. + * For each size: warmup → measure → compute per-item cost, slopes, R². + * Logs progress to stdout. 
+ */ +export const benchmarkSync = (name: string, runFn: (size: number) => void): BenchmarkResult => { + const timingsMs: number[] = []; + const timingStats: MeasurementStats[] = []; + const memoryDeltaMB: number[] = []; + + console.log(` Benchmarking ${name}...`); + for (const size of sizes) { + const totalRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT); + process.stdout.write(` Size ${size}: warmup (${WARMUP_RUNS} runs)...`); + + for (let w = 0; w < WARMUP_RUNS; w++) { + runFn(size); + } + + process.stdout.write(` measuring (${totalRuns} runs)...`); + const measurement = measureSync(() => runFn(size), MEASUREMENT_RUNS); + + timingsMs.push(measurement.stats.median); + timingStats.push(measurement.stats); + memoryDeltaMB.push(measurement.memoryDeltaMB); + + const cv = (measurement.stats.stdDev / measurement.stats.mean) * 100; + const cvWarning = cv > 20 ? " ! HIGH VARIANCE" : ""; + console.log( + ` ✓ (${measurement.stats.median.toFixed(2)}ms ±${measurement.stats.confidenceInterval.toFixed(2)}ms, CV: ${cv.toFixed(1)}%${cvWarning})` + ); + } + + const perItemMs = timingsMs.map((time, index) => time / sizes[index]); + const slopes = computeSlopes(timingsMs, sizes); + const slopeRatio = slopes.length >= 2 ? slopes[slopes.length - 1] / slopes[0] : 1; + const r2 = computeR2(timingsMs, sizes); + + return { + name, + sizes: [...sizes], + timingsMs, + timingStats, + perItemMs, + slopes, + slopeRatio, + r2, + memoryDeltaMB, + }; +}; + +/** + * Runs an async benchmark across all input sizes. + * For each size: warmup → measure → compute per-item cost, slopes, R². + * Logs progress to stdout. 
+ */ +export const benchmarkAsync = async ( + name: string, + runFn: (size: number) => Promise +): Promise => { + const timingsMs: number[] = []; + const timingStats: MeasurementStats[] = []; + const memoryDeltaMB: number[] = []; + + console.log(` Benchmarking ${name}...`); + for (const size of sizes) { + const totalRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT); + process.stdout.write(` Size ${size}: warmup (${WARMUP_RUNS} runs)...`); + + for (let w = 0; w < WARMUP_RUNS; w++) { + await runFn(size); + } + + process.stdout.write(` measuring (${totalRuns} runs)...`); + const measurement = await measureAsync(() => runFn(size), MEASUREMENT_RUNS); + + timingsMs.push(measurement.stats.median); + timingStats.push(measurement.stats); + memoryDeltaMB.push(measurement.memoryDeltaMB); + + const cv = (measurement.stats.stdDev / measurement.stats.mean) * 100; + const cvWarning = cv > 20 ? " ! HIGH VARIANCE" : ""; + console.log( + ` ✓ (${measurement.stats.median.toFixed(2)}ms ±${measurement.stats.confidenceInterval.toFixed(2)}ms, CV: ${cv.toFixed(1)}%${cvWarning})` + ); + } + + const perItemMs = timingsMs.map((time, index) => time / sizes[index]); + const slopes = computeSlopes(timingsMs, sizes); + const slopeRatio = slopes.length >= 2 ? slopes[slopes.length - 1] / slopes[0] : 1; + const r2 = computeR2(timingsMs, sizes); + + return { + name, + sizes: [...sizes], + timingsMs, + timingStats, + perItemMs, + slopes, + slopeRatio, + r2, + memoryDeltaMB, + }; +}; diff --git a/test/performance/helpers/reporting.ts b/test/performance/helpers/reporting.ts new file mode 100644 index 0000000..487b99e --- /dev/null +++ b/test/performance/helpers/reporting.ts @@ -0,0 +1,161 @@ +/** + * Performance report generation. + * + * Builds a human-readable Markdown report from benchmark results, + * including environment info, configuration, results table with + * emoji-coded indicators, and a legend. 
+ */ + +import type { Report } from "./types"; + +// --------------------------------------------------------------------------- +// Emoji indicators for report table cells +// --------------------------------------------------------------------------- + +/** Slope ratio: 🟢 ≤2.0 (linear), 🟡 2–4 (suspicious), 🔴 >4 (non-linear). */ +export const slopeRatioEmoji = (ratio: number): string => { + if (ratio <= 2.0) return "🟢"; + if (ratio <= 4.0) return "🟡"; + return "🔴"; +}; + +/** R²: 🟢 ≥0.995 (excellent), 🟡 0.98–0.995, 🔴 <0.98. */ +export const r2Emoji = (r2: number): string => { + if (r2 >= 0.995) return "🟢"; + if (r2 >= 0.98) return "🟡"; + return "🔴"; +}; + +/** CV%: 🟢 ≤5% (stable), 🟡 5–15%, 🔴 >15% (noisy). */ +export const cvEmoji = (cv: number): string => { + if (cv <= 5) return "🟢"; + if (cv <= 15) return "🟡"; + return "🔴"; +}; + +// --------------------------------------------------------------------------- +// Formatting helpers +// --------------------------------------------------------------------------- + +/** Format a number to fixed decimal places. */ +const formatNumber = (value: number, digits: number): string => value.toFixed(digits); + +/** Format an array of numbers as a comma-separated string. */ +const formatList = (values: number[], digits: number): string => + values.map((v) => formatNumber(v, digits)).join(", "); + +// --------------------------------------------------------------------------- +// Markdown report builder +// --------------------------------------------------------------------------- + +/** + * Builds a complete Markdown performance report. + * + * Sections: Environment, Test Configuration, System Warnings, + * Results table (with baseline comparison columns), and Legend. 
+ *
+ * The "System Warnings" section is emitted only when `report.systemWarnings` is non-empty.
+ * Baseline columns show "n/a" when no baseline entry exists for a benchmark name, and the
+ * CV% column shows "n/a" when no coefficient of variation can be computed.
+ *
+ * @param report - Benchmark results, environment and configuration of one run
+ * @returns The report as a single string with lines joined by "\n"
+ */
+export const buildMarkdownReport = (report: Report): string => {
+  const lines: string[] = [];
+  lines.push(`# Performance Report (${report.timestamp})`);
+  lines.push("");
+
+  // --- Environment ---
+  lines.push("## Environment");
+  lines.push("");
+  lines.push(`- Node: ${report.environment.node}`);
+  lines.push(`- Platform: ${report.environment.platform}`);
+  lines.push(`- CPU: ${report.environment.cpus}`);
+  lines.push(`- Memory: ${report.environment.totalMemoryGB.toFixed(1)} GB`);
+  lines.push(`- CPU Load: ${report.environment.cpuLoad.map((l) => l.toFixed(2)).join(", ")}`);
+  lines.push("");
+
+  // --- Test Configuration ---
+  lines.push("## Test Configuration");
+  lines.push("");
+  lines.push(`- Warmup runs: ${report.testConfig.warmupRuns}`);
+  lines.push(`- Measurement runs: ${report.testConfig.measurementRuns}`);
+  lines.push(
+    `- Outlier trim: ${(report.testConfig.outlierTrimPercent * 100).toFixed(0)}% extra (${report.testConfig.totalRunsPerSize - report.testConfig.measurementRuns} trimmed)`
+  );
+  lines.push(`- Total runs per size: ${report.testConfig.totalRunsPerSize}`);
+
+  // --- System Warnings ---
+  if (report.systemWarnings.length > 0) {
+    lines.push("");
+    lines.push("### System Warnings");
+    lines.push("");
+    report.systemWarnings.forEach((w) => lines.push(`- ${w}`));
+  }
+
+  // --- Results table ---
+  lines.push("");
+  lines.push("## Results");
+  lines.push("");
+  lines.push(
+    "| Benchmark | Timings ms (median) | Variance (CV%) | Per-item ms | Memory ΔMB | Slope ratio | R² | Baseline per-item max | Baseline slope ratio | Baseline R² |"
+  );
+  lines.push("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |");
+
+  for (const result of report.results) {
+    const baseline = report.baseline?.[result.name];
+    const baselinePerItem = baseline ? formatNumber(baseline.perItemMsAtMax, 7) : "n/a";
+    const baselineSlope = baseline ? formatNumber(baseline.slopeRatio, 4) : "n/a";
+    const baselineR2 = baseline?.r2 !== undefined ? formatNumber(baseline.r2, 4) : "n/a";
+
+    // CV (stdDev / mean) is undefined for a zero or NaN mean, so only sizes with a positive
+    // mean contribute. With no usable size the cell shows "n/a" instead of "NaN% 🔴".
+    const cvValues = result.timingStats
+      .filter((s) => s.mean > 0)
+      .map((s) => (s.stdDev / s.mean) * 100);
+    const avgCV =
+      cvValues.length > 0 ? cvValues.reduce((sum, cv) => sum + cv, 0) / cvValues.length : NaN;
+    const cvCell = Number.isNaN(avgCV) ? "n/a" : formatNumber(avgCV, 1) + "% " + cvEmoji(avgCV);
+
+    // Rows carry the same outer pipes as the header/separator rows; a literal "|" in the
+    // benchmark name is escaped so it cannot split the first cell.
+    const cells = [
+      result.name.replace(/\|/g, "\\|"),
+      formatList(result.timingsMs, 2),
+      cvCell,
+      formatList(result.perItemMs, 7),
+      formatList(result.memoryDeltaMB, 2),
+      formatNumber(result.slopeRatio, 4) + " " + slopeRatioEmoji(result.slopeRatio),
+      formatNumber(result.r2, 4) + " " + r2Emoji(result.r2),
+      baselinePerItem,
+      baselineSlope,
+      baselineR2,
+    ];
+    lines.push("| " + cells.join(" | ") + " |");
+  }
+
+  // --- Legend ---
+  lines.push("");
+  lines.push("## Legend");
+  lines.push("");
+  lines.push("### Slope ratio");
+  lines.push("");
+  lines.push(
+    "Ratio of the last slope segment to the first. A perfectly linear O(n) function scores 1.0."
+  );
+  lines.push("");
+  lines.push("| Indicator | Range | Meaning |");
+  lines.push("| --- | --- | --- |");
+  lines.push("| 🟢 | ≤ 2.0 | Consistent with O(n) linear scaling |");
+  lines.push("| 🟡 | 2.0 – 4.0 | Suspicious — possible mild super-linear growth |");
+  lines.push("| 🔴 | > 4.0 | Clearly non-linear (O(n²) or worse) |");
+  lines.push("");
+  lines.push("### CV% (Coefficient of Variation)");
+  lines.push("");
+  lines.push("Average CV across all measured sizes. Measures measurement stability.");
+  lines.push("");
+  lines.push("| Indicator | Range | Meaning |");
+  lines.push("| --- | --- | --- |");
+  lines.push("| 🟢 | ≤ 5% | Stable — measurements are repeatable |");
+  lines.push("| 🟡 | 5% – 15% | Acceptable for Node.js |");
+  lines.push("| 🔴 | > 15% | High noise — results unreliable |");
+  lines.push("");
+  lines.push("### R² (Coefficient of Determination)");
+  lines.push("");
+  lines.push("1.0 = medians fall perfectly on a straight line.");
+  lines.push("");
+  lines.push("| Indicator | Range | Meaning |");
+  lines.push("| --- | --- | --- |");
+  lines.push("| 🟢 | ≥ 0.995 | Excellent linear fit |");
+  lines.push("| 🟡 | 0.980 – 0.995 | Minor deviation from linearity |");
+  lines.push("| 🔴 | < 0.980 | Clearly non-linear scaling |");
+
+  return lines.join("\n");
+};
diff --git a/test/performance/helpers/statistics.ts b/test/performance/helpers/statistics.ts
new file mode 100644
index 0000000..a5952b0
--- /dev/null
+++ b/test/performance/helpers/statistics.ts
@@ -0,0 +1,70 @@
+/**
+ * Statistical functions for performance measurement analysis.
+ *
+ * Provides basic descriptive statistics (median, mean, standard deviation),
+ * composite stats calculation, and system health checks.
+ */
+
+import os from "os";
+import type { MeasurementStats } from "./types";
+
+/**
+ * Returns the median of a numeric array without mutating it (a sorted copy is used).
+ * For an even number of values the two middle values are averaged.
+ * An empty array yields NaN.
+ */
+export const median = (values: number[]): number => {
+  const sorted = [...values].sort((a, b) => a - b);
+  const mid = Math.floor(sorted.length / 2);
+  return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
+};
+
+/**
+ * Returns the arithmetic mean of a numeric array.
+ * An empty array yields NaN (0 / 0).
+ */
+export const mean = (values: number[]): number =>
+  values.reduce((sum, val) => sum + val, 0) / values.length;
+
+/** Returns the population standard deviation of a numeric array.
*/
+export const stdDev = (values: number[]): number => {
+  const avg = mean(values);
+  const squareDiffs = values.map((value) => Math.pow(value - avg, 2));
+  // Dividing by n (via mean) rather than n - 1 makes this the population form.
+  return Math.sqrt(mean(squareDiffs));
+};
+
+/**
+ * Computes full descriptive statistics for a set of timing values.
+ * Includes median, mean, stdDev, min, max, and 95% confidence interval.
+ *
+ * The confidence interval is the half-width `1.96 * stdDev / sqrt(n)`, i.e. a normal
+ * approximation based on the population standard deviation.
+ *
+ * @param values - Timing samples; expected to be non-empty. For an empty array
+ *   `min`/`max` are `undefined` and the remaining fields are NaN.
+ * @returns The statistics for `values`; the input array is not mutated
+ */
+export const calculateStats = (values: number[]): MeasurementStats => {
+  const sorted = [...values].sort((a, b) => a - b);
+  const avg = mean(values);
+  const sd = stdDev(values);
+  const ci = 1.96 * (sd / Math.sqrt(values.length));
+  return {
+    median: median(values),
+    mean: avg,
+    stdDev: sd,
+    min: sorted[0],
+    max: sorted[sorted.length - 1],
+    confidenceInterval: ci,
+  };
+};
+
+/**
+ * Checks current system state and returns warnings if conditions
+ * may produce unreliable benchmark results (high CPU load, high memory pressure).
+ *
+ * A CPU warning is added when the 1-minute load average exceeds 70% of the logical CPU
+ * count; a memory warning is added when more than 85% of total memory is in use.
+ *
+ * @returns Human-readable warning messages; empty when nothing was detected
+ */
+export const checkSystemState = (): string[] => {
+  const warnings: string[] = [];
+  const loadAvg = os.loadavg();
+  const cpuCount = os.cpus().length;
+  // os.cpus() can return an empty array when CPU information is unavailable; without the
+  // guard the threshold would be 0 and any non-zero load would raise a bogus warning.
+  // On Windows os.loadavg() is always [0, 0, 0], so this check never fires there.
+  if (cpuCount > 0 && loadAvg[0] > cpuCount * 0.7) {
+    warnings.push(
+      `High CPU load detected: ${loadAvg[0].toFixed(2)} (${cpuCount} CPUs). Results may be unreliable.`
+    );
+  }
+  const freeMemGB = os.freemem() / 1024 ** 3;
+  const totalMemGB = os.totalmem() / 1024 ** 3;
+  const memUsagePercent = ((totalMemGB - freeMemGB) / totalMemGB) * 100;
+  if (memUsagePercent > 85) {
+    warnings.push(
+      `High memory usage: ${memUsagePercent.toFixed(1)}% (${freeMemGB.toFixed(1)}GB free of ${totalMemGB.toFixed(1)}GB).`
+    );
+  }
+  return warnings;
+};
diff --git a/test/performance/helpers/synthetic-data.ts b/test/performance/helpers/synthetic-data.ts
new file mode 100644
index 0000000..5a26a1b
--- /dev/null
+++ b/test/performance/helpers/synthetic-data.ts
@@ -0,0 +1,178 @@
+/**
+ * Synthetic data generators and mock request builders.
+ *
+ * These functions produce deterministic, configurable test data that isolates
+ * plugin processing from real CDS models and database queries. Used by
+ * performance benchmarks to control input size precisely.
+ */
+
+// ---------------------------------------------------------------------------
+// Entity generators
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds an array of synthetic CDS-like entity definitions.
+ *
+ * Each entity includes:
+ * - A UUID key element
+ * - `elementsPerEntity - 1` typed fields with varied annotations
+ * - One hidden element (`@HideFromDataInspector: true`) — should be filtered out
+ * - One association element — should be filtered out
+ *
+ * Entity `i` is named `perf.test.Entity_<i>`; its `dataSource4DataInspector` getter
+ * alternates between "db" (even `i`) and "service" (odd `i`).
+ *
+ * @param count - Number of entities to generate
+ * @param elementsPerEntity - Number of visible elements per entity, counting the key
+ *   element (default 10); the hidden and association elements are added on top
+ * @returns The generated entity definitions (empty when `count` is 0)
+ */
+export function buildSyntheticEntities(count: number, elementsPerEntity: number = 10): any[] {
+  const entities: any[] = [];
+  for (let i = 0; i < count; i++) {
+    // Element name -> element definition; values are heterogeneous object literals.
+    const elements: Record<string, any> = {};
+
+    // Key element
+    elements[`id_${i}`] = {
+      type: "cds.UUID",
+      key: true,
+      "@HideFromDataInspector": false,
+    };
+
+    // Regular elements with varied types and annotations
+    for (let j = 1; j < elementsPerEntity; j++) {
+      elements[`field_${i}_${j}`] = {
+        type: j % 3 === 0 ? "cds.Integer" : j % 3 === 1 ? "cds.String" : "cds.Boolean",
+        key: false,
+        length: j % 3 === 1 ? 255 : undefined,
+        default: j % 5 === 0 ? { val: "default" } : undefined,
+        notNull: j % 4 === 0,
+        "@PersonalData.IsPotentiallySensitive": j % 7 === 0,
+        "@Core.Computed": j % 9 === 0,
+        "@HideFromDataInspector": false,
+      };
+    }
+
+    // Hidden element (filtered out by EntityDefinitionReader)
+    elements[`hidden_${i}`] = {
+      type: "cds.String",
+      "@HideFromDataInspector": true,
+    };
+
+    // Association element (filtered out by EntityDefinitionReader)
+    elements[`assoc_${i}`] = {
+      type: "cds.Association",
+    };
+
+    entities.push({
+      name: `perf.test.Entity_${i}`,
+      "@title": i % 3 === 0 ? `Entity ${i} Title` : undefined,
+      "@HideFromDataInspector": false,
+      elements,
+      get dataSource4DataInspector() {
+        return i % 2 === 0 ? "db" : "service";
+      },
+      get keyElements4DataInspector() {
+        return [`id_${i}`];
+      },
+    });
+  }
+  return entities;
+}
+
+// ---------------------------------------------------------------------------
+// Record generators
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds an array of synthetic database records for DataReader benchmarks.
+ *
+ * Each record contains an `id` field and `fieldsPerRecord - 1` typed fields.
+ * The returned array has a `$count` property set to `count` (simulating CDS query result).
+ *
+ * @param count - Number of records to generate
+ * @param fieldsPerRecord - Number of fields per record (default 10)
+ * @returns The generated records, with `$count` attached to the array object
+ */
+export function buildSyntheticRecords(count: number, fieldsPerRecord: number = 10): any[] {
+  const records: any[] = [];
+  for (let i = 0; i < count; i++) {
+    // Field name -> value; values are numbers, strings or booleans depending on j % 3.
+    const record: Record<string, any> = { id: `uuid-${i}` };
+    for (let j = 1; j < fieldsPerRecord; j++) {
+      record[`field_${j}`] = j % 3 === 0 ? i * j : j % 3 === 1 ? `value_${i}_${j}` : i % 2 === 0;
+    }
+    records.push(record);
+  }
+  (records as any).$count = count;
+  return records;
+}
+
+// ---------------------------------------------------------------------------
+// Mock request builders
+// ---------------------------------------------------------------------------
+
+/**
+ * Creates a mock `cds.Request` for EntityDefinitionReader.read() — collection request.
+ *
+ * Simulates a GET with `$select=*` and optional OData query options.
+ * Only a single `<field> [asc|desc]` term is translated into `SELECT.orderBy`; the sort
+ * direction defaults to "asc" when omitted. The returned `reject` callback throws.
+ *
+ * @param options.filter - OData $filter expression (e.g. `contains(name, 'Foo')`)
+ * @param options.orderby - OData $orderby expression (e.g. `name asc`)
+ * @param options.skip - OData $skip value
+ * @param options.top - OData $top value
+ */
+export function buildEntityDefinitionRequest(options?: {
+  filter?: string;
+  orderby?: string;
+  skip?: number;
+  top?: number;
+}): any {
+  const columns = ["*"];
+  return {
+    params: [],
+    query: {
+      SELECT: {
+        columns,
+        count: true,
+        orderBy: options?.orderby
+          ? [{ ref: [options.orderby.split(" ")[0]], sort: options.orderby.split(" ")[1] || "asc" }]
+          : undefined,
+      },
+    },
+    req: {
+      query: {
+        $filter: options?.filter,
+        $orderby: options?.orderby,
+        $skip: options?.skip !== undefined ? String(options.skip) : undefined,
+        $top: options?.top !== undefined ? String(options.top) : undefined,
+      },
+    },
+    reject: (code: number, msg: string) => {
+      throw new Error(`Request rejected: ${code} ${msg}`);
+    },
+  };
+}
+
+/**
+ * Creates a mock `cds.Request` for DataReader.read() — data retrieval request.
+ *
+ * Simulates a GET filtered by entity name with `$select=*`.
+ *
+ * @param entityName - The entity name to filter on (e.g.
`perf.test.Entity_0`)
+ *   The value is interpolated verbatim into the `$filter` string; a single quote in the
+ *   name is not escaped.
+ * @returns A plain object with `params`, `query`, `req` and a `reject` callback that throws
+ *   an `Error`. `$skip` and `$top` are fixed at "0" and "1000".
+ */
+export function buildDataReadRequest(entityName: string): any {
+  const columns = ["*"];
+  return {
+    params: [],
+    query: {
+      SELECT: {
+        columns,
+        count: true,
+      },
+    },
+    req: {
+      query: {
+        $filter: `entityName = '${entityName}'`,
+        $skip: "0",
+        $top: "1000",
+      },
+    },
+    reject: (code: number, msg: string) => {
+      throw new Error(`Request rejected: ${code} ${msg}`);
+    },
+  };
+}
diff --git a/test/performance/helpers/types.ts b/test/performance/helpers/types.ts
new file mode 100644
index 0000000..05ea53c
--- /dev/null
+++ b/test/performance/helpers/types.ts
@@ -0,0 +1,75 @@
+/**
+ * Type definitions for the performance testing infrastructure.
+ *
+ * These types define the shape of measurement results, baseline data,
+ * and the final performance report.
+ */
+
+/**
+ * Descriptive statistics for a set of timing measurements.
+ *
+ * As produced by `calculateStats`: `stdDev` is the population standard deviation and
+ * `min`/`max` are the smallest and largest sample.
+ */
+export type MeasurementStats = {
+  median: number;
+  mean: number;
+  stdDev: number;
+  min: number;
+  max: number;
+  /** 95% confidence interval half-width (±value), computed as 1.96 * stdDev / sqrt(n). */
+  confidenceInterval: number;
+};
+
+/**
+ * Result of a single benchmark run across all input sizes.
+ *
+ * The per-size arrays are described as holding one entry "for each size"; presumably they
+ * are index-aligned with `sizes` — confirm against the code that fills them.
+ */
+export type BenchmarkResult = {
+  name: string;
+  sizes: number[];
+  /** Median timing in ms for each size. */
+  timingsMs: number[];
+  /** Full statistics for each size. */
+  timingStats: MeasurementStats[];
+  /** Time per item (timingMs / size) for each size. */
+  perItemMs: number[];
+  /** Slope between consecutive size pairs (ms per additional item). */
+  slopes: number[];
+  /** Ratio of last slope to first slope. 1.0 = perfectly linear. */
+  slopeRatio: number;
+  /** R² coefficient of determination for linear fit. 1.0 = perfect. */
+  r2: number;
+  /** Heap memory delta in MB for each size. */
+  memoryDeltaMB: number[];
+};
+
+/** A single entry in the performance baseline file.
*/
+export type BaselineEntry = {
+  sizes: number[];
+  /** Per-item time at the largest size; shown in the report's "Baseline per-item max" column. */
+  perItemMsAtMax: number;
+  slopeRatio: number;
+  /** Optional; the report prints "n/a" when it is absent. */
+  r2?: number;
+};
+
+/** The full baseline file: benchmark name → baseline entry. */
+export type BaselineData = Record<string, BaselineEntry>;
+
+/** Test configuration summary for the report. */
+export type TestConfig = {
+  warmupRuns: number;
+  measurementRuns: number;
+  /** Fraction, not percent (0.5 = 50%); the report multiplies it by 100 for display. */
+  outlierTrimPercent: number;
+  /** The report shows `totalRunsPerSize - measurementRuns` as the number of trimmed runs. */
+  totalRunsPerSize: number;
+};
+
+/** The complete performance report written to disk after a run. */
+export type Report = {
+  timestamp: string;
+  sizes: number[];
+  results: BenchmarkResult[];
+  /** Baseline used for comparison; absent when no baseline was available. */
+  baseline?: BaselineData;
+  regressionThreshold: number;
+  slopeVarianceThreshold: number;
+  testConfig: TestConfig;
+  environment: {
+    node: string;
+    platform: string;
+    cpus: string;
+    totalMemoryGB: number;
+    cpuLoad: number[];
+  };
+  systemWarnings: string[];
+};
From c79b47e3836611ef7d3098aff8456d1dfc382b8b Mon Sep 17 00:00:00 2001
From: Malem
Date: Mon, 30 Mar 2026 16:16:49 +0530
Subject: [PATCH 3/5] performance workflows added

---
 .github/workflows/performance-rebaseline.yml | 59 ++++++++++++++++++++
 .github/workflows/performance-tests.yml      | 40 +++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 .github/workflows/performance-rebaseline.yml
 create mode 100644 .github/workflows/performance-tests.yml

diff --git a/.github/workflows/performance-rebaseline.yml b/.github/workflows/performance-rebaseline.yml
new file mode 100644
index 0000000..749d867
--- /dev/null
+++ b/.github/workflows/performance-rebaseline.yml
@@ -0,0 +1,59 @@
+name: Update CI Performance Baseline
+
+on:
+  workflow_dispatch:
+    inputs:
+      reason:
+        description: "Reason for re-baselining"
+        required: true
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  rebaseline:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run performance
tests (update baseline)
+        run: npm run test:performance:update-baseline
+        env:
+          CI: true
+          PERF_TESTS: "1"
+          PERF_UPDATE_BASELINE: "1"
+          PERF_BASELINE_FILE: "performance-baseline.ci.json"
+
+      - name: Commit CI baseline
+        # The free-text "reason" input is untrusted. It is handed to the script through
+        # environment variables instead of ${{ }} interpolation, so quotes, backticks or
+        # $(...) in the text cannot be executed by the shell.
+        env:
+          REASON: ${{ github.event.inputs.reason }}
+          ACTOR: ${{ github.actor }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add test/performance/performance-baseline.ci.json
+          git commit -m "chore: update CI performance baseline
+
+          Reason: ${REASON}
+          Triggered by: @${ACTOR}
+          Workflow run: ${RUN_URL}"
+          git push
+
+      - name: Summary
+        # Same reasoning as above: pass the untrusted input via env, never inline.
+        env:
+          REASON: ${{ github.event.inputs.reason }}
+        run: |
+          echo "## Performance Baseline Updated" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "**Reason:** ${REASON}" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "The CI baseline has been committed to \`test/performance/performance-baseline.ci.json\`." >> $GITHUB_STEP_SUMMARY
+          echo "Future performance test runs will compare against this baseline."
>> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + cat coverage/performance-report.md >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml new file mode 100644 index 0000000..a80d10f --- /dev/null +++ b/.github/workflows/performance-tests.yml @@ -0,0 +1,40 @@ +name: Performance Tests + +on: + pull_request: + branches: [main] + paths: + - "srv/**" + - "lib/**" + - "test/performance/**" + - "package.json" + workflow_dispatch: + +concurrency: + group: performance-${{ github.ref }} + cancel-in-progress: true + +jobs: + performance: + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: npm + + - name: Install dependencies + run: npm ci + + - name: Run performance tests + run: npm run test:performance + env: + CI: true + PERF_TESTS: "1" + PERF_BASELINE_FILE: "performance-baseline.ci.json" + PERF_MAX_REGRESSION: "0.3" + PERF_MAX_SLOPE_VARIANCE: "0.3" From 8049f2e386abb1d96f5c0a6c846cbb4e3dd532b1 Mon Sep 17 00:00:00 2001 From: titanh3art <18174614+titanh3art@users.noreply.github.com> Date: Wed, 1 Apr 2026 15:58:58 +0530 Subject: [PATCH 4/5] standards update --- .../PERFORMANCE-TESTING-STRATEGY.md | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/test/performance/PERFORMANCE-TESTING-STRATEGY.md b/test/performance/PERFORMANCE-TESTING-STRATEGY.md index 9bdc1a0..7255402 100644 --- a/test/performance/PERFORMANCE-TESTING-STRATEGY.md +++ b/test/performance/PERFORMANCE-TESTING-STRATEGY.md @@ -10,21 +10,24 @@ This document describes the performance testing strategy for the `@cap-js/data-i ## 2. Product Standards Coverage -This testing strategy addresses the following SAP Performance Product Standards: +The SAP Performance Product Standards comprise 7 requirements: PERF-01, PERF-03, PERF-04, PERF-11, PERF-13, PERF-20, PERF-21. 
This section maps each standard to how it is addressed (or why it is not applicable) for this plugin. -| Standard | Title | How Addressed | -| ----------- | ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **PERF-01** | Prohibit quadratic or worse scaling | Slope-ratio analysis across 5 input sizes (10→1000) detects O(n²) growth patterns. R² coefficient verifies linearity. | -| **PERF-03** | Monitor for performance regressions | Baseline comparison with configurable regression threshold (default 30%). CI workflow runs on every PR. | -| **PERF-05** | Avoid hidden allocations | Memory delta tracking (heap usage before/after) per benchmark identifies unexpected allocation growth. | -| **PERF-11** | Use caching where appropriate | Caching effectiveness is indirectly validated via EntityDefinitionReader benchmarks — repeated entity reads exercise the WeakMap cache in CsnRuntimeExtensions; per-item cost should remain flat. | +### Standards addressed by this testing strategy -### Standards not applicable to first release +| Standard | Title | How Addressed | +| ----------- | --------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **PERF-01** | A performance test strategy shall be in place | **This document itself** fulfils PERF-01. 
It defines the test environment (local Node.js with synthetic data, CI via GitHub Actions), test data (synthetic CDS models and records), test cases (5 benchmarks across scaling sizes), test types (unit-level regression and scaling tests), test tools (Mocha + custom measurement helpers), and test results (JSON + Markdown reports in `coverage/`). | +| **PERF-04** | Competitive average and maximum throughput or end-to-end response time for a UIS shall be planned, recorded, and verified | Per-item processing time is recorded at each input size and compared against baseline targets. As a plugin (not a standalone UI), we measure **server-side processing time** contributed by the plugin's handlers. The benchmarks record median, mean, CI, and per-item cost, providing the "planned, recorded, and verified" data required by PERF-04. | +| **PERF-11** | As long as the functionality remains identical there shall be no regression of the resource consumption for subsequent deliveries | Baseline comparison with configurable regression threshold (default 30%) for both per-item time and slope ratio. CI workflow runs on every PR. Memory delta (heap usage before/after) is tracked per benchmark to detect resource consumption regressions. Baseline drift detection script analyses git history of CI baselines to catch gradual degradation. | +| **PERF-13** | Enable throughput and response time optimization by utilizing available resources for scale up and scale out | Slope-ratio analysis across 5 input sizes (10→1000) verifies that processing scales linearly — i.e., CPU and memory consumption grow at most linearly with input size (O(n)). R² coefficient of determination confirms linearity. This proves the plugin's processing will not become a bottleneck as CDS models grow, supporting the scalability requirement. 
| -| Standard | Title | Reason | -| ----------- | ------------------------------- | ------------------------------------------------------------------------------------------------------- | -| **PERF-07** | Network round-trip optimization | Plugin does not make outbound network calls; DB access is delegated to the CAP runtime. | -| **PERF-09** | Concurrent request handling | As a CDS service handler plugin, concurrency is managed by the CAP Node.js runtime, not by this plugin. | +### Standards not applicable to this plugin (first release) + +| Standard | Title | Reason | +| ----------- | ----------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **PERF-03** | An application or service shall be cost- and resource-consumption aware; a procedure for capacity modelling/sizing shall be defined | As a CDS plugin, `data-inspector` does not provision or consume cloud infrastructure independently. It runs within the host CAP application's process. Capacity modelling and sizing are the responsibility of the host application. The plugin's own resource footprint is characterised by the benchmarks (per-item time, memory delta), but a standalone sizing procedure is not applicable. | +| **PERF-20** | Enforce and support quota management | The plugin does not manage tenants, users, or request quotas. It runs as an in-process CDS service handler. Quota management (rate limiting, resource limits) is the responsibility of the host CAP application and the cloud platform (e.g., BTP). 
The plugin does not make independent outbound network calls that would require rate limiting. | +| **PERF-21** | Enable elastic scale out/in based on demand levels, in and among clouds | Elastic scalability is an infrastructure and platform concern. The plugin is stateless and runs within the host application's Node.js process. It does not manage instances, scaling rules, or cloud resources. Horizontal/vertical scaling is handled by the CAP runtime and the deployment platform. | ## 3. Architecture @@ -209,7 +212,7 @@ The `check-baseline-drift.js` script detects **gradual performance degradation** As the plugin evolves, consider adding: -1. **Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks (PERF-05 deeper coverage) +1. **Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks (deeper PERF-11 resource consumption coverage) 2. **Concurrent simulation** — If the plugin adds stateful processing, add benchmarks that simulate concurrent request patterns 3. **Larger scale tests** — Extend the sizes array to [100, 500, 1000, 5000, 10000] if real-world deployments involve very large CDS models 4. 
**UI rendering benchmarks** — If the SAPUI5 frontend becomes a performance concern, add browser-based benchmarks using Puppeteer From 1ce9e044e686e15e0b625f995ac5dfa80c52fb2f Mon Sep 17 00:00:00 2001 From: titanh3art <18174614+titanh3art@users.noreply.github.com> Date: Wed, 1 Apr 2026 16:42:59 +0530 Subject: [PATCH 5/5] refinement --- .../PERFORMANCE-TESTING-STRATEGY.md | 49 ++++++------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/test/performance/PERFORMANCE-TESTING-STRATEGY.md b/test/performance/PERFORMANCE-TESTING-STRATEGY.md index 7255402..ab82c2c 100644 --- a/test/performance/PERFORMANCE-TESTING-STRATEGY.md +++ b/test/performance/PERFORMANCE-TESTING-STRATEGY.md @@ -8,30 +8,9 @@ This document describes the performance testing strategy for the `@cap-js/data-i `data-inspector` is a CDS plugin that is consumed by host CAP applications. End-to-end latency depends heavily on the host application's database, network, and authentication stack — none of which are under this plugin's control. Testing at the class/method level isolates the plugin's own computational work and produces **stable, reproducible, CI-friendly** measurements. -## 2. Product Standards Coverage +## 2. Architecture -The SAP Performance Product Standards comprise 7 requirements: PERF-01, PERF-03, PERF-04, PERF-11, PERF-13, PERF-20, PERF-21. This section maps each standard to how it is addressed (or why it is not applicable) for this plugin. 
- -### Standards addressed by this testing strategy - -| Standard | Title | How Addressed | -| ----------- | --------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **PERF-01** | A performance test strategy shall be in place | **This document itself** fulfils PERF-01. It defines the test environment (local Node.js with synthetic data, CI via GitHub Actions), test data (synthetic CDS models and records), test cases (5 benchmarks across scaling sizes), test types (unit-level regression and scaling tests), test tools (Mocha + custom measurement helpers), and test results (JSON + Markdown reports in `coverage/`). | -| **PERF-04** | Competitive average and maximum throughput or end-to-end response time for a UIS shall be planned, recorded, and verified | Per-item processing time is recorded at each input size and compared against baseline targets. As a plugin (not a standalone UI), we measure **server-side processing time** contributed by the plugin's handlers. The benchmarks record median, mean, CI, and per-item cost, providing the "planned, recorded, and verified" data required by PERF-04. | -| **PERF-11** | As long as the functionality remains identical there shall be no regression of the resource consumption for subsequent deliveries | Baseline comparison with configurable regression threshold (default 30%) for both per-item time and slope ratio. CI workflow runs on every PR. Memory delta (heap usage before/after) is tracked per benchmark to detect resource consumption regressions. 
Baseline drift detection script analyses git history of CI baselines to catch gradual degradation. | -| **PERF-13** | Enable throughput and response time optimization by utilizing available resources for scale up and scale out | Slope-ratio analysis across 5 input sizes (10→1000) verifies that processing scales linearly — i.e., CPU and memory consumption grow at most linearly with input size (O(n)). R² coefficient of determination confirms linearity. This proves the plugin's processing will not become a bottleneck as CDS models grow, supporting the scalability requirement. | - -### Standards not applicable to this plugin (first release) - -| Standard | Title | Reason | -| ----------- | ----------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **PERF-03** | An application or service shall be cost- and resource-consumption aware; a procedure for capacity modelling/sizing shall be defined | As a CDS plugin, `data-inspector` does not provision or consume cloud infrastructure independently. It runs within the host CAP application's process. Capacity modelling and sizing are the responsibility of the host application. The plugin's own resource footprint is characterised by the benchmarks (per-item time, memory delta), but a standalone sizing procedure is not applicable. | -| **PERF-20** | Enforce and support quota management | The plugin does not manage tenants, users, or request quotas. It runs as an in-process CDS service handler. 
Quota management (rate limiting, resource limits) is the responsibility of the host CAP application and the cloud platform (e.g., BTP). The plugin does not make independent outbound network calls that would require rate limiting. | -| **PERF-21** | Enable elastic scale out/in based on demand levels, in and among clouds | Elastic scalability is an infrastructure and platform concern. The plugin is stateless and runs within the host application's Node.js process. It does not manage instances, scaling rules, or cloud resources. Horizontal/vertical scaling is handled by the CAP runtime and the deployment platform. | - -## 3. Architecture - -### 3.1 Test location +### 2.1 Test location ``` test/performance/ @@ -43,7 +22,7 @@ test/performance/ └── PERFORMANCE-TESTING-STRATEGY.md # This file ``` -### 3.2 What is benchmarked +### 2.2 What is benchmarked | Group | Benchmark | What it measures | | ------ | ------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | @@ -53,7 +32,7 @@ test/performance/ | **B1** | `DataReader.read (response construction, DB stubbed)` | Response loop after DB query: entity resolution, key construction, record transformation. DB returns pre-built synthetic records. | | **B2** | `DataReader._emitAuditlogs (stubbed audit-log)` | Audit log emission with sensitive data fields. Audit-log service is stubbed; measures per-record processing overhead. | -### 3.3 Measurement methodology +### 2.3 Measurement methodology For each benchmark, measurements are taken across 5 input sizes: **10, 50, 100, 500, 1000**. @@ -63,7 +42,7 @@ For each size: 3. **Outlier removal** — Runs are sorted by distance from preliminary mean; the 50% extra runs furthest from the mean are discarded 4. 
**Statistics** — Median, mean, standard deviation, 95% confidence interval, CV% -### 3.4 Scaling analysis +### 2.4 Scaling analysis Three complementary metrics detect non-linear scaling: @@ -73,7 +52,7 @@ Three complementary metrics detect non-linear scaling: | **R² (coefficient of determination)** | How well medians fit a straight line. 1.0 = perfect linear | 🟢 ≥ 0.995 / 🟡 0.98–0.995 / 🔴 < 0.98 | | **Per-item time** | Time per item at max size; detects absolute overhead | Compared to baseline (30% tolerance) | -### 3.5 Baseline management +### 2.5 Baseline management Two baselines are maintained, following the same pattern as `ai-log-analyzer`: @@ -88,7 +67,7 @@ Two baselines are maintained, following the same pattern as `ai-log-analyzer`: - The CI baseline is **committed** so it is reproducible, auditable via `git log`, and immune to cache eviction - The first run without a baseline gracefully skips (no failure) -### 3.6 Regression detection +### 2.6 Regression detection When a baseline exists, each benchmark result is compared: @@ -97,7 +76,7 @@ When a baseline exists, each benchmark result is compared: #### Warn-only behavior (by design) -Regressions are surfaced via `console.warn` — **they do not fail the test**. The test only fails if no benchmarks run at all. This is intentional and consistent with [ai-log-analyzer](https://github.tools.sap/erp4sme/ai-log-analyzer)'s approach, for the following reasons: +Regressions are surfaced via `console.warn` — **they do not fail the test**. The test only fails if no benchmarks run at all. This is intentional for the following reasons: - **CI hardware variance**: GitHub Actions shared runners have noisy neighbors, variable CPU clock speeds, and occasional GC pauses. Even with a 30% threshold and outlier trimming, hard failures would produce flaky CI. - **Primary value is scaling detection**: The slope ratio and R² metrics detect O(n²) bugs, which produce dramatic regressions (10x+). 
These are obvious even in warn-only mode. @@ -112,7 +91,7 @@ If a hard gate is desired in the future: 2. Consider increasing the threshold to 50% for CI to absorb more noise 3. Alternatively, add a separate CI job with `continue-on-error: true` so it shows as a yellow check (not a red X) — signaling "review needed" without blocking merge -## 4. Running the Tests +## 3. Running the Tests ### Local development @@ -147,7 +126,7 @@ npm run test:performance:check-drift | `performance-tests.yml` | PR to `main` (when srv/, lib/, test/performance/ change) | Run benchmarks, compare to committed CI baseline, log warnings | | `performance-rebaseline.yml` | Manual dispatch | Run benchmarks on CI and commit `performance-baseline.ci.json` | -## 5. Reports +## 4. Reports After each run, two report files are generated in `coverage/`: @@ -161,7 +140,7 @@ The markdown report includes: - Results table with timing medians, CV%, per-item times, memory deltas, slope ratios, R², and baseline comparisons - Legend explaining all indicators -## 6. Synthetic Data Design +## 5. Synthetic Data Design All benchmarks use **synthetic data** rather than real CDS models: @@ -175,7 +154,7 @@ This approach ensures: - Configurable scaling (the `sizes` array can be adjusted) - Fast execution (no CDS server boot required) -## 7. Baseline Drift Detection +## 6. Baseline Drift Detection The `check-baseline-drift.js` script detects **gradual performance degradation** that no single run would catch. It reads the git history of `performance-baseline.ci.json` and analyzes how `perItemMsAtMax` values have changed across commits. @@ -208,11 +187,11 @@ The `check-baseline-drift.js` script detects **gradual performance degradation** - As part of periodic performance health checks - Before major releases, to verify no gradual cost drift has occurred -## 8. Future Enhancements +## 7. Future Enhancements As the plugin evolves, consider adding: -1. 
**Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks (deeper PERF-11 resource consumption coverage) +1. **Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks and unexpected growth in resource consumption 2. **Concurrent simulation** — If the plugin adds stateful processing, add benchmarks that simulate concurrent request patterns 3. **Larger scale tests** — Extend the sizes array to [100, 500, 1000, 5000, 10000] if real-world deployments involve very large CDS models 4. **UI rendering benchmarks** — If the SAPUI5 frontend becomes a performance concern, add browser-based benchmarks using Puppeteer