diff --git a/.github/workflows/performance-rebaseline.yml b/.github/workflows/performance-rebaseline.yml
new file mode 100644
index 0000000..749d867
--- /dev/null
+++ b/.github/workflows/performance-rebaseline.yml
@@ -0,0 +1,59 @@
+name: Update CI Performance Baseline
+
+on:
+  workflow_dispatch:  # manual trigger only; the operator must supply a reason
+    inputs:
+      reason:
+        description: "Reason for re-baselining"
+        required: true
+        type: string
+
+permissions:
+  contents: write  # the job pushes the regenerated baseline back to the repository
+
+jobs:
+  rebaseline:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run performance tests (update baseline)
+        run: npm run test:performance:update-baseline
+        env:
+          CI: true
+          PERF_TESTS: "1"
+          PERF_UPDATE_BASELINE: "1"  # write a new baseline after the run
+          PERF_BASELINE_FILE: "performance-baseline.ci.json"  # CI baseline file name (the local performance-baseline.json is gitignored)
+      - name: Commit CI baseline
+        env:
+          REASON: ${{ github.event.inputs.reason }}  # free-text input: passed via env so it is never expanded into the script (script injection)
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add test/performance/performance-baseline.ci.json
+          git commit -m "chore: update CI performance baseline" -m "Reason: ${REASON}
+          Triggered by: @${GITHUB_ACTOR}
+          Workflow run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+          git push
+
+      - name: Summary
+        env:
+          REASON: ${{ github.event.inputs.reason }}  # free-text input: passed via env and printed with %s, never expanded into the script
+        run: |
+          {
+            printf '## Performance Baseline Updated\n\n**Reason:** %s\n\n' "$REASON"
+            echo "The CI baseline has been committed to \`test/performance/performance-baseline.ci.json\`."
+            printf 'Future performance test runs will compare against this baseline.\n\n'
+            cat coverage/performance-report.md
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml
new file mode 100644
index 0000000..a80d10f
--- /dev/null
+++ b/.github/workflows/performance-tests.yml
@@ -0,0 +1,40 @@
+name: Performance Tests
+
+on:
+  pull_request:
+    branches: [main]
+    paths:  # only PRs that touch these paths trigger the benchmarks
+      - "srv/**"
+      - "lib/**"
+      - "test/performance/**"
+      - "package.json"
+  workflow_dispatch:  # can also be started manually
+
+concurrency:
+  group: performance-${{ github.ref }}  # one group per ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one still in progress
+
+jobs:
+  performance:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run performance tests
+        run: npm run test:performance
+        env:
+          CI: true
+          PERF_TESTS: "1"
+          PERF_BASELINE_FILE: "performance-baseline.ci.json"  # compare against the CI baseline file
+          PERF_MAX_REGRESSION: "0.3"  # per-item time tolerance: +30% over baseline
+          PERF_MAX_SLOPE_VARIANCE: "0.3"  # slope-ratio tolerance: +30% over baseline
diff --git a/.gitignore b/.gitignore
index e026a18..27cc349 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,4 +39,7 @@ mta_archives/
 # Tests
 coverage/
 TEST-mocha.xml
-.nyc_output/
\ No newline at end of file
+.nyc_output/
+
+# Performance baselines (machine-specific)
+test/performance/performance-baseline.json
diff --git a/.mocharc.json b/.mocharc.json
index 647522c..6306198 100644
--- a/.mocharc.json
+++ b/.mocharc.json
@@ -1,5 +1,6 @@
 {
   "spec": "test/**/*.test.ts",
+  "ignore": ["test/performance/**"],
   "recursive": true,
   "timeout": 600000,
   "require": ["ts-node/register/transpile-only"],
diff --git a/package.json b/package.json
index 38faeb3..97d2d46 100644
--- a/package.json
+++ b/package.json
@@ -23,6 +23,9 @@
     "tar": "npm run build && cd gen && npm pack",
     "watch-data-inspector-ui": "cds watch --open data-inspector-ui/webapp/index.html?sap-ui-xx-viewCache=false",
     "test": "cross-env CDS_TYPESCRIPT=true mocha",
+    "test:performance": "cross-env CDS_TYPESCRIPT=true PERF_TESTS=1 mocha --config test/performance/.mocharc.performance.json",
+    "test:performance:update-baseline": "cross-env CDS_TYPESCRIPT=true PERF_TESTS=1 PERF_UPDATE_BASELINE=1 mocha --config test/performance/.mocharc.performance.json",
+    "test:performance:check-drift": "node test/performance/check-baseline-drift.js",
     "coverage": "cross-env CDS_TYPESCRIPT=true c8 mocha"
   },
   "peerDependencies": {
diff --git a/test/performance/.mocharc.performance.json b/test/performance/.mocharc.performance.json
new file mode 100644
index 0000000..d0f5895
--- /dev/null
+++ b/test/performance/.mocharc.performance.json
@@ -0,0 +1,7 @@
+{
+  "spec": "test/performance/**/*.test.ts",
+  "recursive": true,
+  "timeout": 600000,
+  "require": ["ts-node/register/transpile-only"],
+  "exit": true
+}
diff --git a/test/performance/PERFORMANCE-TESTING-STRATEGY.md b/test/performance/PERFORMANCE-TESTING-STRATEGY.md
new file mode 100644
index 0000000..ab82c2c
--- /dev/null
+++ b/test/performance/PERFORMANCE-TESTING-STRATEGY.md
@@ -0,0 +1,616 @@
+# Performance Testing Strategy — @cap-js/data-inspector
+
+## 1. Overview
+
+This document describes the performance testing strategy for the `@cap-js/data-inspector` CAP plugin. The strategy focuses on **local processing benchmarks** — measuring the CPU/memory cost of in-process data transformations performed by the plugin's core classes, with external I/O (database, network) stubbed out.
+
+### Why not end-to-end?
+
+`data-inspector` is a CDS plugin that is consumed by host CAP applications. End-to-end latency depends heavily on the host application's database, network, and authentication stack — none of which are under this plugin's control. Testing at the class/method level isolates the plugin's own computational work and produces **stable, reproducible, CI-friendly** measurements.
+
+## 2. Architecture
+
+### 2.1 Test location
+
+```
+test/performance/
+├── .mocharc.performance.json       # Mocha config (perf tests only)
+├── ProcessingPerformance.test.ts   # All benchmarks
+├── check-baseline-drift.js         # Long-term drift detection across git history
+├── performance-baseline.json       # Local developer baseline (gitignored)
+├── performance-baseline.ci.json    # CI baseline (committed to repo)
+└── PERFORMANCE-TESTING-STRATEGY.md # This file
+```
+
+### 2.2 What is benchmarked
+
+| Group  | Benchmark                                              | What it measures                                                                                                                  |
+| ------ | ------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- |
+| **A1** | `EntityDefinitionReader.read (collection)`             | Full collection read: iterate entities, build element metadata, filter hidden entities, paginate, sort, construct response        |
+| **A2** | `EntityDefinitionReader.read (filtered)`               | Same as A1 but with `$filter=contains(name, ...)` to measure filter parsing overhead                                              |
+| **A3** | `EntityDefinitionReader._getEntityElements (via read)` | Element extraction scaling: one entity with N elements (N = 10→1000)                                                              |
+| **B1** | `DataReader.read (response construction, DB stubbed)`  | Response loop after DB query: entity resolution, key construction, record transformation. DB returns pre-built synthetic records. |
+| **B2** | `DataReader._emitAuditlogs (stubbed audit-log)`        | Audit log emission with sensitive data fields. Audit-log service is stubbed; measures per-record processing overhead.             |
+
+### 2.3 Measurement methodology
+
+For each benchmark, measurements are taken across 5 input sizes: **10, 50, 100, 500, 1000**.
+
+For each size:
+1. **Warmup** — 10 runs (configurable) to stabilize JIT
+2. **Measurement** — 30 total runs (20 kept + 10 extra for outlier trimming)
+3. **Outlier removal** — Runs are ranked by distance from the preliminary mean; the extra runs (50% of the measurement count, i.e. the 10 runs furthest from the mean) are discarded
+4. **Statistics** — Median, mean, standard deviation, 95% confidence interval, CV%
+
+### 2.4 Scaling analysis
+
+Three complementary metrics detect non-linear scaling:
+
+| Metric                                | What it detects                                            | Threshold                            |
+| ------------------------------------- | ---------------------------------------------------------- | ------------------------------------ |
+| **Slope ratio**                       | Ratio of last slope segment to first. O(n) = ~1.0          | 🟢 ≤ 2.0 / 🟡 2.0–4.0 / 🔴 > 4.0        |
+| **R² (coefficient of determination)** | How well medians fit a straight line. 1.0 = perfect linear | 🟢 ≥ 0.995 / 🟡 0.98–0.995 / 🔴 < 0.98  |
+| **Per-item time**                     | Time per item at max size; detects absolute overhead       | Compared to baseline (30% tolerance) |
+
+### 2.5 Baseline management
+
+Two baselines are maintained, following the same pattern as `ai-log-analyzer`:
+
+| File                           | Git status     | Purpose                                     |
+| ------------------------------ | -------------- | ------------------------------------------- |
+| `performance-baseline.json`    | **gitignored** | Local developer baseline (machine-specific) |
+| `performance-baseline.ci.json` | **committed**  | CI baseline (shared, versioned reference)   |
+
+- **Local**: Run `npm run test:performance:update-baseline` to create `performance-baseline.json` for your machine
+- **CI**: The rebaseline workflow (`performance-rebaseline.yml`) runs benchmarks on CI hardware and commits `performance-baseline.ci.json` back to the repo. The PR workflow reads this committed file via `PERF_BASELINE_FILE=performance-baseline.ci.json`
+- Local baselines are **machine-specific** (gitignored) because absolute timings vary by hardware
+- The CI baseline is **committed** so it is reproducible, auditable via `git log`, and immune to cache eviction
+- The first run without a baseline gracefully skips (no failure)
+
+### 2.6 Regression detection
+
+When a baseline exists, each benchmark result is compared:
+
+1. **Per-item time** at maximum size must not exceed `baseline × (1 + MAX_REGRESSION)` (default: +30%)
+2. **Slope ratio** must not exceed `baseline × (1 + MAX_SLOPE_VARIANCE)` (default: +30%)
+
+#### Warn-only behavior (by design)
+
+Regressions are surfaced via `console.warn` — **they do not fail the test**. The test only fails if no benchmarks run at all. This is intentional for the following reasons:
+
+- **CI hardware variance**: GitHub Actions shared runners have noisy neighbors, variable CPU clock speeds, and occasional GC pauses. Even with a 30% threshold and outlier trimming, hard failures would produce flaky CI.
+- **Primary value is scaling detection**: The slope ratio and R² metrics detect O(n²) bugs, which produce dramatic regressions (10x+). These are obvious even in warn-only mode.
+- **Per-item regression is informational**: Absolute timing depends on hardware; a 30% regression on CI may not reproduce locally.
+
+Warnings appear in the CI console output and in the generated report (`coverage/performance-report.md`). The PR workflow does not currently upload or publish that report, so PR reviewers should check the job log for warnings.
+
+#### Evolving to a hard gate (future)
+
+If a hard gate is desired in the future:
+1. Change `console.warn` to `expect` assertions in the regression checks
+2. Consider increasing the threshold to 50% for CI to absorb more noise
+3. Alternatively, add a separate CI job with `continue-on-error: true` so it shows as a yellow check (not a red X) — signaling "review needed" without blocking merge
+
+## 3. Running the Tests
+
+### Local development
+
+```bash
+# First time: create your machine's baseline
+npm run test:performance:update-baseline
+
+# Subsequent runs: compare against baseline
+npm run test:performance
+
+# Check for gradual drift across CI baseline git history
+npm run test:performance:check-drift
+```
+
+### Environment variables
+
+| Variable                    | Default                     | Description                                    |
+| --------------------------- | --------------------------- | ---------------------------------------------- |
+| `PERF_TESTS`                | `0`                         | Set to `1` to enable performance tests         |
+| `PERF_UPDATE_BASELINE`      | `0`                         | Set to `1` to write new baseline after run     |
+| `PERF_MAX_REGRESSION`       | `0.3`                       | Maximum allowed per-item time regression (30%) |
+| `PERF_MAX_SLOPE_VARIANCE`   | `0.3`                       | Maximum allowed slope ratio increase (30%)     |
+| `PERF_WARMUP_RUNS`          | `10`                        | Warmup iterations before measurement           |
+| `PERF_MEASUREMENT_RUNS`     | `20`                        | Measurement iterations (kept after trimming)   |
+| `PERF_OUTLIER_TRIM_PERCENT` | `0.5`                       | Extra runs as fraction of measurement runs     |
+| `PERF_BASELINE_FILE`        | `performance-baseline.json` | Baseline filename                              |
+
+### CI workflows
+
+| Workflow                     | Trigger                                                                                   | Purpose                                                        |
+| ---------------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------- |
+| `performance-tests.yml`      | PR to `main` (when srv/, lib/, test/performance/ or package.json change); manual dispatch | Run benchmarks, compare to committed CI baseline, log warnings |
+| `performance-rebaseline.yml` | Manual dispatch                                                                           | Run benchmarks on CI and commit `performance-baseline.ci.json` |
+
+## 4. Reports
+
+After each run, two report files are generated in `coverage/`:
+
+- **`performance-report.json`** — Machine-readable full results
+- **`performance-report.md`** — Human-readable markdown with emoji indicators
+
+The markdown report includes:
+- Environment details (Node version, CPU, memory, load)
+- Test configuration (warmup, measurement, trim settings)
+- System warnings (high CPU load, memory pressure)
+- Results table with timing medians, CV%, per-item times, memory deltas, slope ratios, R², and baseline comparisons
+- Legend explaining all indicators
+
+## 5. Synthetic Data Design
+
+All benchmarks use **synthetic data** rather than real CDS models:
+
+- **Entities**: Generated with configurable element counts, including keys, typed fields, hidden elements, associations, and various annotations (`@HideFromDataInspector`, `@PersonalData.IsPotentiallySensitive`, `@Core.Computed`)
+- **Records**: Generated with configurable field counts, simulating realistic DB query results with UUIDs, strings, integers, and booleans
+- **CDS Runtime**: `cds.model.all()`, `cds.services.db.run()`, `cds.parse.expr()`, and `cds.connect.to()` are monkey-patched per benchmark to return synthetic data, isolating the plugin's processing from actual CDS bootstrapping
+
+This approach ensures:
+- No dependency on database state
+- Deterministic, reproducible inputs
+- Configurable scaling (the `sizes` array can be adjusted)
+- Fast execution (no CDS server boot required)
+
+## 6. Baseline Drift Detection
+
+The `check-baseline-drift.js` script detects **gradual performance degradation** that no single run would catch. It reads the git history of `performance-baseline.ci.json` and analyzes how `perItemMsAtMax` values have changed across commits.
+
+### What it detects
+
+| Condition                                               | Default Threshold                | Severity            |
+| ------------------------------------------------------- | -------------------------------- | ------------------- |
+| Total per-item cost increase across the examined window | 20% (`DRIFT_MAX_TOTAL_INCREASE`) | **FAIL**            |
+| Consecutive per-item cost increases                     | 3 (`DRIFT_CONSECUTIVE_WARN`)     | **WARN** (advisory) |
+
+### How it works
+
+1. Queries `git log` for commits that touched `performance-baseline.ci.json`
+2. Loads up to 10 historical snapshots (configurable via `DRIFT_WINDOW`)
+3. For each benchmark, computes total increase, consecutive-increase streak, and OLS trend slope
+4. Outputs a history table and per-benchmark analysis
+
+### Configuration (env vars)
+
+| Variable                   | Default                                         | Description                                   |
+| -------------------------- | ----------------------------------------------- | --------------------------------------------- |
+| `DRIFT_BASELINE_FILE`      | `test/performance/performance-baseline.ci.json` | Git path of the baseline file to inspect      |
+| `DRIFT_WINDOW`             | `10`                                            | Number of recent commits to examine           |
+| `DRIFT_MAX_TOTAL_INCREASE` | `0.20`                                          | Max allowed total increase (fraction)         |
+| `DRIFT_CONSECUTIVE_WARN`   | `3`                                             | Consecutive increases before advisory warning |
+
+### When to use
+
+- After accumulating 2+ CI baseline snapshots in git history (requires running the rebaseline workflow at least twice)
+- As part of periodic performance health checks
+- Before major releases, to verify no gradual cost drift has occurred
+
+## 7. Future Enhancements
+
+As the plugin evolves, consider adding:
+
+1. **Memory profiling benchmarks** — Track heap growth across repeated operations to detect memory leaks and unexpected resource consumption growth
+2. **Concurrent simulation** — If the plugin adds stateful processing, add benchmarks that simulate concurrent request patterns
+3. **Larger scale tests** — Extend the sizes array to [100, 500, 1000, 5000, 10000] if real-world deployments involve very large CDS models
+4. **UI rendering benchmarks** — If the SAPUI5 frontend becomes a performance concern, add browser-based benchmarks using Puppeteer
+
+---
+
+## Appendix: Performance Testing 101 — Concepts & KPIs Explained
+
+This appendix explains every statistical concept and KPI used in this testing strategy from first principles. If you've never done performance benchmarking before, start here.
+
+---
+
+### A.1 Why do we measure performance at all?
+
+Software can be "correct" (produces the right answer) yet still unusable if it's too slow. Performance testing answers two questions:
+
+1. **Does it scale?** — If the input doubles, does the time roughly double (good) or quadruple (bad)?
+2. **Did it get slower?** — Compared to last week's version, is the same operation taking longer?
+
+Question 1 is about **algorithmic complexity**. Question 2 is about **regression detection**.
+
+---
+
+### A.2 Big-O Notation
+
+Big-O describes how an algorithm's cost grows as input size *n* increases:
+
+| Notation       | Name       | Example                          | Doubling *n* does what?           |
+| -------------- | ---------- | -------------------------------- | --------------------------------- |
+| **O(1)**       | Constant   | Hash table lookup                | Time stays the same               |
+| **O(n)**       | Linear     | Scanning every item in a list    | Time doubles                      |
+| **O(n²)**      | Quadratic  | Nested loop over all pairs       | Time quadruples (4×)              |
+| **O(n³)**      | Cubic      | Triple nested loop               | Time increases 8×                 |
+| **O(n·log n)** | Log-linear | Good sort algorithms (mergesort) | Time roughly doubles (a bit more) |
+
+**Our goal**: every operation in data-inspector should be **O(n)** or better. If we accidentally introduce an O(n²) algorithm (e.g., a nested loop that compares every entity to every other entity), the benchmarks will catch it.
+
+---
+
+### A.3 Median vs. Mean — Which "average" to use?
+
+Both are measures of central tendency, but they behave differently with outliers:
+
+- **Mean** (arithmetic average): Sum all values, divide by count. Sensitive to outliers — one very slow run pulls the mean up dramatically.
+- **Median**: Sort all values, pick the middle one. Robust to outliers — even if one run was 100× slower, the median barely moves.
+
+**Why we use the median for benchmark reporting**: In benchmarking, you occasionally get "hiccup" runs where the garbage collector fires, the OS scheduler intervenes, or the CPU thermal-throttles. The median naturally ignores these glitches without requiring you to manually identify and remove them.
+
+We still report the mean (and use it internally for outlier detection), but the **median is the primary metric** in our results.
+
+---
+
+### A.4 Standard Deviation (σ) and Coefficient of Variation (CV%)
+
+Imagine you time a function 20 times and get these results (in ms):
+
+```
+Run 1: 5.1    Run 2: 4.9    Run 3: 5.0    Run 4: 5.2    Run 5: 5.0   ...
+```
+
+The **mean** is 5.04ms. But how *consistent* are these numbers? That's what standard deviation tells you.
+
+#### Standard Deviation (σ) — "How spread out are my measurements?"
+
+Think of σ as the "average distance from the mean." Here's the intuition:
+
+1. Take each measurement and ask: "How far is this from the mean?"
+   - Run 1: |5.1 - 5.04| = 0.06
+   - Run 2: |4.9 - 5.04| = 0.14
+   - Run 3: |5.0 - 5.04| = 0.04
+   - ...and so on for all 20 runs
+2. Square those distances (so negative and positive don't cancel out)
+3. Average the squared distances
+4. Take the square root (to get back to the original units — milliseconds)
+
+The result is σ. A small σ (say 0.08ms when the mean is 5ms) means your measurements are very consistent. A large σ (say 2.5ms when the mean is 5ms) means they're all over the place.
+
+#### Coefficient of Variation (CV%) — "Is that spread *relatively* big or small?"
+
+Here's the problem with σ alone: is σ = 2ms "good" or "bad"? It depends on context:
+
+- If the mean is **1000ms**, then σ = 2ms is tiny (0.2% of the mean) → very stable
+- If the mean is **5ms**, then σ = 2ms is huge (40% of the mean) → extremely noisy
+
+CV% solves this by expressing σ as a percentage of the mean:
+
+```
+CV% = (σ / mean) × 100
+```
+
+This lets you compare the stability of a 5ms benchmark to a 500ms benchmark on equal footing.
+
+**Real-world example from our tests**:
+- Benchmark A: mean = 0.04ms, σ = 0.008ms → CV = 20% 🔴 (noisy — the function is so fast that GC jitter dominates)
+- Benchmark B: mean = 3.85ms, σ = 0.12ms → CV = 3.1% 🟢 (stable — the function takes long enough that noise is negligible)
+
+**Our thresholds**:
+
+| CV%   | Indicator | Meaning                                                |
+| ----- | --------- | ------------------------------------------------------ |
+| ≤ 5%  | 🟢         | Stable — measurements are repeatable                   |
+| 5–15% | 🟡         | Acceptable for Node.js (GC pauses cause some variance) |
+| > 15% | 🔴         | High noise — consider more warmup or runs              |
+
+---
+
+### A.5 Confidence Interval (CI)
+
+Imagine you measured a function 20 times and got a median of 5.23ms. If you ran those 20 measurements again tomorrow, would you get exactly 5.23ms again? Probably not — maybe 5.18ms, or 5.31ms. The **confidence interval** tells you the range where the "true" value most likely lives.
+
+#### The analogy
+
+Think of it like measuring your height with a wobbly ruler. You measure yourself 5 times and get: 175.2cm, 174.8cm, 175.1cm, 175.5cm, 174.9cm. You're probably not exactly 175.1cm tall, but you're pretty confident you're somewhere between 174.8cm and 175.5cm. That range is your confidence interval.
+
+#### The math (simplified)
+
+```
+CI = ±1.96 × (σ / √n)
+```
+
+Breaking this down:
+- **σ** = standard deviation (how noisy your measurements are — see A.4)
+- **√n** = square root of the number of runs (more runs = narrower interval, because more data = more certainty)
+- **1.96** = a magic number from statistics that gives you 95% confidence (you can think of it as "about 2")
+
+So the formula says: *"Take the noise level (σ), shrink it by how many measurements you took (√n), and multiply by ~2."*
+
+#### A worked example
+
+- You measured 20 runs. Median = 5.23ms. σ = 0.22ms.
+- CI = ±1.96 × (0.22 / √20) = ±1.96 × (0.22 / 4.47) = ±1.96 × 0.049 = **±0.097ms**
+- So we report: **5.23ms ±0.10ms**
+- Meaning: "We're 95% confident the true typical time is between 5.13ms and 5.33ms."
+
+#### Why it matters for us
+
+When comparing two benchmark results (e.g., before vs. after a code change), if their confidence intervals overlap, the difference is probably just measurement noise — not a real performance change. For example:
+- Before: 5.23ms ±0.10ms → range [5.13, 5.33]
+- After:  5.28ms ±0.12ms → range [5.16, 5.40]
+- The ranges overlap heavily → **no meaningful difference** (don't panic!)
+
+But if:
+- Before: 5.23ms ±0.10ms → range [5.13, 5.33]
+- After:  6.80ms ±0.15ms → range [6.65, 6.95]
+- No overlap at all → **real regression** (investigate!)
+
+---
+
+### A.6 Outlier Trimming
+
+Raw benchmark timings often contain outliers — unusually slow (or fast) runs caused by GC pauses, OS scheduling, background processes, etc.
+
+**Our approach** (mean-distance trimming):
+1. Run 30 iterations (20 to keep + 10 extra)
+2. Compute the preliminary mean of all 30
+3. For each run, compute its distance from the mean
+4. Sort by distance (closest to mean first)
+5. Keep the 20 closest; discard the 10 furthest
+
+Unlike symmetric trimming (dropping a fixed share from the top *and* from the bottom), this discards whichever runs lie furthest from the central tendency, regardless of direction — so if all the glitches are slow runs, only slow runs are removed.
+
+---
+
+### A.7 Warmup Runs
+
+JavaScript engines (V8 in Node.js) use **Just-In-Time (JIT) compilation**. The first few calls to a function are interpreted (slow), then V8 compiles them to optimized machine code (fast). This process is called "warming up."
+
+If you measure the first 5 runs, you're measuring the interpreter, not the optimized code that will run in production. That's why we run 10 warmup iterations (discarded) before starting measurements.
+
+**Think of it like warming up a car engine** — you don't measure fuel efficiency during the first 30 seconds after a cold start.
+
+---
+
+### A.8 Slope and Slope Ratio
+
+These are the core metrics for detecting whether an algorithm is O(n) or worse. The key idea is surprisingly simple: **if adding more items always costs the same amount of extra time, the algorithm is linear. If adding more items costs *increasingly* more time, it's not.**
+
+#### Slope — "How much extra time does each additional item cost?"
+
+Imagine you're timing a function with different input sizes and you get:
+
+```
+Size  10 → took  1ms
+Size  50 → took  5ms
+Size 100 → took 10ms
+Size 500 → took 50ms
+Size 1000 → took 100ms
+```
+
+The **slope** between any two points is the "price per additional item":
+
+```
+slope = (time₂ - time₁) / (size₂ - size₁)
+```
+
+For the data above:
+- Between size 10→50: slope = (5 - 1) / (50 - 10) = 4 / 40 = **0.1ms per item**
+- Between size 500→1000: slope = (100 - 50) / (1000 - 500) = 50 / 500 = **0.1ms per item**
+
+The slope is the same! Each additional item always costs 0.1ms, regardless of whether you have 10 items or 1000. This is classic **O(n) linear** behavior.
+
+Now imagine a *bad* function:
+
+```
+Size  10 → took   1ms
+Size  50 → took   5ms
+Size 100 → took  20ms
+Size 500 → took 250ms
+Size 1000 → took 1000ms
+```
+
+- Between size 10→50: slope = (5 - 1) / 40 = **0.1ms per item**
+- Between size 500→1000: slope = (1000 - 250) / 500 = **1.5ms per item**
+
+The slope grew 15×! Adding items at large scale is much more expensive than at small scale. This screams **O(n²)**.
+
+#### Slope Ratio — "Did the slope stay the same or grow?"
+
+Instead of eyeballing slopes, we compute a single number:
+
+```
+slope_ratio = last_slope / first_slope
+```
+
+Using the examples above:
+- Good function: 0.1 / 0.1 = **1.0** (perfect — the cost per item never changed)
+- Bad function: 1.5 / 0.1 = **15.0** (terrible — the cost per item grew 15×)
+
+**Think of it like a road trip**: If driving the first 100km takes 1 hour, and the last 100km also takes 1 hour, the "slope" (time per km) is constant — that's a straight highway (linear). If the last 100km takes 5 hours, the road got progressively worse — that's like a quadratic algorithm bogging down as data grows.
+
+**Interpretation**:
+
+| Slope ratio | What it means                                          | Big-O           |
+| ----------- | ------------------------------------------------------ | --------------- |
+| ~1.0        | Each additional item costs the same regardless of size | **O(n)**        |
+| ~2.0        | Cost per item roughly doubles at larger scale          | **~O(n·log n)** |
+| ~4.0+       | Cost per item grows dramatically — likely quadratic    | **O(n²)**       |
+| ~10.0+      | Severe super-linear scaling                            | **O(n²)+**      |
+
+**Our thresholds**:
+
+| Range   | Indicator | Assessment                                       |
+| ------- | --------- | ------------------------------------------------ |
+| ≤ 2.0   | 🟢         | Consistent with O(n) linear scaling              |
+| 2.0–4.0 | 🟡         | Suspicious — investigate for hidden nested loops |
+| > 4.0   | 🔴         | Clearly non-linear (O(n²) or worse)              |
+
+---
+
+### A.9 R² — Coefficient of Determination
+
+R² answers a simple question: **"If I draw the best possible straight line through my data, how well does it fit?"**
+
+#### The school analogy
+
+Imagine you're a teacher plotting students' study hours (x-axis) vs. exam scores (y-axis). If every student who studied twice as long scored exactly twice as high, all the dots would fall on a perfect straight line — R² = 1.0.
+
+In reality, some students score higher or lower than the line predicts. R² tells you what fraction of the pattern is explained by the straight line vs. what fraction is "random scatter."
+
+#### Visually
+
+```
+R² ≈ 1.0 (linear)          R² ≈ 0.7 (curved/noisy)
+
+Time ↑                      Time ↑
+     |          •                |            •
+     |        •                  |        •
+     |      •                    |    •
+     |    •                      |          •
+     |  •                        |  •
+     +----------→ Size           +----------→ Size
+     Points hug the line         Points curve away from the line
+```
+
+#### How it works (no math degree needed)
+
+1. **Draw the best straight line** through your 5 data points (the computer finds the line that minimizes the sum of squared vertical distances from the points — "least squares")
+2. **Measure the "misses"**: For each point, how far is it from the line? Square those distances and add them up. Call this **"unexplained scatter."**
+3. **Measure the "baseline scatter"**: How far is each point from the simple average (a flat horizontal line)? Square and sum. Call this **"total scatter."**
+4. **Compute R²**:
+
+```
+R² = 1 - (unexplained scatter / total scatter)
+```
+
+- If the line explains everything → unexplained scatter = 0 → R² = 1.0
+- If the line explains nothing (data is random) → unexplained = total → R² = 0.0
+
+#### What R² values mean for our benchmarks
+
+| R²         | Meaning                                                                     |
+| ---------- | --------------------------------------------------------------------------- |
+| 1.000      | All points fall exactly on a straight line — perfectly linear               |
+| 0.995+     | Excellent linear fit — minor measurement noise only                         |
+| 0.98–0.995 | Mostly linear with some deviation — could be noise or mild non-linearity    |
+| < 0.98     | Clearly not linear — the relationship curves (quadratic, exponential, etc.) |
+
+#### Why do we need BOTH slope ratio and R²?
+
+They catch **different types of problems**:
+
+**Slope ratio** only looks at the first and last segments — like checking the start and end of a road trip. **R²** looks at every point along the way.
+
+Consider this scenario:
+```
+Size:  10   50   100   500   1000
+Time:  1ms  5ms  30ms  50ms  100ms
+```
+
+- Slope ratio = (100-50)/(1000-500) ÷ (5-1)/(50-10) = 0.1 / 0.1 = **1.0** → looks perfect!
+- But R² ≈ **0.96** → below our 0.98 threshold, so something's off!
+
+What happened? The function has a "hump" at size 100 (30ms is way above the straight line). The slope ratio missed it because it only compared the first segment (10→50) with the last segment (500→1000), but R² caught it because it checks every point.
+
+That's why we use both: **slope ratio catches endpoint divergence, R² catches mid-range curvature.**
+
+---
+
+### A.10 Per-Item Time
+
+This is the simplest metric — just divide total time by input size:
+
+```
+per_item_ms = median_time_ms / size
+```
+
+For a truly O(n) algorithm, per-item time should be roughly constant regardless of size. If per-item time grows with size, you have a scaling problem.
+
+**Per-item time at max size** (the value stored in the baseline) is the most important data point because it amplifies any scaling issues. At size 10, even an O(n²) algorithm might only add 0.001ms of total overhead. At size 1000 — 100× the input — that same O(n²) term grows 100² = 10,000×, adding about 10ms — visible and measurable.
+
+---
+
+### A.11 Memory Delta (Heap ΔMB)
+
+We measure `process.memoryUsage().heapUsed` before and after each benchmark:
+
+```
+ΔMB = (heapAfter - heapBefore) / (1024 × 1024)
+```
+
+This catches:
+- **Hidden allocations** — Creating intermediate arrays, string concatenations, or object copies that scale with input size
+- **Memory leaks** — Objects that survive garbage collection because they're accidentally retained
+
+**Note**: JavaScript GC is non-deterministic, so memory deltas are noisier than timing measurements. They're included as an advisory signal, not a hard gate.
+
+---
+
+### A.12 Baseline and Regression Detection
+
+A **baseline** is a snapshot of your benchmark results at a known-good point in time. It records, for each benchmark:
+- `perItemMsAtMax` — per-item time at maximum size
+- `slopeRatio` — scaling behavior
+- `r2` — linearity score
+
+**Regression detection** compares current results to the baseline:
+
+```
+allowed = baseline_value × (1 + threshold)
+
+# Example with 30% threshold:
+# If baseline per-item time = 0.005ms
+# allowed = 0.005 × 1.30 = 0.0065ms
+# If current = 0.007ms → REGRESSION WARNING
+```
+
+**Why 30% threshold?** Benchmark noise on shared CI runners (GitHub Actions) typically causes 5–15% variance. A 30% threshold means only genuine code-level regressions trigger warnings, not hardware noise.
+
+---
+
+### A.13 Putting It All Together — Reading a Result Row
+
+Here's how to read a line from the performance report:
+
+```
+| EntityDefReader.read | 0.05, 0.19, 0.38, 1.92, 3.85 | 4.2% 🟢 | 0.0050, 0.0038, 0.0038, 0.0038, 0.0039 | 0.12, 0.15, 0.18, 0.22, 0.25 | 1.0234 🟢 | 0.9998 🟢 | 0.0040 | 1.0100 | 0.9995 |
+```
+
+Reading left to right:
+1. **Timings** [0.05→3.85ms]: Time grows ~77× as input grows 100× → slightly sub-linear (good)
+2. **CV% 4.2% 🟢**: Low variance — stable measurements
+3. **Per-item** [0.005→0.0039ms]: Cost per item stays flat → O(n) confirmed
+4. **Memory** [0.12→0.25MB]: Slight growth — proportional to input (expected)
+5. **Slope ratio 1.0234 🟢**: Almost exactly 1.0 → perfectly linear
+6. **R² 0.9998 🟢**: Nearly perfect straight line
+7. **Baseline columns**: Previous per-item=0.004ms, slope=1.01, R²=0.9995 — no regression
+
+**Verdict**: This benchmark is healthy — linear scaling, stable measurements, no regression.
+
+---
+
+### A.14 Quick Reference: All Emoji Indicators
+
+| Metric      | 🟢 Good  | 🟡 Watch       | 🔴 Problem |
+| ----------- | ------- | ------------- | --------- |
+| Slope ratio | ≤ 2.0   | 2.0 – 4.0     | > 4.0     |
+| R²          | ≥ 0.995 | 0.980 – 0.995 | < 0.980   |
+| CV%         | ≤ 5%    | 5% – 15%      | > 15%     |
+
+### A.15 Glossary
+
+| Term                    | Definition                                                                                             |
+| ----------------------- | ------------------------------------------------------------------------------------------------------ |
+| **Benchmark**           | A controlled, repeatable experiment measuring one specific operation                                   |
+| **Warmup**              | Discarded initial runs that let the JIT compiler optimize the code path                                |
+| **Outlier**             | A measurement far from the typical value, usually caused by GC/OS interference                         |
+| **Trimming**            | Removing outlier measurements before computing statistics                                              |
+| **Median**              | The middle value when measurements are sorted; our primary metric                                      |
+| **Mean**                | The arithmetic average of all measurements                                                             |
+| **Standard deviation**  | How spread out measurements are from the mean                                                          |
+| **CV%**                 | Standard deviation as a percentage of the mean — normalized measure of noise                           |
+| **Confidence interval** | Range within which the true value likely falls (95% probability)                                       |
+| **Slope**               | Rate of time change per unit of input size between two measurement points                              |
+| **Slope ratio**         | Last slope ÷ first slope; 1.0 = perfectly linear growth                                                |
+| **R²**                  | Coefficient of determination; 1.0 = data falls perfectly on a straight line                            |
+| **Per-item time**       | Total time ÷ input size; should stay constant for O(n) algorithms                                      |
+| **Baseline**            | Stored snapshot of benchmark results used as the reference for regression detection                    |
+| **Regression**          | An increase in cost beyond the configured threshold (30% by default) compared to the baseline          |
+| **Drift**               | Gradual, incremental performance degradation across many commits (no single commit triggers a warning) |
+| **Heap delta**          | Change in V8 heap memory usage during a benchmark run                                                  |
+| **JIT**                 | Just-In-Time compilation — V8's process of compiling JavaScript to machine code at runtime             |
+| **GC**                  | Garbage Collection — V8's automatic memory reclamation process                                         |
diff --git a/test/performance/ProcessingPerformance.test.ts b/test/performance/ProcessingPerformance.test.ts
new file mode 100644
index 0000000..a1cff54
--- /dev/null
+++ b/test/performance/ProcessingPerformance.test.ts
@@ -0,0 +1,445 @@
+/**
+ * Performance benchmarks for @cap-js/data-inspector.
+ *
+ * Measures local processing cost of EntityDefinitionReader and DataReader
+ * across multiple input sizes (10→1000) to detect non-linear scaling and
+ * regressions against a stored baseline.
+ *
+ * Run:
+ *   npm run test:performance              # compare against baseline
+ *   npm run test:performance:update-baseline  # create/update baseline
+ *
+ * See PERFORMANCE-TESTING-STRATEGY.md for full documentation.
+ */
+
+import cds from "@sap/cds";
+import fs from "fs";
+import path from "path";
+import os from "os";
+import { expect } from "chai";
+
+import { EntityDefinitionReader } from "../../srv/EntityDefinitionReader";
+import { DataReader } from "../../srv/DataReader";
+
+import {
+  type BenchmarkResult,
+  type BaselineData,
+  type Report,
+  sizes,
+  checkSystemState,
+  benchmarkSync,
+  benchmarkAsync,
+  buildMarkdownReport,
+  buildSyntheticEntities,
+  buildSyntheticRecords,
+  buildEntityDefinitionRequest,
+  buildDataReadRequest,
+} from "./helpers";
+
+// ---------------------------------------------------------------------------
+// Configuration (env-overridable)
+// ---------------------------------------------------------------------------
+// NOTE: Number() yields NaN for non-numeric input. Every `x > NaN` comparison
+// is false, so a malformed threshold silently disables the corresponding check.
+
+// The suite only runs when PERF_TESTS=1; otherwise it is registered via describe.skip.
+const PERF_ENABLED = process.env.PERF_TESTS === "1";
+// When PERF_UPDATE_BASELINE=1 the after hook rewrites the baseline file and
+// regression checks are skipped.
+const UPDATE_BASELINE = process.env.PERF_UPDATE_BASELINE === "1";
+// Allowed per-item time increase over the baseline, as a fraction (0.3 = +30%).
+const MAX_REGRESSION = Number(process.env.PERF_MAX_REGRESSION ?? "0.3");
+// Allowed slope-ratio increase over the baseline, as a fraction (0.3 = +30%).
+const MAX_SLOPE_VARIANCE = Number(process.env.PERF_MAX_SLOPE_VARIANCE ?? "0.3");
+// The next three values are only echoed into the report's `testConfig` in this file;
+// helpers/measurement.ts reads the same env vars on its own.
+const WARMUP_RUNS = Number(process.env.PERF_WARMUP_RUNS ?? "10");
+const MEASUREMENT_RUNS = Number(process.env.PERF_MEASUREMENT_RUNS ?? "20");
+// A fraction, not a percentage: 0.5 adds ceil(MEASUREMENT_RUNS * 0.5) runs per size
+// in the reported `totalRunsPerSize`.
+const OUTLIER_TRIM_PERCENT = Number(process.env.PERF_OUTLIER_TRIM_PERCENT ?? "0.5");
+
+// Baseline file name, resolved relative to this test directory.
+const BASELINE_FILENAME = process.env.PERF_BASELINE_FILE ?? "performance-baseline.json";
+const BASELINE_PATH = path.resolve(__dirname, BASELINE_FILENAME);
+// JSON and Markdown reports are written to <repo root>/coverage by the after hook.
+const REPORT_PATH = path.resolve(__dirname, "..", "..", "coverage", "performance-report.json");
+const REPORT_MD_PATH = path.resolve(__dirname, "..", "..", "coverage", "performance-report.md");
+
+// Suite registrar: the real `describe` when enabled, `describe.skip` otherwise.
+const describePerf = PERF_ENABLED ? describe : describe.skip;
+
+// ---------------------------------------------------------------------------
+// Test suite
+// ---------------------------------------------------------------------------
+describePerf("Performance - Data Inspector Processing", function () {
+  this.timeout(300000); // 5 minutes
+
+  /** Pre-built synthetic data per size (populated in before hook). */
+  const entitiesBySize = new Map<number, any[]>();
+  const recordsBySize = new Map<number, any[]>();
+
+  /** Assigned at the end of the benchmark test; the after hook does nothing while it is unset. */
+  let report: Report;
+
+  // Load CDS model so cds.model, cds.parse, cds.ql are available
+  before(async function () {
+    const csn = await cds.load(path.resolve(__dirname, "..", ".."));
+    cds.model = cds.compile.for.nodejs(csn);
+
+    // Without a baseline there is nothing to compare against: skip the suite with a
+    // hint on how to create one, unless this run is the one creating it.
+    if (!UPDATE_BASELINE && !fs.existsSync(BASELINE_PATH)) {
+      const isCI = process.env.CI === "true" || !!process.env.GITHUB_ACTIONS;
+      const message = isCI
+        ? `Performance baseline not found at ${BASELINE_FILENAME}.\n` +
+          "      To establish the CI baseline, run the 'Update CI Performance Baseline' workflow.\n" +
+          "      See: .github/workflows/performance-rebaseline.yml"
+        : `Performance baseline not found at ${BASELINE_FILENAME}.\n` +
+          "      Run 'npm run test:performance:update-baseline' to create a baseline for your machine.";
+      console.log(`\n    ⚠️  Skipping performance tests: ${message}\n`);
+      this.skip();
+    }
+  });
+
+  // Pre-generate synthetic data for all sizes
+  before(() => {
+    for (const size of sizes) {
+      entitiesBySize.set(size, buildSyntheticEntities(size));
+      recordsBySize.set(size, buildSyntheticRecords(size));
+    }
+  });
+
+  // Write reports and optionally update baseline after all benchmarks
+  after(() => {
+    if (!report) return;
+
+    const reportDir = path.dirname(REPORT_PATH);
+    fs.mkdirSync(reportDir, { recursive: true });
+    fs.writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2), "utf8");
+    fs.writeFileSync(REPORT_MD_PATH, buildMarkdownReport(report), "utf8");
+
+    if (UPDATE_BASELINE) {
+      // The baseline keeps one entry per benchmark name with only the values that
+      // checkRegressions compares against.
+      fs.writeFileSync(
+        BASELINE_PATH,
+        JSON.stringify(
+          report.results.reduce((acc, result) => {
+            acc[result.name] = {
+              sizes: result.sizes,
+              perItemMsAtMax: result.perItemMs[result.perItemMs.length - 1],
+              slopeRatio: result.slopeRatio,
+              r2: result.r2,
+            };
+            return acc;
+          }, {} as BaselineData),
+          null,
+          2
+        ),
+        "utf8"
+      );
+    }
+  });
+
+  it("should keep local processing roughly linear", async () => {
+    const results: BenchmarkResult[] = [];
+
+    // -------------------------------------------------------------------
+    // Group A: EntityDefinitionReader — pure in-memory, no DB
+    // -------------------------------------------------------------------
+
+    // A1: Collection read — iterate entities, build metadata, paginate, sort
+    results.push(
+      benchmarkSync("EntityDefinitionReader.read (collection)", (size) => {
+        const entities = entitiesBySize.get(size)!;
+        const originalAll = cds.model.all;
+        cds.model.all = ((kind: string) => {
+          if (kind === "entity") return entities;
+          if (kind === "service") return [];
+          return originalAll.call(cds.model, kind);
+        }) as any;
+
+        try {
+          const reader = new EntityDefinitionReader();
+          const req = buildEntityDefinitionRequest({ top: size });
+          reader.read(req as any);
+        } finally {
+          cds.model.all = originalAll;
+        }
+      })
+    );
+
+    // A2: Collection read with $filter — measures filter parsing overhead
+    results.push(
+      benchmarkSync("EntityDefinitionReader.read (filtered)", (size) => {
+        const entities = entitiesBySize.get(size)!;
+        const originalAll = cds.model.all;
+        cds.model.all = ((kind: string) => {
+          if (kind === "entity") return entities;
+          if (kind === "service") return [];
+          return originalAll.call(cds.model, kind);
+        }) as any;
+
+        try {
+          const reader = new EntityDefinitionReader();
+          const req = buildEntityDefinitionRequest({
+            filter: `contains(name, 'Entity')`,
+            top: size,
+          });
+          reader.read(req as any);
+        } finally {
+          cds.model.all = originalAll;
+        }
+      })
+    );
+
+    // A3: Element extraction — one entity with N elements (N = 10→1000)
+    results.push(
+      benchmarkSync("EntityDefinitionReader._getEntityElements (via read)", (size) => {
+        const entity = buildSyntheticEntities(1, size)[0];
+        const entities = [entity];
+        const originalAll = cds.model.all;
+        cds.model.all = ((kind: string) => {
+          if (kind === "entity") return entities;
+          if (kind === "service") return [];
+          return originalAll.call(cds.model, kind);
+        }) as any;
+
+        try {
+          const reader = new EntityDefinitionReader();
+          const req: any = {
+            params: [{ name: entity.name }],
+            query: { SELECT: { columns: ["*"] } },
+            req: { query: {} },
+            reject: (code: number, msg: string) => {
+              throw new Error(`${code} ${msg}`);
+            },
+          };
+          reader.read(req as any);
+        } finally {
+          cds.model.all = originalAll;
+        }
+      })
+    );
+
+    // -------------------------------------------------------------------
+    // Group B: DataReader — response construction (DB stubbed)
+    // -------------------------------------------------------------------
+
+    // B1: Response loop — entity resolution, key construction, record transformation
+    results.push(
+      await benchmarkAsync("DataReader.read (response construction, DB stubbed)", async (size) => {
+        const records = recordsBySize.get(size)!;
+        const entityName = "perf.test.Entity_0";
+
+        const syntheticEntity = buildSyntheticEntityForDataReader(entityName);
+
+        // Stub cds.model.all
+        const originalAll = cds.model.all;
+        cds.model.all = ((kind: string) => {
+          if (kind === "entity") return [syntheticEntity];
+          if (kind === "service") return [];
+          return originalAll.call(cds.model, kind);
+        }) as any;
+
+        // Stub cds.services.db.run → return synthetic records
+        const originalDb = cds.services.db;
+        (cds.services as any).db = {
+          run: async () => {
+            const result = [...records];
+            (result as any).$count = records.length;
+            return result;
+          },
+        };
+
+        // Stub cds.ql.SELECT → chainable builder
+        const originalQL = cds.ql;
+        (cds as any).ql = {
+          ...originalQL,
+          SELECT: {
+            from: () => {
+              const builder: any = {
+                columns: () => builder,
+                where: () => builder,
+                orderBy: () => builder,
+                limit: (l: number, o: number) => {
+                  builder.SELECT = { limit: { offset: { val: o } }, count: true };
+                  return builder;
+                },
+                SELECT: { limit: { offset: { val: 0 } }, count: true },
+              };
+              return builder;
+            },
+          },
+        };
+
+        // Stub cds.parse.expr
+        const originalParse = cds.parse;
+        (cds as any).parse = {
+          ...originalParse,
+          expr: () => ({
+            xpr: [{ ref: ["entityName"] }, "=", { val: entityName }],
+          }),
+        };
+
+        try {
+          const reader = new DataReader();
+          const req = buildDataReadRequest(entityName);
+          await reader.read(req as any);
+        } finally {
+          cds.model.all = originalAll;
+          (cds.services as any).db = originalDb;
+          (cds as any).ql = originalQL;
+          (cds as any).parse = originalParse;
+        }
+      })
+    );
+
+    // B2: Audit log emission — sensitive data fields, stubbed audit-log service
+    results.push(
+      await benchmarkAsync("DataReader._emitAuditlogs (stubbed audit-log)", async (size) => {
+        const records = recordsBySize.get(size)!;
+
+        const syntheticEntity: any = {
+          name: "perf.test.SensitiveEntity",
+          "@PersonalData.DataSubjectRole": "Customer",
+          elements: {
+            id: { type: "cds.UUID", key: true },
+            email: { type: "cds.String", key: false, "@PersonalData.IsPotentiallySensitive": true },
+            phone: { type: "cds.String", key: false, "@PersonalData.IsPotentiallySensitive": true },
+            name: { type: "cds.String", key: false },
+          },
+          get keyElements4DataInspector() {
+            return ["id"];
+          },
+        };
+
+        // Use the map index rather than records.indexOf(r): indexOf is a linear scan,
+        // which made this setup quadratic in `size` inside the benchmarked callback.
+        // Assumes the synthetic records are distinct objects, so index === indexOf.
+        const sensitiveRecords = records.map((r: any, index: number) => ({
+          ...r,
+          email: `user_${r.id}@example.com`,
+          phone: `+1-555-${String(index).padStart(4, "0")}`,
+          name: `User ${r.id}`,
+        }));
+
+        // Stub cds.env.requires to include audit-log. Only the `requires` property is
+        // swapped and later restored: replacing cds.env with a shallow `{ ...cds.env }`
+        // copy would leave a prototype-less plain object behind for later code.
+        const originalRequires = cds.env.requires;
+        (cds.env as any).requires = {
+          ...originalRequires,
+          "audit-log": { kind: "audit-log-to-console" },
+        };
+
+        // Stub cds.connect.to → return stubbed audit-log service
+        const originalConnect = cds.connect;
+        (cds as any).connect = {
+          ...originalConnect,
+          to: async (serviceName: string) => {
+            if (serviceName === "audit-log") return { log: async () => {} };
+            return originalConnect.to(serviceName);
+          },
+        };
+
+        try {
+          const reader = new DataReader();
+          await (reader as any)._emitAuditlogs(syntheticEntity, sensitiveRecords);
+        } finally {
+          (cds.env as any).requires = originalRequires;
+          (cds as any).connect = originalConnect;
+        }
+      })
+    );
+
+    // -------------------------------------------------------------------
+    // Build report and check regressions
+    // -------------------------------------------------------------------
+    report = buildReport(results);
+
+    if (report.systemWarnings.length > 0) {
+      console.log("\n    System Warnings:");
+      report.systemWarnings.forEach((w) => console.log(`      !  ${w}`));
+      console.log("");
+    }
+
+    // Load baseline and check for regressions
+    let baseline: BaselineData | undefined;
+    if (fs.existsSync(BASELINE_PATH)) {
+      baseline = JSON.parse(fs.readFileSync(BASELINE_PATH, "utf8")) as BaselineData;
+      report.baseline = baseline;
+    }
+
+    expect(results).to.have.length.greaterThan(0);
+    checkRegressions(results, baseline);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Helpers (test-specific, not reusable across projects)
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds the Report object from benchmark results and current environment.
+ *
+ * @param results Benchmark results to embed in the report as-is.
+ * @returns A report carrying the results, the configured thresholds and run
+ *   counts, a snapshot of the host environment, and any system-state warnings.
+ */
+function buildReport(results: BenchmarkResult[]): Report {
+  // os.cpus() returns an empty array when CPU information is unavailable
+  // (e.g. /proc not mounted); fall back instead of throwing on `[0].model`.
+  const cpuModel = os.cpus()[0]?.model ?? "unknown";
+
+  return {
+    timestamp: new Date().toISOString(),
+    sizes: [...sizes],
+    results,
+    regressionThreshold: MAX_REGRESSION,
+    slopeVarianceThreshold: MAX_SLOPE_VARIANCE,
+    testConfig: {
+      warmupRuns: WARMUP_RUNS,
+      measurementRuns: MEASUREMENT_RUNS,
+      outlierTrimPercent: OUTLIER_TRIM_PERCENT,
+      // Measured runs plus the extra runs reserved for outlier trimming.
+      totalRunsPerSize: MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT),
+    },
+    environment: {
+      node: process.version,
+      platform: `${process.platform} ${os.release()}`,
+      cpus: cpuModel,
+      totalMemoryGB: os.totalmem() / 1024 ** 3,
+      cpuLoad: os.loadavg(),
+    },
+    systemWarnings: checkSystemState(),
+  };
+}
+
+/**
+ * Compares every benchmark result with its baseline entry and prints a warning
+ * for each slope-ratio or per-item-time value that exceeds its allowance.
+ *
+ * Nothing is thrown: warnings are advisory only and never fail the test
+ * (see strategy doc §6). The check is a no-op when no baseline was loaded or
+ * when the run is updating the baseline.
+ *
+ * @param results  Results of the current run.
+ * @param baseline Stored baseline keyed by benchmark name, if one exists.
+ */
+function checkRegressions(results: BenchmarkResult[], baseline?: BaselineData): void {
+  if (UPDATE_BASELINE || !baseline) return;
+
+  const reference: BaselineData = baseline;
+  const asPercent = (fraction: number): string => (fraction * 100).toFixed(0);
+
+  results.forEach((current) => {
+    const stored = reference[current.name];
+    if (!stored) {
+      console.warn(
+        `  ⚠️  WARNING: ${current.name} baseline entry missing — skipping regression check`
+      );
+      return;
+    }
+
+    // Slope ratio: only compared when the baseline ratio is above 0.5 and the
+    // current ratio is positive.
+    const slopeComparable = stored.slopeRatio > 0.5 && current.slopeRatio > 0;
+    const slopeLimit = stored.slopeRatio * (1 + MAX_SLOPE_VARIANCE);
+    if (slopeComparable && current.slopeRatio > slopeLimit) {
+      console.warn(
+        `  ⚠️  WARNING: ${current.name} slope ratio regression: ` +
+          `${current.slopeRatio.toFixed(4)} > allowed ${slopeLimit.toFixed(4)} ` +
+          `(baseline: ${stored.slopeRatio.toFixed(4)}, threshold: +${asPercent(MAX_SLOPE_VARIANCE)}%)`
+      );
+    }
+
+    // Per-item time at the largest input size (last element of perItemMs).
+    const lastIndex = current.perItemMs.length - 1;
+    const perItemNow = current.perItemMs[lastIndex];
+    const perItemLimit = stored.perItemMsAtMax * (1 + MAX_REGRESSION);
+    if (perItemNow > perItemLimit) {
+      console.warn(
+        `  ⚠️  WARNING: ${current.name} per-item time regression: ` +
+          `${perItemNow.toFixed(7)}ms > allowed ${perItemLimit.toFixed(7)}ms ` +
+          `(baseline: ${stored.perItemMsAtMax.toFixed(7)}ms, threshold: +${asPercent(MAX_REGRESSION)}%)`
+      );
+    }
+  });
+}
+
+/**
+ * Creates the synthetic entity definition used by the DataReader benchmark (B1).
+ *
+ * The entity has a UUID key `id` followed by nine string elements
+ * `field_1` … `field_9`, fixed query limits of 1000, and two getters that
+ * report `["id"]` as key elements and `"db"` as data source.
+ *
+ * @param entityName Fully qualified name assigned to the entity.
+ */
+function buildSyntheticEntityForDataReader(entityName: string): any {
+  const elements: Record<string, { type: string; key: boolean }> = {
+    id: { type: "cds.UUID", key: true },
+  };
+  for (let n = 1; n <= 9; n++) {
+    elements[`field_${n}`] = { type: "cds.String", key: false };
+  }
+
+  return {
+    name: entityName,
+    "@HideFromDataInspector": false,
+    "@cds.query.limit.default": 1000,
+    "@cds.query.limit.max": 1000,
+    elements,
+    // Getters (not plain values) so each access yields a fresh result.
+    get keyElements4DataInspector() {
+      return ["id"];
+    },
+    get dataSource4DataInspector() {
+      return "db";
+    },
+  };
+}
diff --git a/test/performance/check-baseline-drift.js b/test/performance/check-baseline-drift.js
new file mode 100644
index 0000000..bab4107
--- /dev/null
+++ b/test/performance/check-baseline-drift.js
@@ -0,0 +1,270 @@
+#!/usr/bin/env node
+// check-baseline-drift.js
+//
+// Detects gradual drift in the CI performance baseline across git commits.
+//
+// Background: each developer keeps a local `performance-baseline.json` (gitignored)
+// calibrated to their own machine. The CI-managed baseline is
+// `performance-baseline.ci.json`, which is committed and updated only via the
+// manual `performance-rebaseline` GitHub Actions workflow. Because it lives in git,
+// its history captures every time the CI environment was re-baselined, making it
+// possible to detect gradual cost drift even when no single update exceeded the
+// single-run regression threshold.
+//
+// This script reads those commits and warns when:
+//   - The total per-item cost increase across the examined window exceeds
+//     DRIFT_MAX_TOTAL_INCREASE (default 20%).
+//   - There are DRIFT_CONSECUTIVE_WARN (default 3) consecutive increases.
+//
+// Run:
+//   npm run test:performance:check-drift          (uses CI baseline history)
+//   node test/performance/check-baseline-drift.js
+//
+// Options (env vars):
+//   DRIFT_BASELINE_FILE        (default "test/performance/performance-baseline.ci.json"):
+//                              git path of the baseline file to inspect.
+//   DRIFT_WINDOW               (default 10): number of recent commits to examine.
+//   DRIFT_MAX_TOTAL_INCREASE   (default 0.20): max allowed total increase across
+//                              the window as a fraction (0.20 = 20%).
+//   DRIFT_CONSECUTIVE_WARN     (default 3): number of consecutive per-item cost
+//                              increases before emitting a warning.
+
+/* eslint-disable no-console */
+"use strict";
+
+const { execSync } = require("child_process");
+
+// Repo-relative path of the baseline file. It is interpolated unquoted into the
+// git command lines below, so it must not contain spaces or shell metacharacters.
+const BASELINE_GIT_PATH =
+  process.env.DRIFT_BASELINE_FILE ?? "test/performance/performance-baseline.ci.json";
+// Number of most recent commits (touching the baseline file) to examine.
+const DRIFT_WINDOW = Number(process.env.DRIFT_WINDOW ?? "10");
+// Max total per-item increase across the window, as a fraction (0.20 = 20%).
+const DRIFT_MAX_TOTAL = Number(process.env.DRIFT_MAX_TOTAL_INCREASE ?? "0.20");
+// Number of consecutive increases that triggers the advisory streak warning.
+const DRIFT_CONSECUTIVE = Number(process.env.DRIFT_CONSECUTIVE_WARN ?? "3");
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function run(cmd) {
+  try {
+    return execSync(cmd, {
+      encoding: "utf8",
+      stdio: ["pipe", "pipe", "pipe"],
+    }).trim();
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Returns commits that touched the baseline file, most recent first.
+ * Each entry: { hash: string, date: string }
+ */
+function getCommitHistory() {
+  const raw = run(`git log --follow --format="%H %aI" -- ${BASELINE_GIT_PATH}`);
+  if (!raw) return [];
+  return raw
+    .split("\n")
+    .map((line) => {
+      const cleaned = line.replace(/"/g, "");
+      const spaceIdx = cleaned.indexOf(" ");
+      if (spaceIdx === -1) return null;
+      return {
+        hash: cleaned.slice(0, spaceIdx),
+        date: cleaned.slice(spaceIdx + 1),
+      };
+    })
+    .filter((c) => c && c.hash && c.date);
+}
+
+/**
+ * Reads and parses performance-baseline.ci.json at the given commit hash.
+ */
+function readBaselineAtCommit(hash) {
+  const raw = run(`git show ${hash}:${BASELINE_GIT_PATH}`);
+  if (!raw) return null;
+  try {
+    return JSON.parse(raw);
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Returns the Ordinary Least Squares slope for `values` indexed 0..n-1.
+ */
+function olsSlope(values) {
+  const n = values.length;
+  if (n < 2) return 0;
+  const meanX = (n - 1) / 2;
+  const meanY = values.reduce((a, b) => a + b, 0) / n;
+  let num = 0,
+    den = 0;
+  for (let i = 0; i < n; i++) {
+    const dx = i - meanX;
+    num += dx * (values[i] - meanY);
+    den += dx * dx;
+  }
+  return den === 0 ? 0 : num / den;
+}
+
+/**
+ * Returns the length of the trailing run of strictly increasing values.
+ * E.g. [1, 2, 1, 3, 4, 5] → 3  (last three entries form an increasing run)
+ */
+function trailingIncreaseStreak(values) {
+  let count = 0;
+  for (let i = values.length - 1; i > 0; i--) {
+    if (values[i] > values[i - 1]) count++;
+    else break;
+  }
+  return count;
+}
+
+/** Left-pad / right-pad helpers for table formatting. */
+const rpad = (s, w) => String(s).slice(0, w).padEnd(w);
+const lpad = (s, w) => String(s).slice(0, w).padStart(w);
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+function main() {
+  console.log("=== Performance Baseline Drift Check ===\n");
+
+  const commits = getCommitHistory();
+  if (commits.length === 0) {
+    console.log(`No git history found for: ${BASELINE_GIT_PATH}`);
+    console.log(
+      "Tip: the CI baseline is created by the `performance-rebaseline` workflow (manual trigger in GitHub Actions)."
+    );
+    console.log(
+      "     Run it at least twice to accumulate history. Until then, drift detection is not possible."
+    );
+    process.exit(0);
+  }
+
+  const window = commits.slice(0, DRIFT_WINDOW); // most recent first
+  console.log(
+    `Examining ${window.length} most recent commit(s) (${commits.length} total). DRIFT_WINDOW=${DRIFT_WINDOW}\n`
+  );
+
+  // Load snapshots in chronological order (oldest first) for trend analysis.
+  const snapshots = [];
+  for (const commit of [...window].reverse()) {
+    const data = readBaselineAtCommit(commit.hash);
+    if (data) snapshots.push({ ...commit, data });
+  }
+
+  if (snapshots.length < 2) {
+    console.log(`Only ${snapshots.length} readable snapshot(s) — need at least 2 to detect drift.`);
+    console.log(
+      "Trigger the `performance-rebaseline` workflow again to accumulate a second snapshot."
+    );
+    process.exit(0);
+  }
+
+  // Collect all known benchmark names across all snapshots.
+  const benchmarkNames = [...new Set(snapshots.flatMap((s) => Object.keys(s.data)))];
+
+  // -------------------------------------------------------------------------
+  // History table: perItemMsAtMax per benchmark per commit date
+  // -------------------------------------------------------------------------
+  const dateHeaders = snapshots.map((s) => s.date.slice(0, 10));
+  const nameWidth = 36;
+  const colWidth = 14;
+
+  const headerRow =
+    rpad("Benchmark (perItemMsAtMax)", nameWidth) +
+    dateHeaders.map((d) => lpad(d, colWidth)).join("");
+  console.log(headerRow);
+  console.log("-".repeat(headerRow.length));
+
+  for (const name of benchmarkNames) {
+    const cells = snapshots.map((s) => {
+      const v = s.data[name]?.perItemMsAtMax;
+      return typeof v === "number" ? v.toExponential(3) : "n/a";
+    });
+    console.log(rpad(name, nameWidth) + cells.map((c) => lpad(c, colWidth)).join(""));
+  }
+
+  // -------------------------------------------------------------------------
+  // Drift analysis per benchmark
+  // -------------------------------------------------------------------------
+  console.log("\n=== Drift Analysis ===\n");
+  let hasViolation = false;
+
+  for (const name of benchmarkNames) {
+    const values = snapshots
+      .map((s) => s.data[name]?.perItemMsAtMax)
+      .filter((v) => typeof v === "number");
+
+    if (values.length < 2) continue;
+
+    const oldest = values[0];
+    const latest = values[values.length - 1];
+    const totalIncrease = oldest > 0 ? (latest - oldest) / oldest : 0;
+    const streak = trailingIncreaseStreak(values);
+    const slope = olsSlope(values);
+    // Normalized slope: fraction of oldest value per commit step.
+    const slopeNorm = oldest > 0 ? slope / oldest : 0;
+
+    const issues = [];
+    if (totalIncrease > DRIFT_MAX_TOTAL) {
+      issues.push(
+        `total increase ${(totalIncrease * 100).toFixed(1)}% exceeds DRIFT_MAX_TOTAL_INCREASE=${(DRIFT_MAX_TOTAL * 100).toFixed(0)}%`
+      );
+      hasViolation = true;
+    }
+    if (streak >= DRIFT_CONSECUTIVE) {
+      // Streak warnings are advisory only — not violations (could be noise).
+      issues.push(
+        `${streak} consecutive increases (DRIFT_CONSECUTIVE_WARN=${DRIFT_CONSECUTIVE}) — investigate, may be noise`
+      );
+    }
+
+    const tag =
+      issues.length > 0 && totalIncrease > DRIFT_MAX_TOTAL
+        ? "FAIL"
+        : issues.length > 0
+          ? "WARN"
+          : slopeNorm > 0
+            ? "info"
+            : "ok  ";
+
+    console.log(
+      `[${tag}] ${rpad(name, nameWidth - 7)}` +
+        `  total=${lpad((totalIncrease * 100).toFixed(1) + "%", 7)}` +
+        `  streak=${streak}` +
+        `  slope=${slopeNorm >= 0 ? "+" : ""}${(slopeNorm * 100).toFixed(2)}%/commit`
+    );
+    for (const issue of issues) {
+      console.log(`       └─ ${issue}`);
+    }
+  }
+
+  console.log("\n--- Thresholds ---");
+  console.log(
+    `  DRIFT_MAX_TOTAL_INCREASE = ${(DRIFT_MAX_TOTAL * 100).toFixed(0)}%  (set via env var)`
+  );
+  console.log(
+    `  DRIFT_CONSECUTIVE_WARN   = ${DRIFT_CONSECUTIVE} consecutive increases  (advisory, not a violation)`
+  );
+  console.log(
+    "\nTo re-baseline after an intentional performance change: trigger the `performance-rebaseline` workflow in GitHub Actions."
+  );
+  console.log(
+    "  Developers: keep your local `performance-baseline.json` up to date with `npm run test:performance:update-baseline`."
+  );
+
+  if (hasViolation) {
+    console.log(
+      "\n[WARN] Baseline drift exceeds threshold(s). Either optimize the affected code path" +
+        " and update the baseline, or raise DRIFT_MAX_TOTAL_INCREASE if the increase is intentional."
+    );
+  } else {
+    console.log("\n[PASS] No significant drift detected.");
+  }
+  process.exit(0);
+}
+
+main();
diff --git a/test/performance/helpers/index.ts b/test/performance/helpers/index.ts
new file mode 100644
index 0000000..ba43bd6
--- /dev/null
+++ b/test/performance/helpers/index.ts
@@ -0,0 +1,12 @@
+/**
+ * Barrel export for performance test helpers.
+ *
+ * Usage in test files:
+ *   import { benchmarkSync, buildSyntheticEntities, ... } from "./helpers";
+ */
+
+export * from "./types";
+export * from "./statistics";
+export * from "./measurement";
+export * from "./reporting";
+export * from "./synthetic-data";
diff --git a/test/performance/helpers/measurement.ts b/test/performance/helpers/measurement.ts
new file mode 100644
index 0000000..f74acd8
--- /dev/null
+++ b/test/performance/helpers/measurement.ts
@@ -0,0 +1,259 @@
+/**
+ * Benchmark measurement infrastructure.
+ *
+ * Provides timing functions (sync/async), outlier trimming, slope computation,
+ * R² linear fit analysis, and high-level benchmark runners that orchestrate
+ * warmup → measure → analyze across multiple input sizes.
+ */
+
+import { performance } from "perf_hooks";
+import type { MeasurementStats, BenchmarkResult } from "./types";
+import { mean, calculateStats } from "./statistics";
+
+// ---------------------------------------------------------------------------
+// Configuration (env-overridable defaults; `||` so a blank env var counts as unset)
+// ---------------------------------------------------------------------------
+const MEASUREMENT_RUNS = Number(process.env.PERF_MEASUREMENT_RUNS || "20");
+const OUTLIER_TRIM_PERCENT = Number(process.env.PERF_OUTLIER_TRIM_PERCENT || "0.5");
+const WARMUP_RUNS = Number(process.env.PERF_WARMUP_RUNS || "10");
+
+/** Default input sizes used across all benchmarks. */
+export const sizes = [10, 50, 100, 500, 1000];
+
+// ---------------------------------------------------------------------------
+// Low-level timing
+// ---------------------------------------------------------------------------
+
+/** Result of a single measurement pass (one input size), as returned by measureSync/measureAsync. */
+type MeasurementResult = {
+  timings: number[]; // outlier-trimmed timings in ms, sorted ascending (see trimOutliers)
+  stats: MeasurementStats; // descriptive statistics computed over `timings`
+  /** heapUsed after the last run minus heapUsed before the first, in MB (can be negative). */
+  memoryDeltaMB: number;
+};
+
+/**
+ * Awaits `fn` sequentially `runs + ceil(runs * OUTLIER_TRIM_PERCENT)` times, timing each call.
+ * The surplus samples are discarded by `trimOutliers`, leaving `runs` timings (in ms).
+ * Resolves with the kept timings, their statistics, and the heapUsed change in MB.
+ */
+export const measureAsync = async (
+  fn: () => Promise<void>,
+  runs: number = MEASUREMENT_RUNS
+): Promise<MeasurementResult> => {
+  const BYTES_PER_MB = 1024 * 1024;
+  const iterations = runs + Math.ceil(runs * OUTLIER_TRIM_PERCENT);
+  const samples: number[] = [];
+  const heapAtStart = process.memoryUsage().heapUsed;
+
+  let remaining = iterations;
+  while (remaining-- > 0) {
+    const startedAt = performance.now();
+    await fn();
+    samples.push(performance.now() - startedAt);
+  }
+
+  const heapAtEnd = process.memoryUsage().heapUsed;
+  const { timings, stats } = trimOutliers(samples, runs);
+  return { timings, stats, memoryDeltaMB: (heapAtEnd - heapAtStart) / BYTES_PER_MB };
+};
+
+/**
+ * Synchronous counterpart of `measureAsync`: calls `fn` `runs + ceil(runs * OUTLIER_TRIM_PERCENT)`
+ * times, timing each call in ms; `trimOutliers` then keeps `runs` of those samples.
+ * Returns the kept timings, their statistics, and the heapUsed change in MB.
+ */
+export const measureSync = (fn: () => void, runs: number = MEASUREMENT_RUNS): MeasurementResult => {
+  const BYTES_PER_MB = 1024 * 1024;
+  const iterations = runs + Math.ceil(runs * OUTLIER_TRIM_PERCENT);
+  const samples: number[] = [];
+  const heapAtStart = process.memoryUsage().heapUsed;
+
+  let remaining = iterations;
+  while (remaining-- > 0) {
+    const startedAt = performance.now();
+    fn();
+    samples.push(performance.now() - startedAt);
+  }
+
+  const heapAtEnd = process.memoryUsage().heapUsed;
+  const { timings, stats } = trimOutliers(samples, runs);
+  return { timings, stats, memoryDeltaMB: (heapAtEnd - heapAtStart) / BYTES_PER_MB };
+};
+
+/**
+ * Outlier filter: ranks every sample by its absolute distance from the mean of all
+ * samples and retains the `keep` nearest ones (ties keep their original order).
+ * Returns the retained timings in ascending order together with their statistics.
+ */
+function trimOutliers(
+  allTimings: number[],
+  keep: number
+): { timings: number[]; stats: MeasurementStats } {
+  const center = mean(allTimings);
+  const distanceFromCenter = (sample: number): number => Math.abs(sample - center);
+
+  // Rank a copy so the caller's array is not reordered.
+  const ranked = [...allTimings].sort(
+    (left, right) => distanceFromCenter(left) - distanceFromCenter(right)
+  );
+  const timings = ranked.slice(0, keep);
+  timings.sort((low, high) => low - high);
+
+  return { timings, stats: calculateStats(timings) };
+}
+
+// ---------------------------------------------------------------------------
+// Scaling analysis
+// ---------------------------------------------------------------------------
+
+/**
+ * Finite-difference slopes: for each adjacent pair of measurements, the change in
+ * time divided by the change in input size (Δtime / Δsize).
+ * Yields one slope per adjacent pair, i.e. `times.length - 1` values (none for < 2 points).
+ * `sizeValues` is read at the same indices as `times`, so the two must be index-aligned.
+ */
+export const computeSlopes = (times: number[], sizeValues: number[]): number[] =>
+  times.slice(1).map((time, offset) => {
+    // `offset` indexes the sliced array, so it is also the index of the previous point.
+    const timeStep = time - times[offset];
+    const sizeStep = sizeValues[offset + 1] - sizeValues[offset];
+    return timeStep / sizeStep;
+  });
+
+/**
+ * Coefficient of determination (R²) of the ordinary least-squares line fitted through
+ * the points (`sizeValues[i]`, `times[i]`). A result of 1 means the points are collinear.
+ * Fewer than two points, or zero variance in `times`, also yields 1.
+ */
+export const computeR2 = (times: number[], sizeValues: number[]): number => {
+  const pointCount = times.length;
+  if (pointCount < 2) return 1;
+  const add = (total: number, value: number): number => total + value;
+  const xBar = sizeValues.reduce(add, 0) / pointCount;
+  const yBar = times.reduce(add, 0) / pointCount;
+  // Least-squares slope = Σ(dx·dy) / Σ(dx²); treated as 0 when every x is identical.
+  let sumDxDy = 0;
+  let sumDxDx = 0;
+  times.forEach((y, i) => {
+    const dx = sizeValues[i] - xBar;
+    sumDxDy += dx * (y - yBar);
+    sumDxDx += dx * dx;
+  });
+  const gradient = sumDxDx === 0 ? 0 : sumDxDy / sumDxDx;
+  const offset = yBar - gradient * xBar;
+  // R² = 1 - (residual sum of squares / total sum of squares).
+  const fitted = (i: number): number => gradient * sizeValues[i] + offset;
+  const residualSS = times.reduce((acc, y, i) => acc + (y - fitted(i)) ** 2, 0);
+  const totalSS = times.reduce((acc, y) => acc + (y - yBar) ** 2, 0);
+  return totalSS === 0 ? 1 : 1 - residualSS / totalSS;
+};
+
+// ---------------------------------------------------------------------------
+// High-level benchmark runners
+// ---------------------------------------------------------------------------
+
+/**
+ * Synchronous benchmark driver. For every entry in `sizes` it calls `runFn(size)`
+ * WARMUP_RUNS times untimed, measures it with `measureSync`, and records the median.
+ * The medians are then reduced to per-item cost, segment slopes, slope ratio and R².
+ * Progress and a per-size summary line are written to stdout.
+ */
+export const benchmarkSync = (name: string, runFn: (size: number) => void): BenchmarkResult => {
+  // Loop-invariant; only used in the progress message.
+  const plannedRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT);
+  const perSize: { stats: MeasurementStats; memoryDeltaMB: number }[] = [];
+
+  console.log(`    Benchmarking ${name}...`);
+  for (const size of sizes) {
+    process.stdout.write(`      Size ${size}: warmup (${WARMUP_RUNS} runs)...`);
+    // Untimed warmup calls before the measured runs.
+    let warmupsLeft = WARMUP_RUNS;
+    while (warmupsLeft-- > 0) {
+      runFn(size);
+    }
+
+    process.stdout.write(` measuring (${plannedRuns} runs)...`);
+    const { stats, memoryDeltaMB } = measureSync(() => runFn(size), MEASUREMENT_RUNS);
+    perSize.push({ stats, memoryDeltaMB });
+
+    // Coefficient of variation in percent; above 20% the size is flagged as noisy.
+    const cv = (stats.stdDev / stats.mean) * 100;
+    const medianText = `${stats.median.toFixed(2)}ms`;
+    const ciText = `±${stats.confidenceInterval.toFixed(2)}ms`;
+    const cvText = `CV: ${cv.toFixed(1)}%${cv > 20 ? " ! HIGH VARIANCE" : ""}`;
+    console.log(` ✓ (${medianText} ${ciText}, ${cvText})`);
+  }
+
+  // Derive the scaling metrics from the per-size medians.
+  const timingsMs = perSize.map((entry) => entry.stats.median);
+  const slopes = computeSlopes(timingsMs, sizes);
+  const [firstSlope] = slopes;
+  const lastSlope = slopes[slopes.length - 1];
+
+  return {
+    name,
+    sizes: [...sizes],
+    timingsMs,
+    timingStats: perSize.map((entry) => entry.stats),
+    perItemMs: timingsMs.map((medianMs, idx) => medianMs / sizes[idx]),
+    slopes,
+    slopeRatio: slopes.length >= 2 ? lastSlope / firstSlope : 1,
+    r2: computeR2(timingsMs, sizes),
+    memoryDeltaMB: perSize.map((entry) => entry.memoryDeltaMB),
+  };
+};
+
+/**
+ * Asynchronous benchmark driver. For every entry in `sizes` it awaits `runFn(size)`
+ * WARMUP_RUNS times untimed, measures it with `measureAsync`, and records the median.
+ * The medians are then reduced to per-item cost, segment slopes, slope ratio and R².
+ * Progress and a per-size summary line are written to stdout.
+ */
+export const benchmarkAsync = async (
+  name: string,
+  runFn: (size: number) => Promise<void>
+): Promise<BenchmarkResult> => {
+  // Loop-invariant; only used in the progress message.
+  const plannedRuns = MEASUREMENT_RUNS + Math.ceil(MEASUREMENT_RUNS * OUTLIER_TRIM_PERCENT);
+  const perSize: { stats: MeasurementStats; memoryDeltaMB: number }[] = [];
+
+  console.log(`    Benchmarking ${name}...`);
+  for (const size of sizes) {
+    process.stdout.write(`      Size ${size}: warmup (${WARMUP_RUNS} runs)...`);
+    // Untimed warmup calls, awaited one at a time, before the measured runs.
+    let warmupsLeft = WARMUP_RUNS;
+    while (warmupsLeft-- > 0) {
+      await runFn(size);
+    }
+
+    process.stdout.write(` measuring (${plannedRuns} runs)...`);
+    const { stats, memoryDeltaMB } = await measureAsync(() => runFn(size), MEASUREMENT_RUNS);
+    perSize.push({ stats, memoryDeltaMB });
+
+    // Coefficient of variation in percent; above 20% the size is flagged as noisy.
+    const cv = (stats.stdDev / stats.mean) * 100;
+    const medianText = `${stats.median.toFixed(2)}ms`;
+    const ciText = `±${stats.confidenceInterval.toFixed(2)}ms`;
+    const cvText = `CV: ${cv.toFixed(1)}%${cv > 20 ? " ! HIGH VARIANCE" : ""}`;
+    console.log(` ✓ (${medianText} ${ciText}, ${cvText})`);
+  }
+
+  // Derive the scaling metrics from the per-size medians.
+  const timingsMs = perSize.map((entry) => entry.stats.median);
+  const slopes = computeSlopes(timingsMs, sizes);
+  const [firstSlope] = slopes;
+  const lastSlope = slopes[slopes.length - 1];
+
+  return {
+    name,
+    sizes: [...sizes],
+    timingsMs,
+    timingStats: perSize.map((entry) => entry.stats),
+    perItemMs: timingsMs.map((medianMs, idx) => medianMs / sizes[idx]),
+    slopes,
+    slopeRatio: slopes.length >= 2 ? lastSlope / firstSlope : 1,
+    r2: computeR2(timingsMs, sizes),
+    memoryDeltaMB: perSize.map((entry) => entry.memoryDeltaMB),
+  };
+};
diff --git a/test/performance/helpers/reporting.ts b/test/performance/helpers/reporting.ts
new file mode 100644
index 0000000..487b99e
--- /dev/null
+++ b/test/performance/helpers/reporting.ts
@@ -0,0 +1,161 @@
+/**
+ * Performance report generation.
+ *
+ * Builds a human-readable Markdown report from benchmark results,
+ * including environment info, configuration, results table with
+ * emoji-coded indicators, and a legend.
+ */
+
+import type { Report } from "./types";
+
+// ---------------------------------------------------------------------------
+// Emoji indicators for report table cells
+// ---------------------------------------------------------------------------
+
+/** Slope-ratio indicator: 🟢 up to 2.0 (linear), 🟡 up to 4.0 (suspicious), 🔴 otherwise. */
+export const slopeRatioEmoji = (ratio: number): string => {
+  const withinLinear = ratio <= 2.0;
+  const withinSuspicious = ratio <= 4.0;
+  return withinLinear ? "🟢" : withinSuspicious ? "🟡" : "🔴";
+};
+
+/** R² indicator: 🟢 from 0.995 up (excellent), 🟡 from 0.98 up, 🔴 otherwise. */
+export const r2Emoji = (r2: number): string => {
+  const tiers: [number, string][] = [[0.995, "🟢"], [0.98, "🟡"]];
+  const hit = tiers.find(([floor]) => r2 >= floor);
+  return hit ? hit[1] : "🔴";
+};
+
+/** CV% indicator: 🟢 up to 5 (stable), 🟡 up to 15, 🔴 otherwise (noisy). */
+export const cvEmoji = (cv: number): string => {
+  if (!(cv <= 15)) return "🔴";
+  // At most 15 from here on; the stable band is the lower part of that range.
+  return cv <= 5 ? "🟢" : "🟡";
+};
+
+// ---------------------------------------------------------------------------
+// Formatting helpers
+// ---------------------------------------------------------------------------
+
+/** Renders `value` in fixed-point notation with exactly `digits` decimals. */
+const formatNumber = (value: number, digits: number): string => value.toFixed(digits);
+
+/** Formats every entry with `formatNumber` and joins the results with ", ". */
+const formatList = (values: number[], digits: number): string =>
+  Array.from(values, (entry) => formatNumber(entry, digits)).join(", ");
+
+// ---------------------------------------------------------------------------
+// Markdown report builder
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds a complete Markdown performance report: Environment, Test Configuration, System
+ * Warnings (only if any), Results table (baseline columns show "n/a" when absent), Legend.
+ * @param report - Benchmark results plus environment, configuration and optional baseline.
+ * @returns The report as one string, lines joined with "\n".
+ */
+export const buildMarkdownReport = (report: Report): string => {
+  const lines: string[] = [];
+  lines.push(`# Performance Report (${report.timestamp})`);
+  lines.push("");
+
+  // --- Environment ---
+  lines.push("## Environment");
+  lines.push("");
+  lines.push(`- Node: ${report.environment.node}`);
+  lines.push(`- Platform: ${report.environment.platform}`);
+  lines.push(`- CPU: ${report.environment.cpus}`);
+  lines.push(`- Memory: ${report.environment.totalMemoryGB.toFixed(1)} GB`);
+  lines.push(`- CPU Load: ${report.environment.cpuLoad.map((l) => l.toFixed(2)).join(", ")}`);
+  lines.push("");
+
+  // --- Test Configuration ---
+  lines.push("## Test Configuration");
+  lines.push("");
+  lines.push(`- Warmup runs: ${report.testConfig.warmupRuns}`);
+  lines.push(`- Measurement runs: ${report.testConfig.measurementRuns}`);
+  lines.push(
+    `- Outlier trim: ${(report.testConfig.outlierTrimPercent * 100).toFixed(0)}% extra (${report.testConfig.totalRunsPerSize - report.testConfig.measurementRuns} trimmed)`
+  );
+  lines.push(`- Total runs per size: ${report.testConfig.totalRunsPerSize}`);
+
+  // --- System Warnings ---
+  if (report.systemWarnings.length > 0) {
+    lines.push("");
+    lines.push("### System Warnings");
+    lines.push("");
+    report.systemWarnings.forEach((w) => lines.push(`- ${w}`));
+  }
+
+  // --- Results table ---
+  lines.push("");
+  lines.push("## Results");
+  lines.push("");
+  lines.push(
+    "| Benchmark | Timings ms (median) | Variance (CV%) | Per-item ms | Memory ΔMB | Slope ratio | R² | Baseline per-item max | Baseline slope ratio | Baseline R² |"
+  );
+  lines.push("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |");
+
+  for (const result of report.results) {
+    const baseline = report.baseline?.[result.name];
+    const baselinePerItem = baseline ? formatNumber(baseline.perItemMsAtMax, 7) : "n/a";
+    const baselineSlope = baseline ? formatNumber(baseline.slopeRatio, 4) : "n/a";
+    const baselineR2 = baseline?.r2 !== undefined ? formatNumber(baseline.r2, 4) : "n/a";
+    const avgCV =
+      result.timingStats.map((s) => (s.stdDev / s.mean) * 100).reduce((sum, cv) => sum + cv, 0) /
+      result.timingStats.length;
+
+    // Outer pipes make each data row match the header and delimiter rows above.
+    const cells = [
+      result.name,
+      formatList(result.timingsMs, 2),
+      formatNumber(avgCV, 1) + "% " + cvEmoji(avgCV),
+      formatList(result.perItemMs, 7),
+      formatList(result.memoryDeltaMB, 2),
+      formatNumber(result.slopeRatio, 4) + " " + slopeRatioEmoji(result.slopeRatio),
+      formatNumber(result.r2, 4) + " " + r2Emoji(result.r2),
+      baselinePerItem,
+      baselineSlope,
+      baselineR2,
+    ];
+    lines.push(`| ${cells.join(" | ")} |`);
+  }
+
+  // --- Legend ---
+  lines.push("");
+  lines.push("## Legend");
+  lines.push("");
+  lines.push("### Slope ratio");
+  lines.push("");
+  lines.push(
+    "Ratio of the last slope segment to the first. A perfectly linear O(n) function scores 1.0."
+  );
+  lines.push("");
+  lines.push("| Indicator | Range | Meaning |");
+  lines.push("| --- | --- | --- |");
+  lines.push("| 🟢 | ≤ 2.0 | Consistent with O(n) linear scaling |");
+  lines.push("| 🟡 | 2.0 – 4.0 | Suspicious — possible mild super-linear growth |");
+  lines.push("| 🔴 | > 4.0 | Clearly non-linear (O(n²) or worse) |");
+  lines.push("");
+  lines.push("### CV% (Coefficient of Variation)");
+  lines.push("");
+  lines.push("Average CV across all measured sizes. Measures measurement stability.");
+  lines.push("");
+  lines.push("| Indicator | Range | Meaning |");
+  lines.push("| --- | --- | --- |");
+  lines.push("| 🟢 | ≤ 5% | Stable — measurements are repeatable |");
+  lines.push("| 🟡 | 5% – 15% | Acceptable for Node.js |");
+  lines.push("| 🔴 | > 15% | High noise — results unreliable |");
+  lines.push("");
+  lines.push("### R² (Coefficient of Determination)");
+  lines.push("");
+  lines.push("1.0 = medians fall perfectly on a straight line.");
+  lines.push("");
+  lines.push("| Indicator | Range | Meaning |");
+  lines.push("| --- | --- | --- |");
+  lines.push("| 🟢 | ≥ 0.995 | Excellent linear fit |");
+  lines.push("| 🟡 | 0.980 – 0.995 | Minor deviation from linearity |");
+  lines.push("| 🔴 | < 0.980 | Clearly non-linear scaling |");
+
+  return lines.join("\n");
+};
diff --git a/test/performance/helpers/statistics.ts b/test/performance/helpers/statistics.ts
new file mode 100644
index 0000000..a5952b0
--- /dev/null
+++ b/test/performance/helpers/statistics.ts
@@ -0,0 +1,70 @@
+/**
+ * Statistical functions for performance measurement analysis.
+ *
+ * Provides basic descriptive statistics (median, mean, standard deviation),
+ * composite stats calculation, and system health checks.
+ */
+
+import os from "os";
+import type { MeasurementStats } from "./types";
+
+/** Median of `values`: the middle element of a sorted copy, or the mean of the two middle ones. */
+export const median = (values: number[]): number => {
+  const ordered = values.slice().sort((x, y) => x - y);
+  const upperMid = Math.trunc(ordered.length / 2);
+  return ordered.length % 2 === 1 ? ordered[upperMid] : (ordered[upperMid - 1] + ordered[upperMid]) / 2;
+};
+
+/** Arithmetic mean: the left-to-right sum of `values` divided by `values.length` (NaN if empty). */
+export const mean = (values: number[]): number =>
+  values.reduce((runningTotal, current) => runningTotal + current, 0) / values.length;
+
+/** Population standard deviation: √(mean squared deviation from the mean), dividing by N. */
+export const stdDev = (values: number[]): number => {
+  const center = mean(values);
+  const variance = mean(values.map((sample) => Math.pow(sample - center, 2)));
+  return Math.sqrt(variance);
+};
+
+/**
+ * Summarises `values` as median, mean, population stdDev, min, max and the half-width of
+ * a normal-approximation 95% confidence interval for the mean (1.96 · stdDev / √n).
+ */
+export const calculateStats = (values: number[]): MeasurementStats => {
+  const spread = stdDev(values);
+  const standardError = spread / Math.sqrt(values.length);
+  // min/max come from an ascending copy so `values` itself is never reordered.
+  const ascending = values.slice().sort((lo, hi) => lo - hi);
+  return {
+    median: median(values),
+    mean: mean(values),
+    stdDev: spread,
+    min: ascending[0],
+    max: ascending[ascending.length - 1],
+    confidenceInterval: 1.96 * standardError,
+  };
+};
+
+/**
+ * Inspects the host through `os` and reports conditions that may skew benchmark timings:
+ * a 1-minute load average above 70% of the CPU count, or more than 85% of memory in use.
+ * Returns one human-readable warning per triggered condition (empty array when none).
+ */
+export const checkSystemState = (): string[] => {
+  const BYTES_PER_GB = 1024 ** 3;
+  const [oneMinuteLoad] = os.loadavg();
+  const cpuCount = os.cpus().length;
+  const freeMemGB = os.freemem() / BYTES_PER_GB;
+  const totalMemGB = os.totalmem() / BYTES_PER_GB;
+  const memUsagePercent = ((totalMemGB - freeMemGB) / totalMemGB) * 100;
+
+  const cpuWarning =
+    `High CPU load detected: ${oneMinuteLoad.toFixed(2)} (${cpuCount} CPUs). Results may be unreliable.`;
+  const memWarning =
+    `High memory usage: ${memUsagePercent.toFixed(1)}% (${freeMemGB.toFixed(1)}GB free of ${totalMemGB.toFixed(1)}GB).`;
+  // Order is fixed: the CPU warning (if any) precedes the memory warning.
+  return [
+    ...(oneMinuteLoad > cpuCount * 0.7 ? [cpuWarning] : []),
+    ...(memUsagePercent > 85 ? [memWarning] : []),
+  ];
+};
diff --git a/test/performance/helpers/synthetic-data.ts b/test/performance/helpers/synthetic-data.ts
new file mode 100644
index 0000000..5a26a1b
--- /dev/null
+++ b/test/performance/helpers/synthetic-data.ts
@@ -0,0 +1,178 @@
+/**
+ * Synthetic data generators and mock request builders.
+ *
+ * These functions produce deterministic, configurable test data that isolates
+ * plugin processing from real CDS models and database queries. Used by
+ * performance benchmarks to control input size precisely.
+ */
+
+// ---------------------------------------------------------------------------
+// Entity generators
+// ---------------------------------------------------------------------------
+
+/**
+ * Generates `count` synthetic CDS-like entity definitions named `perf.test.Entity_<i>`.
+ *
+ * Every entity's `elements` map holds, in insertion order:
+ * - the UUID key element `id_<i>`
+ * - `elementsPerEntity - 1` fields `field_<i>_<j>` whose type and annotations cycle with `j`
+ * - `hidden_<i>`, flagged `@HideFromDataInspector: true` — should be filtered out
+ * - `assoc_<i>`, typed `cds.Association` — should be filtered out
+ *
+ * @param count - Number of entities to generate
+ * @param elementsPerEntity - Key element plus regular fields per entity (default 10)
+ */
+export function buildSyntheticEntities(count: number, elementsPerEntity: number = 10): any[] {
+  // Field type cycles with j % 3: 0 → Integer, 1 → String, 2 → Boolean.
+  const typeCycle = ["cds.Integer", "cds.String", "cds.Boolean"];
+
+  // Definition of the j-th regular field; every attribute depends on j only.
+  const regularField = (j: number): Record<string, any> => {
+    const isString = j % 3 === 1;
+    return {
+      type: typeCycle[j % 3],
+      key: false,
+      length: isString ? 255 : undefined,
+      default: j % 5 === 0 ? { val: "default" } : undefined,
+      notNull: j % 4 === 0,
+      "@PersonalData.IsPotentiallySensitive": j % 7 === 0,
+      "@Core.Computed": j % 9 === 0,
+      "@HideFromDataInspector": false,
+    };
+  };
+
+  // Builds one entity; `index` is captured by the two getters below.
+  const entityAt = (index: number): any => {
+    const elements: Record<string, any> = {
+      [`id_${index}`]: { type: "cds.UUID", key: true, "@HideFromDataInspector": false },
+    };
+    for (let j = 1; j < elementsPerEntity; j += 1) {
+      elements[`field_${index}_${j}`] = regularField(j);
+    }
+
+    // Both trailing elements are meant to be filtered out by EntityDefinitionReader.
+    elements[`hidden_${index}`] = { type: "cds.String", "@HideFromDataInspector": true };
+    elements[`assoc_${index}`] = { type: "cds.Association" };
+
+    return {
+      name: `perf.test.Entity_${index}`,
+      "@title": index % 3 === 0 ? `Entity ${index} Title` : undefined,
+      "@HideFromDataInspector": false,
+      elements,
+      get dataSource4DataInspector() {
+        return index % 2 === 0 ? "db" : "service";
+      },
+      get keyElements4DataInspector() {
+        return [`id_${index}`];
+      },
+    };
+  };
+
+  const entities: any[] = [];
+  for (let i = 0; i < count; i += 1) {
+    entities.push(entityAt(i));
+  }
+  return entities;
+}
+
+// ---------------------------------------------------------------------------
+// Record generators
+// ---------------------------------------------------------------------------
+
+/**
+ * Generates `count` synthetic records for DataReader benchmarks.
+ *
+ * Record `i` has `id: "uuid-<i>"` plus `fieldsPerRecord - 1` fields `field_<j>` whose value
+ * kind cycles with `j`. The array also carries `$count = count`, like a CDS query result.
+ *
+ * @param count - Number of records to generate
+ * @param fieldsPerRecord - Number of fields per record, `id` included (default 10)
+ */
+export function buildSyntheticRecords(count: number, fieldsPerRecord: number = 10): any[] {
+  // Value kind cycles with j % 3: 0 → number (i * j), 1 → string, 2 → boolean (i is even).
+  const valueFor = (i: number, j: number): number | string | boolean =>
+    [i * j, `value_${i}_${j}`, i % 2 === 0][j % 3];
+  const records: any[] = [];
+  for (let i = 0; i < count; i += 1) {
+    const record: Record<string, any> = { id: `uuid-${i}` };
+    for (let j = 1; j < fieldsPerRecord; j += 1) record[`field_${j}`] = valueFor(i, j);
+    records.push(record);
+  }
+  return Object.assign(records, { $count: count });
+}
+
+// ---------------------------------------------------------------------------
+// Mock request builders
+// ---------------------------------------------------------------------------
+
+/**
+ * Creates a mock `cds.Request` for EntityDefinitionReader.read() — collection request.
+ *
+ * Simulates a GET with `$select=*` and optional OData query options.
+ * @param options.filter - OData $filter expression (e.g. `contains(name, 'Foo')`)
+ * @param options.orderby - Single-property $orderby (`name` or `name desc`); direction defaults to `asc`
+ * @param options.skip - OData $skip value (passed on as a string)
+ * @param options.top - OData $top value (passed on as a string)
+ * @returns Mock request with `params`, CQN `query`, raw `req.query` options and a throwing `reject`
+ */
+export function buildEntityDefinitionRequest(options?: {
+  filter?: string;
+  orderby?: string;
+  skip?: number;
+  top?: number;
+}): any {
+  const columns = ["*"];
+  // Split once on any whitespace run so "name  desc" or " name desc " still parse correctly.
+  const [orderField, orderDirection] = options?.orderby?.trim().split(/\s+/) ?? [];
+  return {
+    params: [],
+    query: {
+      SELECT: {
+        columns,
+        count: true,
+        orderBy: orderField ? [{ ref: [orderField], sort: orderDirection || "asc" }] : undefined,
+      },
+    },
+    req: {
+      query: {
+        $filter: options?.filter,
+        $orderby: options?.orderby,
+        $skip: options?.skip !== undefined ? String(options.skip) : undefined,
+        $top: options?.top !== undefined ? String(options.top) : undefined,
+      },
+    },
+    reject: (code: number, msg: string) => {
+      throw new Error(`Request rejected: ${code} ${msg}`);
+    },
+  };
+}
+
+/**
+ * Creates a mock `cds.Request` for DataReader.read() — data retrieval request.
+ *
+ * Carries a `SELECT *` query with count plus the raw OData options: a `$filter` on the
+ * entity name, `$skip` "0" and `$top` "1000". `reject` throws instead of returning.
+ * @param entityName - The entity name to filter on (e.g. `perf.test.Entity_0`)
+ */
+export function buildDataReadRequest(entityName: string): any {
+  // CQN part of the request: select every column and ask for the total count.
+  const query = { SELECT: { columns: ["*"], count: true } };
+  // Raw OData options: filter on the entity name, skip 0, top 1000.
+  const odataOptions = {
+    $filter: `entityName = '${entityName}'`,
+    $skip: "0",
+    $top: "1000",
+  };
+
+  // Surfaces a rejection as an Error carrying the code and message.
+  const reject = (code: number, msg: string): never => {
+    throw new Error(`Request rejected: ${code} ${msg}`);
+  };
+
+  return {
+    params: [],
+    query,
+    req: { query: odataOptions },
+    reject,
+  };
+}
diff --git a/test/performance/helpers/types.ts b/test/performance/helpers/types.ts
new file mode 100644
index 0000000..05ea53c
--- /dev/null
+++ b/test/performance/helpers/types.ts
@@ -0,0 +1,75 @@
+/**
+ * Type definitions for the performance testing infrastructure.
+ *
+ * These types define the shape of measurement results, baseline data,
+ * and the final performance report.
+ */
+
+/** Descriptive statistics for a set of timing measurements (produced by `calculateStats`). */
+export type MeasurementStats = {
+  median: number; // middle value of the sorted timings (mean of the two middle ones if even)
+  mean: number; // arithmetic mean
+  stdDev: number; // population standard deviation (divides by N, not N - 1)
+  min: number; // smallest timing
+  max: number; // largest timing
+  /** 95% confidence interval half-width (±value), computed as 1.96 · stdDev / √N. */
+  confidenceInterval: number;
+};
+
+/** Result of a single benchmark run across all input sizes. */
+export type BenchmarkResult = {
+  name: string; // benchmark label; also the lookup key into the baseline data
+  sizes: number[]; // input sizes measured; the per-size arrays below are index-aligned with it
+  /** Median timing in ms for each size. */
+  timingsMs: number[];
+  /** Full statistics for each size. */
+  timingStats: MeasurementStats[];
+  /** Time per item (timingMs / size) for each size. */
+  perItemMs: number[];
+  /** Slope between consecutive size pairs (ms per additional item); one fewer entry than `sizes`. */
+  slopes: number[];
+  /** Ratio of last slope to first slope. 1.0 = perfectly linear; also 1 when fewer than two slopes exist. */
+  slopeRatio: number;
+  /** R² coefficient of determination for linear fit. 1.0 = perfect. */
+  r2: number;
+  /** Heap memory delta in MB for each size. */
+  memoryDeltaMB: number[];
+};
+
+/** A single entry in the performance baseline file. */
+export type BaselineEntry = {
+  sizes: number[]; // presumably the input sizes the baseline was recorded with — confirm against the writer
+  perItemMsAtMax: number; // presumably per-item ms at the largest size — confirm against the writer
+  slopeRatio: number; // baseline counterpart of BenchmarkResult.slopeRatio
+  r2?: number; // baseline R²; optional — the report prints "n/a" when it is missing
+};
+
+/** The full baseline file: benchmark name → baseline entry. */
+export type BaselineData = Record<string, BaselineEntry>;
+
+/** Test configuration summary for the report (rendered under "Test Configuration"). */
+export type TestConfig = {
+  warmupRuns: number; // reported as "Warmup runs"
+  measurementRuns: number; // reported as "Measurement runs"
+  outlierTrimPercent: number; // fraction, not percent: the report multiplies it by 100 ("% extra")
+  totalRunsPerSize: number; // the report shows (totalRunsPerSize - measurementRuns) as "trimmed"
+};
+
+/** The complete performance report written to disk after a run. */
+export type Report = {
+  timestamp: string; // shown in the Markdown report title
+  sizes: number[];
+  results: BenchmarkResult[]; // one results-table row per entry
+  baseline?: BaselineData; // when absent, the baseline columns render as "n/a"
+  regressionThreshold: number; // NOTE(review): consumer not visible here — confirm meaning/units
+  slopeVarianceThreshold: number; // NOTE(review): consumer not visible here — confirm meaning/units
+  testConfig: TestConfig;
+  environment: {
+    node: string;
+    platform: string;
+    cpus: string; // pre-formatted CPU description (a string, not a count)
+    totalMemoryGB: number;
+    cpuLoad: number[]; // printed with two decimals; presumably os.loadavg() — confirm
+  };
+  systemWarnings: string[]; // rendered as a "System Warnings" list when non-empty
+};
diff --git a/test/performance/performance-baseline.ci.json b/test/performance/performance-baseline.ci.json
new file mode 100644
index 0000000..e7855f9
--- /dev/null
+++ b/test/performance/performance-baseline.ci.json
@@ -0,0 +1,62 @@
+{
+  "EntityDefinitionReader.read (collection)": {
+    "sizes": [
+      10,
+      50,
+      100,
+      500,
+      1000
+    ],
+    "perItemMsAtMax": 0.0008664794999999686,
+    "slopeRatio": 1.0563725392233307,
+    "r2": 0.9991454243695702
+  },
+  "EntityDefinitionReader.read (filtered)": {
+    "sizes": [
+      10,
+      50,
+      100,
+      500,
+      1000
+    ],
+    "perItemMsAtMax": 0.000915937500000041,
+    "slopeRatio": 0.8465581902233116,
+    "r2": 0.9997151636234601
+  },
+  "EntityDefinitionReader._getEntityElements (via read)": {
+    "sizes": [
+      10,
+      50,
+      100,
+      500,
+      1000
+    ],
+    "perItemMsAtMax": 0.00030279100000001334,
+    "slopeRatio": 0.9524803034682242,
+    "r2": 0.997822499293361
+  },
+  "DataReader.read (response construction, DB stubbed)": {
+    "sizes": [
+      10,
+      50,
+      100,
+      500,
+      1000
+    ],
+    "perItemMsAtMax": 0.00016691650000001345,
+    "slopeRatio": 0.5997224934519662,
+    "r2": 0.9979562674639578
+  },
+  "DataReader._emitAuditlogs (stubbed audit-log)": {
+    "sizes": [
+      10,
+      50,
+      100,
+      500,
+      1000
+    ],
+    "perItemMsAtMax": 0.0007755829999999832,
+    "slopeRatio": 0.40819046558119343,
+    "r2": 0.9743144804610471
+  }
+}
\ No newline at end of file
diff --git a/test/tsconfig.json b/test/tsconfig.json
index 93e467b..c833933 100644
--- a/test/tsconfig.json
+++ b/test/tsconfig.json
@@ -3,6 +3,6 @@
   "include": ["**/*.ts"],
   "compilerOptions": {
     "noEmit": true,
-    "rootDir": "."
+    "rootDir": ".."
   }
 }