diff --git a/.github/workflows/benchmark-pr.yml b/.github/workflows/benchmark-pr.yml
new file mode 100644
index 00000000..1845bce1
--- /dev/null
+++ b/.github/workflows/benchmark-pr.yml
@@ -0,0 +1,77 @@
+name: benchmark-pr
+
+# Runs the performance matrix for every pull-request push and keeps a single
+# benchmark comment on the PR up to date with the latest results.
+on:
+  pull_request:
+    types: [opened, synchronize]
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
+      - name: Install dependencies
+        run: uv pip install --system -e ".[certification,dev]"
+
+      - name: Run benchmarks
+        id: bench
+        run: |
+          mkdir -p .perf-artifacts
+          # Record the exit code instead of failing here so the comment and
+          # artifact steps still run; the final step fails the job.
+          set +e
+          # stderr goes to its own file so runner_output.json stays valid JSON.
+          PYTHONPATH=src python tools/run_perf_matrix.py \
+            --establish-baseline \
+            --shuffle \
+            --artifact-root .perf-artifacts \
+            > .perf-artifacts/runner_output.json \
+            2> .perf-artifacts/runner_stderr.log
+          EXIT_CODE=$?
+          # Surface the runner's diagnostics in the job log as well.
+          cat .perf-artifacts/runner_stderr.log >&2
+          set -e
+          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
+
+      - name: Format benchmark comment
+        run: |
+          PYTHONPATH=src python tools/format_benchmark_comment.py \
+            .perf-artifacts/summary.json > .perf-artifacts/comment.md
+
+      - name: Post or update PR comment
+        # Pull requests from forks normally receive a read-only GITHUB_TOKEN,
+        # so only try to comment when the head branch lives in this repository.
+        if: github.event.pull_request.head.repo.full_name == github.repository
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # Hidden HTML marker identifying the comment owned by this workflow.
+          # It must be non-empty: an empty prefix matches every comment.
+          COMMENT_TAG="<!-- benchmark-pr -->"
+          BODY=$(cat .perf-artifacts/comment.md)
+          FULL_BODY="${COMMENT_TAG}
+          ${BODY}"
+
+          # --paginate walks every page of comments; the filter runs once per
+          # page, so keep only the first id printed across all pages.
+          EXISTING=$(gh api --paginate "repos/${REPO}/issues/${PR_NUMBER}/comments" \
+            --jq ".[] | select(.body | startswith(\"${COMMENT_TAG}\")) | .id" \
+            2>/dev/null | head -n 1 || true)
+
+          if [ -n "$EXISTING" ]; then
+            gh api "repos/${REPO}/issues/comments/${EXISTING}" \
+              -X PATCH -f body="$FULL_BODY"
+          else
+            gh api "repos/${REPO}/issues/${PR_NUMBER}/comments" \
+              -f body="$FULL_BODY"
+          fi
+
+      - name: Upload benchmark artifacts
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: benchmark-artifacts-${{ github.event.pull_request.number }}
+          path: .perf-artifacts/
+
+      - name: Fail if benchmarks failed
+        if: steps.bench.outputs.exit_code != '0'
+        run: exit 1
diff --git a/.gitignore b/.gitignore
index 1389d657..186463f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
*.pyc
/src/tigrcorn.egg-info
+.perf-artifacts/
diff --git a/src/tigrcorn/compat/perf_runner.py b/src/tigrcorn/compat/perf_runner.py
index 3cb4e405..4c6ccc06 100644
--- a/src/tigrcorn/compat/perf_runner.py
+++ b/src/tigrcorn/compat/perf_runner.py
@@ -3,6 +3,7 @@
import json
import os
import platform
+import random
import subprocess
import sys
import time
@@ -67,6 +68,8 @@ class PerfRunSummary:
passed: int
failed: int
profiles: list[PerfProfileResult]
+ shuffle_seed: int | None = None
+ execution_order: list[str] | None = None
class PerfRunnerError(RuntimeError):
@@ -115,12 +118,19 @@ def run_performance_matrix(
baseline_root: str | Path | None = None,
profile_ids: list[str] | None = None,
establish_baseline: bool = False,
+ shuffle: bool = False,
+ seed: int | None = None,
) -> PerfRunSummary:
source_root = Path(source_root)
matrix_file = source_root / (Path(matrix_path) if matrix_path is not None else DEFAULT_PERFORMANCE_MATRIX_PATH)
matrix = load_performance_matrix(matrix_file)
selected_ids = set(profile_ids or [profile.profile_id for profile in matrix.profiles])
selected_profiles = [profile for profile in matrix.profiles if profile.profile_id in selected_ids]
+ effective_seed: int | None = None
+ if shuffle:
+ effective_seed = seed if seed is not None else random.randint(0, 2**32 - 1)
+ rng = random.Random(effective_seed)
+ rng.shuffle(selected_profiles)
if not selected_profiles:
raise PerfRunnerError('no performance profiles selected')
@@ -199,8 +209,15 @@ def run_performance_matrix(
passed=sum(1 for result in results if result.passed),
failed=sum(1 for result in results if not result.passed),
profiles=results,
+ shuffle_seed=effective_seed if shuffle else None,
+ execution_order=[p.profile_id for p in selected_profiles] if shuffle else None,
)
- _write_run_summary(artifact_root, summary, environment, profiles=selected_profiles)
+ shuffle_meta = {
+ 'enabled': True,
+ 'seed': effective_seed,
+ 'execution_order': [p.profile_id for p in selected_profiles],
+ } if shuffle else None
+ _write_run_summary(artifact_root, summary, environment, profiles=selected_profiles, shuffle_metadata=shuffle_meta)
return summary
@@ -600,7 +617,7 @@ def _write_samples_csv(path: Path, samples: list[Any]) -> None:
path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
-def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile]) -> None:
+def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile], shuffle_metadata: dict[str, Any] | None = None) -> None:
lane_counts: dict[str, int] = {}
for profile in profiles:
lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1
@@ -625,6 +642,8 @@ def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment
],
'generated_at_epoch': environment.get('generated_at_epoch'),
}
+ if shuffle_metadata is not None:
+ payload['shuffle'] = shuffle_metadata
(artifact_root / 'summary.json').write_text(json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n', encoding='utf-8')
(artifact_root / 'index.json').write_text(json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n', encoding='utf-8')
diff --git a/tests/perf/test_shuffle_order.py b/tests/perf/test_shuffle_order.py
new file mode 100644
index 00000000..8cc40c26
--- /dev/null
+++ b/tests/perf/test_shuffle_order.py
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import json
+import tempfile
+from pathlib import Path
+
+from tigrcorn.compat.perf_runner import load_performance_matrix, run_performance_matrix
+
+ROOT = Path(__file__).resolve().parents[2]
+MATRIX_PATH = 'docs/review/performance/performance_matrix.json'
+
+
+def _default_profile_order(profile_ids: list[str]) -> list[str]:
+ matrix = load_performance_matrix(ROOT / MATRIX_PATH)
+ return [p.profile_id for p in matrix.profiles if p.profile_id in set(profile_ids)]
+
+
+def test_same_seed_produces_identical_order():
+ profile_ids = ['http11_baseline', 'http11_keepalive', 'ws_http11', 'tls_handshake']
+ with tempfile.TemporaryDirectory() as tmp1, tempfile.TemporaryDirectory() as tmp2:
+ s1 = run_performance_matrix(
+ ROOT, artifact_root=Path(tmp1) / 'perf', profile_ids=profile_ids,
+ establish_baseline=True, shuffle=True, seed=12345,
+ )
+ s2 = run_performance_matrix(
+ ROOT, artifact_root=Path(tmp2) / 'perf', profile_ids=profile_ids,
+ establish_baseline=True, shuffle=True, seed=12345,
+ )
+ assert s1.execution_order == s2.execution_order
+
+
+def test_shuffle_changes_order():
+ profile_ids = ['http11_baseline', 'http11_keepalive', 'ws_http11', 'tls_handshake']
+ original_order = _default_profile_order(profile_ids)
+ found_different = False
+ for seed in range(10):
+ with tempfile.TemporaryDirectory() as tmp:
+ s = run_performance_matrix(
+ ROOT, artifact_root=Path(tmp) / 'perf', profile_ids=profile_ids,
+ establish_baseline=True, shuffle=True, seed=seed,
+ )
+ if s.execution_order != original_order:
+ found_different = True
+ break
+ assert found_different, 'shuffle never produced a different order across 10 seeds'
+
+
+def test_seed_recorded_in_artifact():
+ seed = 99999
+ with tempfile.TemporaryDirectory() as tmp:
+ summary = run_performance_matrix(
+ ROOT, artifact_root=Path(tmp) / 'perf', profile_ids=['http11_baseline'],
+ establish_baseline=True, shuffle=True, seed=seed,
+ )
+ artifact = json.loads((Path(tmp) / 'perf' / 'summary.json').read_text(encoding='utf-8'))
+ assert artifact['shuffle']['seed'] == seed
+ assert artifact['shuffle']['enabled'] is True
+ assert artifact['shuffle']['execution_order'] == [summary.profiles[0].profile_id]
+
+
+def test_no_shuffle_omits_metadata():
+ with tempfile.TemporaryDirectory() as tmp:
+ summary = run_performance_matrix(
+ ROOT, artifact_root=Path(tmp) / 'perf', profile_ids=['http11_baseline'],
+ establish_baseline=True, shuffle=False,
+ )
+ assert summary.shuffle_seed is None
+ assert summary.execution_order is None
+ artifact = json.loads((Path(tmp) / 'perf' / 'summary.json').read_text(encoding='utf-8'))
+ assert 'shuffle' not in artifact
diff --git a/tools/format_benchmark_comment.py b/tools/format_benchmark_comment.py
new file mode 100644
index 00000000..57e2e454
--- /dev/null
+++ b/tools/format_benchmark_comment.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Format benchmark artifacts into a GitHub PR comment (markdown)."""
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+
+def load_json(path: Path) -> dict:
+ return json.loads(path.read_text(encoding='utf-8'))
+
+
+def fmt_throughput(value: float) -> str:
+ return f'{value:,.2f}'
+
+
+def fmt_latency(value: float) -> str:
+ return f'{value:.3f}'
+
+
+def fmt_error_rate(value: float) -> str:
+ return f'{value:.3f}'
+
+
+def format_comment(summary: dict, artifact_root: Path) -> str:
+ total = summary.get('total', 0)
+ passed = summary.get('passed', 0)
+ failed = summary.get('failed', 0)
+ commit = summary.get('commit_hash', 'unknown')
+ platform = summary.get('certification_platform', 'unknown')
+ shuffle = summary.get('shuffle', {})
+ seed = shuffle.get('seed') if shuffle else None
+
+ lines: list[str] = []
+ lines.append('## Benchmark Results\n')
+
+ status_icon = ':white_check_mark:' if failed == 0 else ':x:'
+ lines.append(f'**Status:** {status_icon} {passed}/{total} passed, {failed} failed | **Commit:** `{commit[:8]}`')
+ meta = f'**Platform:** {platform}'
+ if seed is not None:
+ meta += f' | **Shuffle seed:** {seed}'
+ lines.append(meta)
+ lines.append('')
+
+ # Collect per-profile data grouped by lane
+ profiles_by_lane: dict[str, list[dict]] = {}
+ failures: list[dict] = []
+
+ for entry in summary.get('profiles', []):
+ pid = entry['profile_id']
+ profile_summary_path = artifact_root / pid / 'summary.json'
+ if profile_summary_path.exists():
+ profile_data = load_json(profile_summary_path)
+ else:
+ profile_data = {'lane': 'unknown', 'metrics': {}, 'passed': entry.get('passed', False)}
+
+ profile_data['_profile_id'] = pid
+ profile_data['_failure_reasons'] = entry.get('failure_reasons', [])
+ profile_data['_passed'] = entry.get('passed', True)
+
+ lane = profile_data.get('lane', 'unknown')
+ profiles_by_lane.setdefault(lane, []).append(profile_data)
+
+ if not entry.get('passed', True):
+ failures.append(profile_data)
+
+ # Failures section
+ if failures:
+ lines.append('### Failures\n')
+ lines.append('| Profile | Reasons |')
+ lines.append('|---------|---------|')
+ for f in failures:
+ reasons = '; '.join(f['_failure_reasons']) if f['_failure_reasons'] else 'unknown'
+ lines.append(f'| {f["_profile_id"]} | {reasons} |')
+ lines.append('')
+
+ # Results by lane
+ lines.append('### Results\n')
+
+ for lane, profiles in sorted(profiles_by_lane.items()):
+ count = len(profiles)
+ lines.append(f'{lane} ({count} profiles)
\n')
+ lines.append('| Profile | Status | Throughput (ops/s) | p99 (ms) | p99.9 (ms) | Error Rate |')
+ lines.append('|---------|--------|--------------------|----------|------------|------------|')
+
+ for p in profiles:
+ pid = p['_profile_id']
+ icon = ':white_check_mark:' if p['_passed'] else ':x:'
+ m = p.get('metrics', {})
+ throughput = fmt_throughput(m.get('throughput_ops_per_sec', 0))
+ p99 = fmt_latency(m.get('p99_ms', 0))
+ p99_9 = fmt_latency(m.get('p99_9_ms', 0))
+ err = fmt_error_rate(m.get('error_rate', 0))
+ lines.append(f'| {pid} | {icon} | {throughput} | {p99} | {p99_9} | {err} |')
+
+ lines.append('\n \n')
+
+ return '\n'.join(lines)
+
+
+def main() -> int:
+ if len(sys.argv) < 2:
+ print('Usage: format_benchmark_comment.py ', file=sys.stderr)
+ return 1
+
+ summary_path = Path(sys.argv[1])
+ if not summary_path.exists():
+ print(f'Error: {summary_path} not found', file=sys.stderr)
+ return 1
+
+ artifact_root = summary_path.parent
+ summary = load_json(summary_path)
+ print(format_comment(summary, artifact_root))
+ return 0
+
+
+if __name__ == '__main__':
+ raise SystemExit(main())
diff --git a/tools/run_perf_matrix.py b/tools/run_perf_matrix.py
index 0e4dedd0..a321e9ce 100644
--- a/tools/run_perf_matrix.py
+++ b/tools/run_perf_matrix.py
@@ -33,6 +33,8 @@ def build_parser() -> argparse.ArgumentParser:
parser.add_argument('--list-profiles', action='store_true', help='List profile ids and exit.')
parser.add_argument('--list-lanes', action='store_true', help='List matrix lanes and their profile ids, then exit.')
parser.add_argument('--validate', action='store_true', help='Validate an existing artifact root instead of running benchmarks.')
+ parser.add_argument('--shuffle', action='store_true', help='Randomize profile execution order.')
+ parser.add_argument('--seed', type=int, default=None, help='Random seed for reproducible shuffle (implies --shuffle).')
return parser
@@ -71,11 +73,13 @@ def main(argv: list[str] | None = None) -> int:
baseline_root=None if ns.establish_baseline else ns.baseline_root,
profile_ids=ns.profiles,
establish_baseline=ns.establish_baseline,
+ shuffle=ns.shuffle or ns.seed is not None,
+ seed=ns.seed,
)
lane_counts: dict[str, int] = {}
for profile in matrix.profiles:
lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1
- print(json.dumps({
+ output = {
'matrix_name': summary.matrix_name,
'artifact_root': summary.artifact_root,
'baseline_root': summary.baseline_root,
@@ -83,7 +87,11 @@ def main(argv: list[str] | None = None) -> int:
'failed': summary.failed,
'total': summary.total,
'lane_counts': lane_counts,
- }, indent=2, sort_keys=True))
+ }
+ if summary.shuffle_seed is not None:
+ output['shuffle_seed'] = summary.shuffle_seed
+ output['execution_order'] = summary.execution_order
+ print(json.dumps(output, indent=2, sort_keys=True))
return 0 if summary.failed == 0 else 1