From 493258ac68130f36fe29b0fa69d3315647d0a600 Mon Sep 17 00:00:00 2001 From: MichaelDecent Date: Wed, 1 Apr 2026 15:23:26 +0100 Subject: [PATCH 1/2] feat: add shuffle functionality to performance runner - Introduced a shuffle option in the performance matrix runner to randomize profile execution order. - Added a seed parameter for reproducibility of the shuffle. - Updated PerfRunSummary to include shuffle metadata. - Implemented tests to verify shuffle behavior and seed consistency. - Enhanced command-line interface to support new shuffle and seed arguments. --- src/tigrcorn/compat/perf_runner.py | 23 +++++++++- tests/perf/test_shuffle_order.py | 70 ++++++++++++++++++++++++++++++ tools/run_perf_matrix.py | 12 ++++- 3 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 tests/perf/test_shuffle_order.py diff --git a/src/tigrcorn/compat/perf_runner.py b/src/tigrcorn/compat/perf_runner.py index 3cb4e405..4c6ccc06 100644 --- a/src/tigrcorn/compat/perf_runner.py +++ b/src/tigrcorn/compat/perf_runner.py @@ -3,6 +3,7 @@ import json import os import platform +import random import subprocess import sys import time @@ -67,6 +68,8 @@ class PerfRunSummary: passed: int failed: int profiles: list[PerfProfileResult] + shuffle_seed: int | None = None + execution_order: list[str] | None = None class PerfRunnerError(RuntimeError): @@ -115,12 +118,19 @@ def run_performance_matrix( baseline_root: str | Path | None = None, profile_ids: list[str] | None = None, establish_baseline: bool = False, + shuffle: bool = False, + seed: int | None = None, ) -> PerfRunSummary: source_root = Path(source_root) matrix_file = source_root / (Path(matrix_path) if matrix_path is not None else DEFAULT_PERFORMANCE_MATRIX_PATH) matrix = load_performance_matrix(matrix_file) selected_ids = set(profile_ids or [profile.profile_id for profile in matrix.profiles]) selected_profiles = [profile for profile in matrix.profiles if profile.profile_id in selected_ids] + effective_seed: int | None = None + if shuffle: + effective_seed = seed if seed is not None else random.randint(0, 2**32 - 1) + rng = random.Random(effective_seed) + rng.shuffle(selected_profiles) if not selected_profiles: raise PerfRunnerError('no performance profiles selected') @@ -199,8 +209,15 @@ def run_performance_matrix( passed=sum(1 for result in results if result.passed), failed=sum(1 for result in results if not result.passed), profiles=results, + shuffle_seed=effective_seed if shuffle else None, + execution_order=[p.profile_id for p in selected_profiles] if shuffle else None, ) - _write_run_summary(artifact_root, summary, environment, profiles=selected_profiles) + shuffle_meta = { + 'enabled': True, + 'seed': effective_seed, + 'execution_order': [p.profile_id for p in selected_profiles], + } if shuffle else None + _write_run_summary(artifact_root, summary, environment, profiles=selected_profiles, shuffle_metadata=shuffle_meta) return summary @@ -600,7 +617,7 @@ def _write_samples_csv(path: Path, samples: list[Any]) -> None: path.write_text('\n'.join(lines) + '\n', encoding='utf-8') -def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile]) -> None: +def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile], shuffle_metadata: dict[str, Any] | None = None) -> None: lane_counts: dict[str, int] = {} for profile in profiles: lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1 @@ -625,6 +642,8 @@ def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment ], 'generated_at_epoch': environment.get('generated_at_epoch'), } + if shuffle_metadata is not None: + payload['shuffle'] = shuffle_metadata (artifact_root / 'summary.json').write_text(json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n', encoding='utf-8') (artifact_root / 'index.json').write_text(json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n', encoding='utf-8') diff --git a/tests/perf/test_shuffle_order.py b/tests/perf/test_shuffle_order.py new file mode 100644 index 00000000..8cc40c26 --- /dev/null +++ b/tests/perf/test_shuffle_order.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import json +import tempfile +from pathlib import Path + +from tigrcorn.compat.perf_runner import load_performance_matrix, run_performance_matrix + +ROOT = Path(__file__).resolve().parents[2] +MATRIX_PATH = 'docs/review/performance/performance_matrix.json' + + +def _default_profile_order(profile_ids: list[str]) -> list[str]: + matrix = load_performance_matrix(ROOT / MATRIX_PATH) + return [p.profile_id for p in matrix.profiles if p.profile_id in set(profile_ids)] + + +def test_same_seed_produces_identical_order(): + profile_ids = ['http11_baseline', 'http11_keepalive', 'ws_http11', 'tls_handshake'] + with tempfile.TemporaryDirectory() as tmp1, tempfile.TemporaryDirectory() as tmp2: + s1 = run_performance_matrix( + ROOT, artifact_root=Path(tmp1) / 'perf', profile_ids=profile_ids, + establish_baseline=True, shuffle=True, seed=12345, + ) + s2 = run_performance_matrix( + ROOT, artifact_root=Path(tmp2) / 'perf', profile_ids=profile_ids, + establish_baseline=True, shuffle=True, seed=12345, + ) + assert s1.execution_order == s2.execution_order + + +def test_shuffle_changes_order(): + profile_ids = ['http11_baseline', 'http11_keepalive', 'ws_http11', 'tls_handshake'] + original_order = _default_profile_order(profile_ids) + found_different = False + for seed in range(10): + with tempfile.TemporaryDirectory() as tmp: + s = run_performance_matrix( + ROOT, artifact_root=Path(tmp) / 'perf', profile_ids=profile_ids, + establish_baseline=True, shuffle=True, seed=seed, + ) + if s.execution_order != original_order: + found_different = True + break + assert found_different, 'shuffle never produced a different order across 10 seeds' + + +def test_seed_recorded_in_artifact(): + seed = 99999 + with tempfile.TemporaryDirectory() as tmp: + summary = run_performance_matrix( + ROOT, artifact_root=Path(tmp) / 'perf', profile_ids=['http11_baseline'], + establish_baseline=True, shuffle=True, seed=seed, + ) + artifact = json.loads((Path(tmp) / 'perf' / 'summary.json').read_text(encoding='utf-8')) + assert artifact['shuffle']['seed'] == seed + assert artifact['shuffle']['enabled'] is True + assert artifact['shuffle']['execution_order'] == [summary.profiles[0].profile_id] + + +def test_no_shuffle_omits_metadata(): + with tempfile.TemporaryDirectory() as tmp: + summary = run_performance_matrix( + ROOT, artifact_root=Path(tmp) / 'perf', profile_ids=['http11_baseline'], + establish_baseline=True, shuffle=False, + ) + assert summary.shuffle_seed is None + assert summary.execution_order is None + artifact = json.loads((Path(tmp) / 'perf' / 'summary.json').read_text(encoding='utf-8')) + assert 'shuffle' not in artifact diff --git a/tools/run_perf_matrix.py b/tools/run_perf_matrix.py index 0e4dedd0..a321e9ce 100644 --- a/tools/run_perf_matrix.py +++ b/tools/run_perf_matrix.py @@ -33,6 +33,8 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument('--list-profiles', action='store_true', help='List profile ids and exit.') parser.add_argument('--list-lanes', action='store_true', help='List matrix lanes and their profile ids, then exit.') parser.add_argument('--validate', action='store_true', help='Validate an existing artifact root instead of running benchmarks.') + parser.add_argument('--shuffle', action='store_true', help='Randomize profile execution order.') + parser.add_argument('--seed', type=int, default=None, help='Random seed for reproducible shuffle (implies --shuffle).') return parser @@ -71,11 +73,13 @@ def main(argv: list[str] | None = None) -> int: baseline_root=None if ns.establish_baseline else ns.baseline_root, profile_ids=ns.profiles, establish_baseline=ns.establish_baseline, + shuffle=ns.shuffle or ns.seed is not None, + seed=ns.seed, ) lane_counts: dict[str, int] = {} for profile in matrix.profiles: lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1 - print(json.dumps({ + output = { 'matrix_name': summary.matrix_name, 'artifact_root': summary.artifact_root, 'baseline_root': summary.baseline_root, @@ -83,7 +87,11 @@ def main(argv: list[str] | None = None) -> int: 'failed': summary.failed, 'total': summary.total, 'lane_counts': lane_counts, - }, indent=2, sort_keys=True)) + } + if summary.shuffle_seed is not None: + output['shuffle_seed'] = summary.shuffle_seed + output['execution_order'] = summary.execution_order + print(json.dumps(output, indent=2, sort_keys=True)) return 0 if summary.failed == 0 else 1 From bcc4b6b5af80039a0bf77d8749b1418228a11725 Mon Sep 17 00:00:00 2001 From: MichaelDecent Date: Wed, 1 Apr 2026 17:45:01 +0100 Subject: [PATCH 2/2] feat: add GitHub Actions workflow for benchmarking - Introduced a new workflow to run benchmarks on pull requests. - Added a script to format benchmark results into a GitHub PR comment. - Updated .gitignore to exclude benchmark artifacts directory. --- .github/workflows/benchmark-pr.yml | 77 +++++++++++++++++++ .gitignore | 1 + tools/format_benchmark_comment.py | 119 +++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 .github/workflows/benchmark-pr.yml create mode 100644 tools/format_benchmark_comment.py diff --git a/.github/workflows/benchmark-pr.yml b/.github/workflows/benchmark-pr.yml new file mode 100644 index 00000000..1845bce1 --- /dev/null +++ b/.github/workflows/benchmark-pr.yml @@ -0,0 +1,77 @@ +name: benchmark-pr + +on: + pull_request: + types: [opened, synchronize] + +permissions: + pull-requests: write + contents: read + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: Install dependencies + run: uv pip install --system -e ".[certification,dev]" + + - name: Run benchmarks + id: bench + run: | + mkdir -p .perf-artifacts + set +e + PYTHONPATH=src python tools/run_perf_matrix.py \ + --establish-baseline \ + --shuffle \ + --artifact-root .perf-artifacts \ + > .perf-artifacts/runner_output.json 2>&1 + EXIT_CODE=$? + set -e + echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT" + + - name: Format benchmark comment + run: | + PYTHONPATH=src python tools/format_benchmark_comment.py \ + .perf-artifacts/summary.json > .perf-artifacts/comment.md + + - name: Post or update PR comment + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + COMMENT_TAG="" + BODY=$(cat .perf-artifacts/comment.md) + FULL_BODY="${COMMENT_TAG} + ${BODY}" + + EXISTING=$(gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \ + --jq '[.[] | select(.body | startswith("")) | .id][0]' 2>/dev/null || true) + + if [ -n "$EXISTING" ] && [ "$EXISTING" != "null" ]; then + gh api "repos/${{ github.repository }}/issues/comments/${EXISTING}" \ + -X PATCH -f body="$FULL_BODY" + else + gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \ + -f body="$FULL_BODY" + fi + + - name: Upload benchmark artifacts + uses: actions/upload-artifact@v4 + if: always() + with: + name: benchmark-artifacts-${{ github.event.pull_request.number }} + path: .perf-artifacts/ + + - name: Fail if benchmarks failed + if: steps.bench.outputs.exit_code != '0' + run: exit 1 diff --git a/.gitignore b/.gitignore index 1389d657..186463f5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc /src/tigrcorn.egg-info +.perf-artifacts/ diff --git a/tools/format_benchmark_comment.py b/tools/format_benchmark_comment.py new file mode 100644 index 00000000..57e2e454 --- /dev/null +++ b/tools/format_benchmark_comment.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +"""Format benchmark artifacts into a GitHub PR comment (markdown).""" +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +def load_json(path: Path) -> dict: + return json.loads(path.read_text(encoding='utf-8')) + + +def fmt_throughput(value: float) -> str: + return f'{value:,.2f}' + + +def fmt_latency(value: float) -> str: + return f'{value:.3f}' + + +def fmt_error_rate(value: float) -> str: + return f'{value:.3f}' + + +def format_comment(summary: dict, artifact_root: Path) -> str: + total = summary.get('total', 0) + passed = summary.get('passed', 0) + failed = summary.get('failed', 0) + commit = summary.get('commit_hash', 'unknown') + platform = summary.get('certification_platform', 'unknown') + shuffle = summary.get('shuffle', {}) + seed = shuffle.get('seed') if shuffle else None + + lines: list[str] = [] + lines.append('## Benchmark Results\n') + + status_icon = ':white_check_mark:' if failed == 0 else ':x:' + lines.append(f'**Status:** {status_icon} {passed}/{total} passed, {failed} failed | **Commit:** `{commit[:8]}`') + meta = f'**Platform:** {platform}' + if seed is not None: + meta += f' | **Shuffle seed:** {seed}' + lines.append(meta) + lines.append('') + + # Collect per-profile data grouped by lane + profiles_by_lane: dict[str, list[dict]] = {} + failures: list[dict] = [] + + for entry in summary.get('profiles', []): + pid = entry['profile_id'] + profile_summary_path = artifact_root / pid / 'summary.json' + if profile_summary_path.exists(): + profile_data = load_json(profile_summary_path) + else: + profile_data = {'lane': 'unknown', 'metrics': {}, 'passed': entry.get('passed', False)} + + profile_data['_profile_id'] = pid + profile_data['_failure_reasons'] = entry.get('failure_reasons', []) + profile_data['_passed'] = entry.get('passed', True) + + lane = profile_data.get('lane', 'unknown') + profiles_by_lane.setdefault(lane, []).append(profile_data) + + if not entry.get('passed', True): + failures.append(profile_data) + + # Failures section + if failures: + lines.append('### Failures\n') + lines.append('| Profile | Reasons |') + lines.append('|---------|---------|') + for f in failures: + reasons = '; '.join(f['_failure_reasons']) if f['_failure_reasons'] else 'unknown' + lines.append(f'| {f["_profile_id"]} | {reasons} |') + lines.append('') + + # Results by lane + lines.append('### Results\n') + + for lane, profiles in sorted(profiles_by_lane.items()): + count = len(profiles) + lines.append(f'
{lane} ({count} profiles)\n') + lines.append('| Profile | Status | Throughput (ops/s) | p99 (ms) | p99.9 (ms) | Error Rate |') + lines.append('|---------|--------|--------------------|----------|------------|------------|') + + for p in profiles: + pid = p['_profile_id'] + icon = ':white_check_mark:' if p['_passed'] else ':x:' + m = p.get('metrics', {}) + throughput = fmt_throughput(m.get('throughput_ops_per_sec', 0)) + p99 = fmt_latency(m.get('p99_ms', 0)) + p99_9 = fmt_latency(m.get('p99_9_ms', 0)) + err = fmt_error_rate(m.get('error_rate', 0)) + lines.append(f'| {pid} | {icon} | {throughput} | {p99} | {p99_9} | {err} |') + + lines.append('\n
\n') + + return '\n'.join(lines) + + +def main() -> int: + if len(sys.argv) < 2: + print('Usage: format_benchmark_comment.py ', file=sys.stderr) + return 1 + + summary_path = Path(sys.argv[1]) + if not summary_path.exists(): + print(f'Error: {summary_path} not found', file=sys.stderr) + return 1 + + artifact_root = summary_path.parent + summary = load_json(summary_path) + print(format_comment(summary, artifact_root)) + return 0 + + +if __name__ == '__main__': + raise SystemExit(main())