Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions .github/workflows/benchmark-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Runs the performance matrix on every pull-request update and reports the
# outcome as a single PR comment that is edited in place on later pushes.
name: benchmark-pr

on:
  pull_request:
    types: [opened, synchronize]

permissions:
  pull-requests: write
  contents: read

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Install dependencies
        run: uv pip install --system -e ".[certification,dev]"

      - name: Run benchmarks
        id: bench
        run: |
          mkdir -p .perf-artifacts
          # Record the exit code instead of failing here so the comment and
          # artifact steps still run; the last step re-raises the failure.
          set +e
          # stdout (the runner's JSON report) and stderr go to separate files so
          # warnings cannot corrupt runner_output.json.
          PYTHONPATH=src python tools/run_perf_matrix.py \
            --establish-baseline \
            --shuffle \
            --artifact-root .perf-artifacts \
            > .perf-artifacts/runner_output.json \
            2> .perf-artifacts/runner_stderr.log
          EXIT_CODE=$?
          set -e
          # Surface diagnostics in the job log as well as in the uploaded artifact.
          cat .perf-artifacts/runner_stderr.log >&2
          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"

      - name: Format benchmark comment
        run: |
          PYTHONPATH=src python tools/format_benchmark_comment.py \
            .perf-artifacts/summary.json > .perf-artifacts/comment.md

      - name: Post or update PR comment
        # Pull requests from forks receive a read-only GITHUB_TOKEN, so the
        # comment API call would fail and turn every fork PR red.
        if: github.event.pull_request.head.repo.full_name == github.repository
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          COMMENT_TAG="<!-- benchmark-results -->"
          BODY=$(cat .perf-artifacts/comment.md)
          FULL_BODY="${COMMENT_TAG}
          ${BODY}"

          # --paginate walks every page of comments (the API returns 30 per page
          # by default). The jq filter runs once per page, so it emits each
          # matching id on its own line and head keeps the first one.
          EXISTING=$(gh api --paginate "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
            --jq '.[] | select(.body | startswith("<!-- benchmark-results -->")) | .id' 2>/dev/null | head -n 1 || true)

          if [ -n "$EXISTING" ] && [ "$EXISTING" != "null" ]; then
            gh api "repos/${{ github.repository }}/issues/comments/${EXISTING}" \
              -X PATCH -f body="$FULL_BODY"
          else
            gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
              -f body="$FULL_BODY"
          fi

      - name: Upload benchmark artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: benchmark-artifacts-${{ github.event.pull_request.number }}
          path: .perf-artifacts/

      - name: Fail if benchmarks failed
        if: steps.bench.outputs.exit_code != '0'
        run: exit 1
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.pyc
/src/tigrcorn.egg-info
.perf-artifacts/
23 changes: 21 additions & 2 deletions src/tigrcorn/compat/perf_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import os
import platform
import random
import subprocess
import sys
import time
Expand Down Expand Up @@ -67,6 +68,8 @@ class PerfRunSummary:
passed: int
failed: int
profiles: list[PerfProfileResult]
shuffle_seed: int | None = None
execution_order: list[str] | None = None


class PerfRunnerError(RuntimeError):
Expand Down Expand Up @@ -115,12 +118,19 @@ def run_performance_matrix(
baseline_root: str | Path | None = None,
profile_ids: list[str] | None = None,
establish_baseline: bool = False,
shuffle: bool = False,
seed: int | None = None,
) -> PerfRunSummary:
source_root = Path(source_root)
matrix_file = source_root / (Path(matrix_path) if matrix_path is not None else DEFAULT_PERFORMANCE_MATRIX_PATH)
matrix = load_performance_matrix(matrix_file)
selected_ids = set(profile_ids or [profile.profile_id for profile in matrix.profiles])
selected_profiles = [profile for profile in matrix.profiles if profile.profile_id in selected_ids]
effective_seed: int | None = None
if shuffle:
effective_seed = seed if seed is not None else random.randint(0, 2**32 - 1)
rng = random.Random(effective_seed)
rng.shuffle(selected_profiles)
if not selected_profiles:
raise PerfRunnerError('no performance profiles selected')

Expand Down Expand Up @@ -199,8 +209,15 @@ def run_performance_matrix(
passed=sum(1 for result in results if result.passed),
failed=sum(1 for result in results if not result.passed),
profiles=results,
shuffle_seed=effective_seed if shuffle else None,
execution_order=[p.profile_id for p in selected_profiles] if shuffle else None,
)
_write_run_summary(artifact_root, summary, environment, profiles=selected_profiles)
shuffle_meta = {
'enabled': True,
'seed': effective_seed,
'execution_order': [p.profile_id for p in selected_profiles],
} if shuffle else None
_write_run_summary(artifact_root, summary, environment, profiles=selected_profiles, shuffle_metadata=shuffle_meta)
return summary


Expand Down Expand Up @@ -600,7 +617,7 @@ def _write_samples_csv(path: Path, samples: list[Any]) -> None:
path.write_text('\n'.join(lines) + '\n', encoding='utf-8')


def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile]) -> None:
def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment: Mapping[str, Any], *, profiles: list[PerfProfile], shuffle_metadata: dict[str, Any] | None = None) -> None:
lane_counts: dict[str, int] = {}
for profile in profiles:
lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1
Expand All @@ -625,6 +642,8 @@ def _write_run_summary(artifact_root: Path, summary: PerfRunSummary, environment
],
'generated_at_epoch': environment.get('generated_at_epoch'),
}
if shuffle_metadata is not None:
payload['shuffle'] = shuffle_metadata
(artifact_root / 'summary.json').write_text(json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n', encoding='utf-8')
(artifact_root / 'index.json').write_text(json.dumps(_jsonable(payload), indent=2, sort_keys=True) + '\n', encoding='utf-8')

Expand Down
70 changes: 70 additions & 0 deletions tests/perf/test_shuffle_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import annotations

import json
import tempfile
from pathlib import Path

from tigrcorn.compat.perf_runner import load_performance_matrix, run_performance_matrix

# Directory three levels above this file; for tests/perf/test_shuffle_order.py
# that is the repository root, which is passed to the runner as source_root.
ROOT = Path(__file__).resolve().parents[2]
# Location of the performance matrix definition, relative to ROOT.
MATRIX_PATH = 'docs/review/performance/performance_matrix.json'


def _default_profile_order(profile_ids: list[str]) -> list[str]:
    """Return the requested profile ids in the order the matrix file lists them.

    Ids that are not present in the matrix are silently left out.
    """
    # Build the lookup set once instead of once per profile in the filter.
    wanted = set(profile_ids)
    matrix = load_performance_matrix(ROOT / MATRIX_PATH)
    return [p.profile_id for p in matrix.profiles if p.profile_id in wanted]


def test_same_seed_produces_identical_order():
    """Two shuffled runs that share a seed must execute profiles in the same order."""
    profile_ids = ['http11_baseline', 'http11_keepalive', 'ws_http11', 'tls_handshake']
    with tempfile.TemporaryDirectory() as tmp1, tempfile.TemporaryDirectory() as tmp2:
        s1 = run_performance_matrix(
            ROOT, artifact_root=Path(tmp1) / 'perf', profile_ids=profile_ids,
            establish_baseline=True, shuffle=True, seed=12345,
        )
        s2 = run_performance_matrix(
            ROOT, artifact_root=Path(tmp2) / 'perf', profile_ids=profile_ids,
            establish_baseline=True, shuffle=True, seed=12345,
        )
    # Guard against a vacuous pass: if shuffling were silently disabled both
    # orders would be None and the equality below would still hold.
    assert s1.execution_order is not None
    assert s1.shuffle_seed == s2.shuffle_seed == 12345
    assert s1.execution_order == s2.execution_order


def test_shuffle_changes_order():
    """At least one of the seeds 0..9 must reorder the profiles relative to the matrix."""
    profile_ids = ['http11_baseline', 'http11_keepalive', 'ws_http11', 'tls_handshake']
    baseline_order = _default_profile_order(profile_ids)

    def order_for(seed: int):
        # Each attempt runs in its own scratch directory that is removed afterwards.
        with tempfile.TemporaryDirectory() as scratch:
            run = run_performance_matrix(
                ROOT,
                artifact_root=Path(scratch) / 'perf',
                profile_ids=profile_ids,
                establish_baseline=True,
                shuffle=True,
                seed=seed,
            )
            return run.execution_order

    # any() stops at the first seed whose order differs, so no extra runs happen.
    reordered = any(order_for(seed) != baseline_order for seed in range(10))
    assert reordered, 'shuffle never produced a different order across 10 seeds'


def test_seed_recorded_in_artifact():
    """The shuffle section of summary.json records the seed, the flag and the order."""
    expected_seed = 99999
    with tempfile.TemporaryDirectory() as scratch:
        perf_dir = Path(scratch) / 'perf'
        run = run_performance_matrix(
            ROOT,
            artifact_root=perf_dir,
            profile_ids=['http11_baseline'],
            establish_baseline=True,
            shuffle=True,
            seed=expected_seed,
        )
        raw = (perf_dir / 'summary.json').read_text(encoding='utf-8')
        shuffle_section = json.loads(raw)['shuffle']
        assert shuffle_section['seed'] == expected_seed
        assert shuffle_section['enabled'] is True
        assert shuffle_section['execution_order'] == [run.profiles[0].profile_id]


def test_no_shuffle_omits_metadata():
    """Without shuffling, neither the summary object nor summary.json mention it."""
    with tempfile.TemporaryDirectory() as scratch:
        perf_dir = Path(scratch) / 'perf'
        run = run_performance_matrix(
            ROOT,
            artifact_root=perf_dir,
            profile_ids=['http11_baseline'],
            establish_baseline=True,
            shuffle=False,
        )
        assert run.shuffle_seed is None
        assert run.execution_order is None
        written = json.loads((perf_dir / 'summary.json').read_text(encoding='utf-8'))
        assert 'shuffle' not in written
119 changes: 119 additions & 0 deletions tools/format_benchmark_comment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""Format benchmark artifacts into a GitHub PR comment (markdown)."""
from __future__ import annotations

import json
import sys
from pathlib import Path


def load_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON document stored at *path*."""
    with path.open(encoding='utf-8') as handle:
        return json.load(handle)


def fmt_throughput(value: float) -> str:
    """Render a throughput with thousands separators and two decimal places."""
    return '{:,.2f}'.format(value)


def fmt_latency(value: float) -> str:
    """Render a latency value with three decimal places."""
    return format(value, '.3f')


def fmt_error_rate(value: float) -> str:
    """Render an error rate with three decimal places."""
    return '{rate:.3f}'.format(rate=value)


# Shown in place of a metric that the profile summary does not provide.
_MISSING_METRIC = 'n/a'


def _table_cell(text: object) -> str:
    """Make *text* safe to embed in a GitHub-flavored Markdown table cell.

    A raw ``|`` would end the cell early and a line break would end the row, so
    pipes are backslash-escaped and line breaks are collapsed to single spaces.
    """
    return ' '.join(str(text).replace('|', '\\|').splitlines())


def format_comment(summary: dict, artifact_root: Path) -> str:
    """Build the Markdown body of the benchmark pull-request comment.

    Args:
        summary: Parsed run-level ``summary.json``. The keys read are ``total``,
            ``passed``, ``failed``, ``commit_hash``, ``certification_platform``,
            ``shuffle`` and ``profiles``. All are optional, and a JSON ``null``
            is treated like a missing key for the non-numeric ones.
        artifact_root: Directory that holds one ``<profile_id>/summary.json``
            per profile; that file supplies the profile's lane and metrics.

    Returns:
        The comment text: a status header, an optional failures table, and one
        collapsible results table per lane (lanes sorted by name).

    Raises:
        KeyError: If an entry in ``profiles`` has no ``profile_id``.
    """
    total = summary.get('total', 0)
    passed = summary.get('passed', 0)
    failed = summary.get('failed', 0)
    # ``or`` rather than a .get() default so an explicit null also falls back;
    # slicing None below would otherwise raise TypeError.
    commit = str(summary.get('commit_hash') or 'unknown')
    platform = summary.get('certification_platform') or 'unknown'
    shuffle = summary.get('shuffle') or {}
    seed = shuffle.get('seed')

    lines: list[str] = []
    lines.append('## Benchmark Results\n')

    status_icon = ':white_check_mark:' if failed == 0 else ':x:'
    lines.append(f'**Status:** {status_icon} {passed}/{total} passed, {failed} failed | **Commit:** `{commit[:8]}`')
    meta = f'**Platform:** {platform}'
    if seed is not None:
        meta += f' | **Shuffle seed:** {seed}'
    lines.append(meta)
    lines.append('')

    # Collect per-profile data grouped by lane
    profiles_by_lane: dict[str, list[dict]] = {}
    failures: list[dict] = []

    for entry in summary.get('profiles') or []:
        pid = entry['profile_id']
        profile_summary_path = artifact_root / pid / 'summary.json'
        if profile_summary_path.exists():
            profile_data = load_json(profile_summary_path)
        else:
            profile_data = {'lane': 'unknown', 'metrics': {}, 'passed': entry.get('passed', False)}

        # Underscore-prefixed keys carry run-level facts alongside the
        # per-profile document without clashing with its own keys.
        profile_data['_profile_id'] = pid
        profile_data['_failure_reasons'] = entry.get('failure_reasons') or []
        profile_data['_passed'] = entry.get('passed', True)

        # Coerce to str so sorting the lanes never compares mixed types.
        lane = str(profile_data.get('lane') or 'unknown')
        profiles_by_lane.setdefault(lane, []).append(profile_data)

        if not profile_data['_passed']:
            failures.append(profile_data)

    # Failures section
    if failures:
        lines.append('### Failures\n')
        lines.append('| Profile | Reasons |')
        lines.append('|---------|---------|')
        for f in failures:
            reasons = '; '.join(str(r) for r in f['_failure_reasons']) or 'unknown'
            lines.append(f'| {_table_cell(f["_profile_id"])} | {_table_cell(reasons)} |')
        lines.append('')

    # Results by lane
    lines.append('### Results\n')

    for lane, profiles in sorted(profiles_by_lane.items()):
        count = len(profiles)
        lines.append(f'<details><summary>{lane} ({count} profiles)</summary>\n')
        lines.append('| Profile | Status | Throughput (ops/s) | p99 (ms) | p99.9 (ms) | Error Rate |')
        lines.append('|---------|--------|--------------------|----------|------------|------------|')

        for p in profiles:
            pid = _table_cell(p['_profile_id'])
            icon = ':white_check_mark:' if p['_passed'] else ':x:'
            m = p.get('metrics') or {}
            # A metric that was never recorded is reported as n/a rather than
            # as a measured zero, which would read like a real result.
            raw_throughput = m.get('throughput_ops_per_sec')
            raw_p99 = m.get('p99_ms')
            raw_p99_9 = m.get('p99_9_ms')
            raw_err = m.get('error_rate')
            throughput = _MISSING_METRIC if raw_throughput is None else fmt_throughput(raw_throughput)
            p99 = _MISSING_METRIC if raw_p99 is None else fmt_latency(raw_p99)
            p99_9 = _MISSING_METRIC if raw_p99_9 is None else fmt_latency(raw_p99_9)
            err = _MISSING_METRIC if raw_err is None else fmt_error_rate(raw_err)
            lines.append(f'| {pid} | {icon} | {throughput} | {p99} | {p99_9} | {err} |')

        lines.append('\n</details>\n')

    return '\n'.join(lines)


def main() -> int:
if len(sys.argv) < 2:
print('Usage: format_benchmark_comment.py <summary.json>', file=sys.stderr)
return 1

summary_path = Path(sys.argv[1])
if not summary_path.exists():
print(f'Error: {summary_path} not found', file=sys.stderr)
return 1

artifact_root = summary_path.parent
summary = load_json(summary_path)
print(format_comment(summary, artifact_root))
return 0


if __name__ == '__main__':
raise SystemExit(main())
12 changes: 10 additions & 2 deletions tools/run_perf_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def build_parser() -> argparse.ArgumentParser:
parser.add_argument('--list-profiles', action='store_true', help='List profile ids and exit.')
parser.add_argument('--list-lanes', action='store_true', help='List matrix lanes and their profile ids, then exit.')
parser.add_argument('--validate', action='store_true', help='Validate an existing artifact root instead of running benchmarks.')
parser.add_argument('--shuffle', action='store_true', help='Randomize profile execution order.')
parser.add_argument('--seed', type=int, default=None, help='Random seed for reproducible shuffle (implies --shuffle).')
return parser


Expand Down Expand Up @@ -71,19 +73,25 @@ def main(argv: list[str] | None = None) -> int:
baseline_root=None if ns.establish_baseline else ns.baseline_root,
profile_ids=ns.profiles,
establish_baseline=ns.establish_baseline,
shuffle=ns.shuffle or ns.seed is not None,
seed=ns.seed,
)
lane_counts: dict[str, int] = {}
for profile in matrix.profiles:
lane_counts[profile.lane] = lane_counts.get(profile.lane, 0) + 1
print(json.dumps({
output = {
'matrix_name': summary.matrix_name,
'artifact_root': summary.artifact_root,
'baseline_root': summary.baseline_root,
'passed': summary.passed,
'failed': summary.failed,
'total': summary.total,
'lane_counts': lane_counts,
}, indent=2, sort_keys=True))
}
if summary.shuffle_seed is not None:
output['shuffle_seed'] = summary.shuffle_seed
output['execution_order'] = summary.execution_order
print(json.dumps(output, indent=2, sort_keys=True))
return 0 if summary.failed == 0 else 1


Expand Down