diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..dc46bb7 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,58 @@ +name: "Benchmark" + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ github.event.workflow_run.head_branch }} + token: ${{ github.actor == 'dependabot[bot]' && secrets.REPO_AND_READ_PACKAGES_PAT || secrets.XML_SUBMODULE_PAT }} + submodules: "recursive" + + - name: Set up Python 3.14 + uses: actions/setup-python@v6 + with: + python-version: "3.14" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[sqlmodels,ahbicht,benchmark]" + + - name: Download baseline (if PR) + id: download-baseline + if: github.event_name == 'pull_request' + uses: dawidd6/action-download-artifact@v6 + with: + name: benchmark-baseline + branch: main + path: .benchmarks/baseline + if_no_artifact_found: warn + + - name: Run benchmarks + run: | + pytest benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json + + - name: Compare against baseline (if PR and baseline exists) + if: github.event_name == 'pull_request' + run: | + if [ -f ".benchmarks/baseline/benchmark-results.json" ]; then + python scripts/compare_benchmarks.py .benchmarks/baseline/benchmark-results.json benchmark-results.json + else + echo "No baseline found. Skipping comparison (first run or expired artifact)." 
+ fi + + - name: Save baseline (if main) + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: benchmark-baseline + path: benchmark-results.json + retention-days: 90 diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000..2c17b5f --- /dev/null +++ b/benchmarks/__init__.py @@ -0,0 +1 @@ +# Benchmark tests for database query performance diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py new file mode 100644 index 0000000..7a25972 --- /dev/null +++ b/benchmarks/conftest.py @@ -0,0 +1,15 @@ +# Re-export fixtures from unittests for use in benchmarks +# The database creation happens in the fixture (module-scoped), not during benchmark timing. +# Only the SELECT queries inside benchmark() are timed. + +from unittests.conftest import ( + is_private_submodule_checked_out, + session_fv2410_fv2504_with_diff_view, + session_fv2510_fv2604_mscons_with_diff_view, +) + +__all__ = [ + "is_private_submodule_checked_out", + "session_fv2410_fv2504_with_diff_view", + "session_fv2510_fv2604_mscons_with_diff_view", +] diff --git a/benchmarks/test_diff_query.py b/benchmarks/test_diff_query.py new file mode 100644 index 0000000..ce36851 --- /dev/null +++ b/benchmarks/test_diff_query.py @@ -0,0 +1,58 @@ +""" +Benchmarks for v_ahb_diff query performance. + +These benchmarks measure SELECT query time only - database creation happens +in the module-scoped fixtures and is not included in the timing. 
+ +Run with: pytest benchmarks/ --benchmark-only +Save results: pytest benchmarks/ --benchmark-only --benchmark-json=results.json +""" + +import pytest +from sqlmodel import Session, text + +# FV2410 -> FV2504 benchmarks with representative pruefidentifikators +PRUEFIS_FV2410_FV2504 = ["55109", "13002", "25001"] + + +@pytest.mark.benchmark(group="v_ahb_diff_fv2410_fv2504") +@pytest.mark.parametrize("pruefi", PRUEFIS_FV2410_FV2504) +def test_diff_query_fv2410_fv2504(benchmark, session_fv2410_fv2504_with_diff_view: Session, pruefi: str) -> None: + """Benchmark diff query: FV2410 -> FV2504 with same pruefidentifikator.""" + query = text( + """ + SELECT * FROM v_ahb_diff + WHERE new_pruefidentifikator = :pruefi + AND old_pruefidentifikator = :pruefi + AND new_format_version = 'FV2504' + AND old_format_version = 'FV2410' + ORDER BY sort_path ASC + """ + ) + + def run_query(): + return list(session_fv2410_fv2504_with_diff_view.execute(query, {"pruefi": pruefi})) + + result = benchmark.pedantic(run_query, iterations=10, rounds=1) + assert len(result) > 0, f"Query returned no results for pruefi {pruefi}" + + +@pytest.mark.benchmark(group="v_ahb_diff_fv2510_fv2604") +def test_diff_query_fv2510_fv2604_13009(benchmark, session_fv2510_fv2604_mscons_with_diff_view: Session) -> None: + """Benchmark diff query: FV2510 -> FV2604, pruefidentifikator 13009.""" + query = text( + """ + SELECT * FROM v_ahb_diff + WHERE new_pruefidentifikator = '13009' + AND old_pruefidentifikator = '13009' + AND new_format_version = 'FV2604' + AND old_format_version = 'FV2510' + ORDER BY sort_path ASC + """ + ) + + def run_query(): + return list(session_fv2510_fv2604_mscons_with_diff_view.execute(query)) + + result = benchmark.pedantic(run_query, iterations=10, rounds=1) + assert len(result) > 0, "Query returned no results for pruefi 13009" diff --git a/pyproject.toml b/pyproject.toml index f2df18c..68dcdf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,10 @@ tests = 
[ "pytest==9.0.2", "syrupy==5.0.0" ] +benchmark = [ + "pytest==9.0.2", + "pytest-benchmark==5.1.0" +] type_check = [ "mypy==1.19.1" ] diff --git a/scripts/compare_benchmarks.py b/scripts/compare_benchmarks.py new file mode 100644 index 0000000..8c090c5 --- /dev/null +++ b/scripts/compare_benchmarks.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +"""Compare benchmark results and report regressions.""" +import argparse +import json +import sys +from pathlib import Path + +REGRESSION_THRESHOLD = 0.20 # 20% slower = regression + + +def load_benchmarks(path: Path) -> dict[str, float]: + """Load benchmark results and return dict of test name -> median time.""" + with open(path) as f: + data = json.load(f) + return {b["name"]: b["stats"]["median"] for b in data["benchmarks"]} + + +def main() -> int: + parser = argparse.ArgumentParser(description="Compare benchmark results and report regressions.") + parser.add_argument("baseline", type=Path, help="Path to baseline benchmark JSON file") + parser.add_argument("current", type=Path, help="Path to current benchmark JSON file") + args = parser.parse_args() + + baseline_path: Path = args.baseline + current_path: Path = args.current + + if not baseline_path.exists(): + print("No baseline found, skipping comparison.") + return 0 + + try: + baseline = load_benchmarks(baseline_path) + except (json.JSONDecodeError, KeyError) as e: + print(f"Error reading baseline: {e}") + return 0 + + try: + current = load_benchmarks(current_path) + except (json.JSONDecodeError, KeyError) as e: + print(f"Error reading current results: {e}") + return 1 + + regressions: list[tuple[str, float]] = [] + print("\n## Benchmark Comparison\n") + print("| Test | Baseline | Current | Change |") + print("|------|----------|---------|--------|") + + for name in sorted(set(baseline.keys()) | set(current.keys())): + if name not in current: + print(f"| {name} | {baseline[name]*1000:.1f}ms | - | REMOVED |") + continue + + current_time = current[name] + + if name not in 
baseline: + print(f"| {name} | - | {current_time*1000:.1f}ms | NEW |") + continue + + baseline_time = baseline[name] + change = (current_time - baseline_time) / baseline_time + + if change > REGRESSION_THRESHOLD: + status = "🔴" + elif change < -0.05: + status = "🟢" + else: + status = "" + + print(f"| {name} | {baseline_time*1000:.1f}ms | {current_time*1000:.1f}ms | {change:+.1%} {status} |") + + if change > REGRESSION_THRESHOLD: + regressions.append((name, change)) + + print() + if regressions: + print(f"❌ {len(regressions)} regression(s) detected (>{REGRESSION_THRESHOLD:.0%} slower)") + for name, change in regressions: + print(f" - {name}: {change:+.1%}") + return 1 + else: + print("✅ No regressions detected") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/xml-migs-and-ahbs b/xml-migs-and-ahbs index 9355743..c5d2687 160000 --- a/xml-migs-and-ahbs +++ b/xml-migs-and-ahbs @@ -1 +1 @@ -Subproject commit 93557438865a2846fed2b054bd59ad4b3f37a02f +Subproject commit c5d26876f1657b305995d0cb6c0d32a3f46b2abb