From 9d6ff1fa80cb9cdf771d073935ad5e1384e442f3 Mon Sep 17 00:00:00 2001 From: Konstantin Date: Tue, 16 Dec 2025 14:10:00 +0100 Subject: [PATCH 1/4] feat(benchmark): add database query benchmark infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add pytest-benchmark based benchmarking for v_ahb_diff query performance: - benchmarks/test_diff_query.py: 4 benchmark tests - 3x FV2410 -> FV2504 with pruefis 55109, 13002, 25001 - 1x FV2510 -> FV2604 with pruefi 13009 - scripts/compare_benchmarks.py: CI comparison script - Compares against baseline, reports regressions (>20% slower) - Outputs markdown table with timing changes - .github/workflows/benchmark.yml: CI workflow - Saves baseline on main branch pushes - Compares PRs against baseline (graceful if missing) - pyproject.toml: add [benchmark] optional dependency Only SELECT query time is measured - database creation happens in module-scoped fixtures and is excluded from timing. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .github/workflows/benchmark.yml | 58 +++++++++++++++++++++++ benchmarks/__init__.py | 1 + benchmarks/conftest.py | 15 ++++++ benchmarks/test_diff_query.py | 58 +++++++++++++++++++++++ pyproject.toml | 4 ++ scripts/compare_benchmarks.py | 84 +++++++++++++++++++++++++++++++++ 6 files changed, 220 insertions(+) create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/conftest.py create mode 100644 benchmarks/test_diff_query.py create mode 100644 scripts/compare_benchmarks.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..a9ec53e --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,58 @@ +name: "Benchmark" + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 
+ with: + ref: ${{ github.event.workflow_run.head_branch }} + token: ${{ github.actor == 'dependabot[bot]' && secrets.REPO_AND_READ_PACKAGES_PAT || secrets.XML_SUBMODULE_PAT }} + submodules: "recursive" + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[sqlmodels,ahbicht,benchmark]" + + - name: Download baseline (if PR) + id: download-baseline + if: github.event_name == 'pull_request' + uses: dawidd6/action-download-artifact@v6 + with: + name: benchmark-baseline + branch: main + path: .benchmarks/baseline + if_no_artifact_found: warn + + - name: Run benchmarks + run: | + pytest benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json + + - name: Compare against baseline (if PR and baseline exists) + if: github.event_name == 'pull_request' + run: | + if [ -f ".benchmarks/baseline/benchmark-results.json" ]; then + python scripts/compare_benchmarks.py .benchmarks/baseline/benchmark-results.json benchmark-results.json + else + echo "No baseline found. Skipping comparison (first run or expired artifact)." + fi + + - name: Save baseline (if main) + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: benchmark-baseline + path: benchmark-results.json + retention-days: 90 diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000..2c17b5f --- /dev/null +++ b/benchmarks/__init__.py @@ -0,0 +1 @@ +# Benchmark tests for database query performance diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py new file mode 100644 index 0000000..7a25972 --- /dev/null +++ b/benchmarks/conftest.py @@ -0,0 +1,15 @@ +# Re-export fixtures from unittests for use in benchmarks +# The database creation happens in the fixture (module-scoped), not during benchmark timing. +# Only the SELECT queries inside benchmark() are timed. 
+ +from unittests.conftest import ( + is_private_submodule_checked_out, + session_fv2410_fv2504_with_diff_view, + session_fv2510_fv2604_mscons_with_diff_view, +) + +__all__ = [ + "is_private_submodule_checked_out", + "session_fv2410_fv2504_with_diff_view", + "session_fv2510_fv2604_mscons_with_diff_view", +] diff --git a/benchmarks/test_diff_query.py b/benchmarks/test_diff_query.py new file mode 100644 index 0000000..93b0cf3 --- /dev/null +++ b/benchmarks/test_diff_query.py @@ -0,0 +1,58 @@ +""" +Benchmarks for v_ahb_diff query performance. + +These benchmarks measure SELECT query time only - database creation happens +in the module-scoped fixtures and is not included in the timing. + +Run with: pytest benchmarks/ --benchmark-only +Save results: pytest benchmarks/ --benchmark-only --benchmark-json=results.json +""" + +import pytest +from sqlmodel import Session, text + +# FV2410 -> FV2504 benchmarks with representative pruefidentifikators +PRUEFIS_FV2410_FV2504 = ["55109", "13002", "25001"] + + +@pytest.mark.benchmark(group="v_ahb_diff_fv2410_fv2504") +@pytest.mark.parametrize("pruefi", PRUEFIS_FV2410_FV2504) +def test_diff_query_fv2410_fv2504( + benchmark, session_fv2410_fv2504_with_diff_view: Session, pruefi: str +) -> None: + """Benchmark diff query: FV2410 -> FV2504 with same pruefidentifikator.""" + query = text(""" + SELECT * FROM v_ahb_diff + WHERE new_pruefidentifikator = :pruefi + AND old_pruefidentifikator = :pruefi + AND new_format_version = 'FV2504' + AND old_format_version = 'FV2410' + ORDER BY sort_path ASC + """) + + def run_query(): + return list(session_fv2410_fv2504_with_diff_view.execute(query, {"pruefi": pruefi})) + + result = benchmark.pedantic(run_query, iterations=10, rounds=1) + assert len(result) > 0, f"Query returned no results for pruefi {pruefi}" + + +@pytest.mark.benchmark(group="v_ahb_diff_fv2510_fv2604") +def test_diff_query_fv2510_fv2604_13009( + benchmark, session_fv2510_fv2604_mscons_with_diff_view: Session +) -> None: +
"""Benchmark diff query: FV2510 -> FV2604, pruefidentifikator 13009.""" + query = text(""" + SELECT * FROM v_ahb_diff + WHERE new_pruefidentifikator = :pruefi + AND old_pruefidentifikator = :pruefi + AND new_format_version = 'FV2604' + AND old_format_version = 'FV2510' + ORDER BY sort_path ASC + """) + + def run_query(): + return list(session_fv2510_fv2604_mscons_with_diff_view.execute(query, {"pruefi": "13009"})) + + result = benchmark.pedantic(run_query, iterations=10, rounds=1) + assert len(result) > 0, "Query returned no results for pruefi 13009" diff --git a/pyproject.toml b/pyproject.toml index f2df18c..68dcdf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,10 @@ tests = [ "pytest==9.0.2", "syrupy==5.0.0" ] +benchmark = [ + "pytest==9.0.2", + "pytest-benchmark==5.1.0" +] type_check = [ "mypy==1.19.1" ] diff --git a/scripts/compare_benchmarks.py b/scripts/compare_benchmarks.py new file mode 100644 index 0000000..da891ec --- /dev/null +++ b/scripts/compare_benchmarks.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +"""Compare benchmark results and report regressions.""" +import json +import sys +from pathlib import Path + +REGRESSION_THRESHOLD = 0.20 # 20% slower = regression + + +def load_benchmarks(path: Path) -> dict[str, float]: + """Load benchmark results and return dict of test name -> median time.""" + with open(path) as f: + data = json.load(f) + return {b["name"]: b["stats"]["median"] for b in data["benchmarks"]} + + +def main() -> int: + if len(sys.argv) != 3: + print("Usage: compare_benchmarks.py ") + return 1 + + baseline_path = Path(sys.argv[1]) + current_path = Path(sys.argv[2]) + + if not baseline_path.exists(): + print("No baseline found, skipping comparison.") + return 0 + + try: + baseline = load_benchmarks(baseline_path) + except (json.JSONDecodeError, KeyError) as e: + print(f"Error reading baseline: {e}") + return 0 + + try: + current = load_benchmarks(current_path) + except (json.JSONDecodeError, KeyError) as e: + print(f"Error
reading current results: {e}") + return 1 + + regressions: list[tuple[str, float]] = [] + print("\n## Benchmark Comparison\n") + print("| Test | Baseline | Current | Change |") + print("|------|----------|---------|--------|") + + for name in sorted(set(baseline.keys()) | set(current.keys())): + if name not in current: + print(f"| {name} | {baseline[name]*1000:.1f}ms | - | REMOVED |") + continue + + current_time = current[name] + + if name not in baseline: + print(f"| {name} | - | {current_time*1000:.1f}ms | NEW |") + continue + + baseline_time = baseline[name] + change = (current_time - baseline_time) / baseline_time + + if change > REGRESSION_THRESHOLD: + status = "🔴" + elif change < -0.05: + status = "🟢" + else: + status = "" + + print(f"| {name} | {baseline_time*1000:.1f}ms | {current_time*1000:.1f}ms | {change:+.1%} {status} |") + + if change > REGRESSION_THRESHOLD: + regressions.append((name, change)) + + print() + if regressions: + print(f"❌ {len(regressions)} regression(s) detected (>{REGRESSION_THRESHOLD:.0%} slower)") + for name, change in regressions: + print(f" - {name}: {change:+.1%}") + return 1 + else: + print("✅ No regressions detected") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 30419b00054d94239a09ec1f6bcd32295f08c572 Mon Sep 17 00:00:00 2001 From: Konstantin Date: Tue, 16 Dec 2025 14:28:24 +0100 Subject: [PATCH 2/4] black --- benchmarks/test_diff_query.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/benchmarks/test_diff_query.py b/benchmarks/test_diff_query.py index 93b0cf3..ce36851 100644 --- a/benchmarks/test_diff_query.py +++ b/benchmarks/test_diff_query.py @@ -17,18 +17,18 @@ @pytest.mark.benchmark(group="v_ahb_diff_fv2410_fv2504") @pytest.mark.parametrize("pruefi", PRUEFIS_FV2410_FV2504) -def test_diff_query_fv2410_fv2504( - benchmark, session_fv2410_fv2504_with_diff_view: Session, pruefi: str -) -> None: +def test_diff_query_fv2410_fv2504(benchmark,
session_fv2410_fv2504_with_diff_view: Session, pruefi: str) -> None: """Benchmark diff query: FV2410 -> FV2504 with same pruefidentifikator.""" - query = text(""" + query = text( + """ SELECT * FROM v_ahb_diff WHERE new_pruefidentifikator = :pruefi AND old_pruefidentifikator = :pruefi AND new_format_version = 'FV2504' AND old_format_version = 'FV2410' ORDER BY sort_path ASC - """) + """ + ) def run_query(): return list(session_fv2410_fv2504_with_diff_view.execute(query, {"pruefi": pruefi})) @@ -38,18 +38,18 @@ def run_query(): @pytest.mark.benchmark(group="v_ahb_diff_fv2510_fv2604") -def test_diff_query_fv2510_fv2604_13009( - benchmark, session_fv2510_fv2604_mscons_with_diff_view: Session -) -> None: +def test_diff_query_fv2510_fv2604_13009(benchmark, session_fv2510_fv2604_mscons_with_diff_view: Session) -> None: """Benchmark diff query: FV2510 -> FV2604, pruefidentifikator 13009.""" - query = text(""" + query = text( + """ SELECT * FROM v_ahb_diff WHERE new_pruefidentifikator = :pruefi AND old_pruefidentifikator = :pruefi AND new_format_version = 'FV2604' AND old_format_version = 'FV2510' ORDER BY sort_path ASC - """) + """ + ) def run_query(): return list(session_fv2510_fv2604_mscons_with_diff_view.execute(query, {"pruefi": "13009"})) From 930be0ef30c6d0f5c18267627203a08e124382fc Mon Sep 17 00:00:00 2001 From: Konstantin Date: Tue, 16 Dec 2025 14:28:50 +0100 Subject: [PATCH 3/4] 314 --- .github/workflows/benchmark.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index a9ec53e..dc46bb7 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -16,10 +16,10 @@ jobs: token: ${{ github.actor == 'dependabot[bot]' && secrets.REPO_AND_READ_PACKAGES_PAT || secrets.XML_SUBMODULE_PAT }} submodules: "recursive" - - name: Set up Python 3.12 + - name: Set up Python 3.14 uses: actions/setup-python@v6 with: - python-version: "3.12" + python-version: "3.14" -
name: Install dependencies run: | From 884fbf85ae0347832193a7e88ecd0e9f1f5f31d5 Mon Sep 17 00:00:00 2001 From: Konstantin Date: Tue, 16 Dec 2025 14:29:00 +0100 Subject: [PATCH 4/4] fix(benchmark): address PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use Python 3.14 in benchmark workflow - Use argparse instead of manual sys.argv handling 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- scripts/compare_benchmarks.py | 12 +++++++----- xml-migs-and-ahbs | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/compare_benchmarks.py b/scripts/compare_benchmarks.py index da891ec..8c090c5 100644 --- a/scripts/compare_benchmarks.py +++ b/scripts/compare_benchmarks.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 """Compare benchmark results and report regressions.""" +import argparse import json import sys from pathlib import Path @@ -15,12 +16,13 @@ def load_benchmarks(path: Path) -> dict[str, float]: def main() -> int: - if len(sys.argv) != 3: - print("Usage: compare_benchmarks.py ") - return 1 + parser = argparse.ArgumentParser(description="Compare benchmark results and report regressions.") + parser.add_argument("baseline", type=Path, help="Path to baseline benchmark JSON file") + parser.add_argument("current", type=Path, help="Path to current benchmark JSON file") + args = parser.parse_args() - baseline_path = Path(sys.argv[1]) - current_path = Path(sys.argv[2]) + baseline_path: Path = args.baseline + current_path: Path = args.current if not baseline_path.exists(): print("No baseline found, skipping comparison.") diff --git a/xml-migs-and-ahbs b/xml-migs-and-ahbs index 9355743..c5d2687 160000 --- a/xml-migs-and-ahbs +++ b/xml-migs-and-ahbs @@ -1 +1 @@ -Subproject commit 93557438865a2846fed2b054bd59ad4b3f37a02f +Subproject commit c5d26876f1657b305995d0cb6c0d32a3f46b2abb