Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: "Benchmark"

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      # FIX: the previous `ref: ${{ github.event.workflow_run.head_branch }}`
      # was always empty — this workflow triggers on push/pull_request, never
      # workflow_run, so that event payload does not exist. Checkout's default
      # ref (the triggering commit / PR merge ref) is the correct behavior.
      - uses: actions/checkout@v6
        with:
          # NOTE(review): dependabot runs presumably need the packages-read
          # PAT while everyone else uses the submodule PAT — confirm both
          # secrets can read the private xml-migs-and-ahbs submodule.
          token: ${{ github.actor == 'dependabot[bot]' && secrets.REPO_AND_READ_PACKAGES_PAT || secrets.XML_SUBMODULE_PAT }}
          submodules: "recursive"

      - name: Set up Python 3.14
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[sqlmodels,ahbicht,benchmark]"

      # Pull the most recent benchmark JSON published from main so the PR run
      # has something to compare against. A missing artifact (first run or
      # expired retention) is only a warning, not a failure.
      - name: Download baseline (if PR)
        id: download-baseline
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v6
        with:
          name: benchmark-baseline
          branch: main
          path: .benchmarks/baseline
          if_no_artifact_found: warn

      - name: Run benchmarks
        run: |
          pytest benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json

      - name: Compare against baseline (if PR and baseline exists)
        if: github.event_name == 'pull_request'
        run: |
          if [ -f ".benchmarks/baseline/benchmark-results.json" ]; then
            python scripts/compare_benchmarks.py .benchmarks/baseline/benchmark-results.json benchmark-results.json
          else
            echo "No baseline found. Skipping comparison (first run or expired artifact)."
          fi

      # Only pushes to main publish a new baseline for future PRs to diff against.
      - name: Save baseline (if main)
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-baseline
          path: benchmark-results.json
          retention-days: 90
1 change: 1 addition & 0 deletions benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Benchmark tests for database query performance
15 changes: 15 additions & 0 deletions benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Re-export fixtures from unittests for use in benchmarks.
# The database creation happens in the fixture (module-scoped), not during
# benchmark timing — only the SELECT queries inside benchmark() are timed.
# pytest discovers fixtures by name, so importing them into this conftest
# makes them available to every test module under benchmarks/.

from unittests.conftest import (
    is_private_submodule_checked_out,
    session_fv2410_fv2504_with_diff_view,
    session_fv2510_fv2604_mscons_with_diff_view,
)

# Explicit re-export list: silences "unused import" linters and documents
# exactly which fixtures this package intentionally exposes.
__all__ = [
    "is_private_submodule_checked_out",
    "session_fv2410_fv2504_with_diff_view",
    "session_fv2510_fv2604_mscons_with_diff_view",
]
58 changes: 58 additions & 0 deletions benchmarks/test_diff_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Benchmarks for v_ahb_diff query performance.

These benchmarks measure SELECT query time only - database creation happens
in the module-scoped fixtures and is not included in the timing.

Run with: pytest benchmarks/ --benchmark-only
Save results: pytest benchmarks/ --benchmark-only --benchmark-json=results.json
"""

import pytest
from sqlmodel import Session, text

# Pruefidentifikators benchmarked for the FV2410 -> FV2504 diff; presumably
# chosen as representative of differently-sized AHBs — TODO confirm selection.
PRUEFIS_FV2410_FV2504 = ["55109", "13002", "25001"]


@pytest.mark.benchmark(group="v_ahb_diff_fv2410_fv2504")
@pytest.mark.parametrize("pruefi", PRUEFIS_FV2410_FV2504)
def test_diff_query_fv2410_fv2504(benchmark, session_fv2410_fv2504_with_diff_view: Session, pruefi: str) -> None:
    """Benchmark diff query: FV2410 -> FV2504 with same pruefidentifikator."""
    # Prepare the statement once, outside the timed callable.
    sql = text(
        """
        SELECT * FROM v_ahb_diff
        WHERE new_pruefidentifikator = :pruefi
        AND old_pruefidentifikator = :pruefi
        AND new_format_version = 'FV2504'
        AND old_format_version = 'FV2410'
        ORDER BY sort_path ASC
        """
    )

    def _select():
        # Fully materialize the rows so result fetching is part of the timing.
        return session_fv2410_fv2504_with_diff_view.execute(sql, {"pruefi": pruefi}).fetchall()

    rows = benchmark.pedantic(_select, iterations=10, rounds=1)
    assert len(rows) > 0, f"Query returned no results for pruefi {pruefi}"


@pytest.mark.benchmark(group="v_ahb_diff_fv2510_fv2604")
def test_diff_query_fv2510_fv2604_13009(benchmark, session_fv2510_fv2604_mscons_with_diff_view: Session) -> None:
    """Benchmark diff query: FV2510 -> FV2604, pruefidentifikator 13009."""
    # FIX: old/new format versions were swapped relative to the sibling
    # FV2410->FV2504 benchmark — for a "FV2510 -> FV2604" diff the *new*
    # version is FV2604 and the *old* one is FV2510.
    # Also bind :pruefi instead of hard-coding '13009' in the SQL, so the
    # params dict passed to execute() (previously dead) is actually used and
    # the query matches the parameterized style of the other benchmark.
    query = text(
        """
        SELECT * FROM v_ahb_diff
        WHERE new_pruefidentifikator = :pruefi
        AND old_pruefidentifikator = :pruefi
        AND new_format_version = 'FV2604'
        AND old_format_version = 'FV2510'
        ORDER BY sort_path ASC
        """
    )

    def run_query():
        # Materialize all rows so fetch time is included in the measurement.
        return list(session_fv2510_fv2604_mscons_with_diff_view.execute(query, {"pruefi": "13009"}))

    result = benchmark.pedantic(run_query, iterations=10, rounds=1)
    assert len(result) > 0, "Query returned no results for pruefi 13009"
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ tests = [
"pytest==9.0.2",
"syrupy==5.0.0"
]
benchmark = [
"pytest==9.0.2",
"pytest-benchmark==5.1.0"
]
type_check = [
"mypy==1.19.1"
]
Expand Down
86 changes: 86 additions & 0 deletions scripts/compare_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python3
"""Compare benchmark results and report regressions."""
import argparse
import json
import sys
from pathlib import Path

REGRESSION_THRESHOLD = 0.20 # 20% slower = regression


def load_benchmarks(path: Path) -> dict[str, float]:
    """Load a pytest-benchmark JSON results file.

    Returns a mapping of benchmark test name -> median runtime in seconds.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the expected pytest-benchmark structure is missing.
    """
    # FIX: read with an explicit encoding — bare open() uses the platform
    # default, which breaks on non-UTF-8 locales (e.g. Windows runners).
    data = json.loads(path.read_text(encoding="utf-8"))
    return {b["name"]: b["stats"]["median"] for b in data["benchmarks"]}


def main(argv: list[str] | None = None) -> int:
    """Compare two benchmark result files and print a markdown report.

    Args:
        argv: Command-line arguments (defaults to ``sys.argv[1:]``).
            Accepting them as a parameter is backward-compatible and makes
            the function testable without patching ``sys.argv``.

    Returns:
        Exit code: 1 if the current results are unreadable or any benchmark
        regressed by more than ``REGRESSION_THRESHOLD``, else 0.
    """
    parser = argparse.ArgumentParser(description="Compare benchmark results and report regressions.")
    parser.add_argument("baseline", type=Path, help="Path to baseline benchmark JSON file")
    parser.add_argument("current", type=Path, help="Path to current benchmark JSON file")
    args = parser.parse_args(argv)

    baseline_path: Path = args.baseline
    current_path: Path = args.current

    # A missing baseline is expected (first run / expired artifact) — succeed.
    if not baseline_path.exists():
        print("No baseline found, skipping comparison.")
        return 0

    # FIX: also catch OSError — a vanished/unreadable file previously escaped
    # as an uncaught FileNotFoundError traceback.
    try:
        baseline = load_benchmarks(baseline_path)
    except (OSError, json.JSONDecodeError, KeyError) as e:
        # Best-effort: a corrupt baseline should not fail the build.
        print(f"Error reading baseline: {e}")
        return 0

    try:
        current = load_benchmarks(current_path)
    except (OSError, json.JSONDecodeError, KeyError) as e:
        # Unreadable *current* results mean the benchmark run itself failed.
        print(f"Error reading current results: {e}")
        return 1

    regressions: list[tuple[str, float]] = []
    print("\n## Benchmark Comparison\n")
    print("| Test | Baseline | Current | Change |")
    print("|------|----------|---------|--------|")

    for name in sorted(baseline.keys() | current.keys()):
        if name not in current:
            print(f"| {name} | {baseline[name]*1000:.1f}ms | - | REMOVED |")
            continue

        current_time = current[name]

        if name not in baseline:
            print(f"| {name} | - | {current_time*1000:.1f}ms | NEW |")
            continue

        baseline_time = baseline[name]

        # FIX: guard against a zero (or negative, i.e. garbage) baseline
        # median, which previously raised ZeroDivisionError.
        if baseline_time <= 0:
            print(f"| {name} | {baseline_time*1000:.1f}ms | {current_time*1000:.1f}ms | n/a |")
            continue

        change = (current_time - baseline_time) / baseline_time

        if change > REGRESSION_THRESHOLD:
            status = "🔴"
        elif change < -0.05:
            status = "🟢"
        else:
            status = ""

        print(f"| {name} | {baseline_time*1000:.1f}ms | {current_time*1000:.1f}ms | {change:+.1%} {status} |")

        if change > REGRESSION_THRESHOLD:
            regressions.append((name, change))

    print()
    if regressions:
        print(f"❌ {len(regressions)} regression(s) detected (>{REGRESSION_THRESHOLD:.0%} slower)")
        for name, change in regressions:
            print(f"  - {name}: {change:+.1%}")
        return 1
    print("✅ No regressions detected")
    return 0


if __name__ == "__main__":
    sys.exit(main())
2 changes: 1 addition & 1 deletion xml-migs-and-ahbs
Loading