diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py index d87050dc..c2cc3000 100644 --- a/benchmarks/__init__.py +++ b/benchmarks/__init__.py @@ -34,8 +34,11 @@ from benchmarks.snapshot import Metric # Importing the models / patterns packages triggers each module's -# ``register(...)`` / ``register_pattern(...)`` call at import time. -from benchmarks import bench, models, patterns # noqa: F401, E402 +# ``register(...)`` / ``register_pattern(...)`` call at import time. ``bench`` +# (analytics) is intentionally NOT eagerly imported — that keeps ``import +# benchmarks`` / the CodSpeed CI baseline free of the local-only layer; +# ``from benchmarks import bench`` still works (submodule import on demand). +from benchmarks import models, patterns # noqa: F401, E402 def load_long_df( diff --git a/benchmarks/_tests/test_memory_id_alignment.py b/benchmarks/_tests/test_memory_id_alignment.py deleted file mode 100644 index 5d2377c8..00000000 --- a/benchmarks/_tests/test_memory_id_alignment.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Guard test for the timing ↔ memory test-id seam. - -``memory.py`` hand-rolls f-strings to label each measurement with the -same node id pytest-benchmark produces (e.g. -``benchmarks/test_matrices.py::test_matrices[basic-n=10]``). If a -benchmark test function gets renamed and the matching f-string in -``memory.py`` isn't updated, ``plot`` would silently end up with -non-overlapping timing and memory sets — no error, just missing data. - -This test exercises both sides once and asserts every memory-emitted -id is present in pytest's collection. -""" - -from __future__ import annotations - -import re -import subprocess -import sys -from pathlib import Path - -from benchmarks.memory import MEMORY_PHASES, _measurements -from benchmarks.registry import REGISTRY - - -def _collect_benchmark_ids() -> set[str]: - """Return the set of node ids pytest collects under ``benchmarks/``.""" - repo_root = Path(__file__).resolve().parents[2] - result = subprocess.run( - [ - sys.executable, - "-m", - "pytest", - "benchmarks/", - "--collect-only", - "-q", - "--no-header", - "--co", - ], - capture_output=True, - text=True, - check=True, - cwd=repo_root, - ) - # pytest -q --co emits one node id per line; trailing summary lines - # like "N tests collected" can be ignored. - return { - line.strip() - for line in result.stdout.splitlines() - if re.match(r"^benchmarks/.*::.*\[.*\]$", line.strip()) - } - - -def test_memory_node_ids_match_pytest_collection() -> None: - collected = _collect_benchmark_ids() - assert collected, "pytest collected zero benchmark node ids — sanity broken" - - # ``basic`` at its smallest size is cheap and declares every default - # phase, so it exercises every node-id format ``_measurements`` emits. - spec = REGISTRY["basic"] - size = spec.sizes[0] - - mem_ids: set[str] = set() - for phase in MEMORY_PHASES: - for test_id, _ in _measurements(phase, spec, size): - mem_ids.add(test_id) - - missing = mem_ids - collected - assert not missing, ( - "memory.py emits node ids that pytest doesn't collect " - "(test rename drift?):\n" + "\n".join(f" {m}" for m in sorted(missing)) - ) diff --git a/benchmarks/cli/_base.py b/benchmarks/cli/_base.py index c0170777..730ca4a5 100644 --- a/benchmarks/cli/_base.py +++ b/benchmarks/cli/_base.py @@ -44,7 +44,7 @@ class Measure(StrEnum): ) PhaseName = Literal[ - "build", "matrices", "to_lp", "to_netcdf", "from_netcdf", "to_solver" + "build", "matrices", "to_lp", "to_netcdf", "from_netcdf", "to_solver", "pipeline" ] SpecKind = Literal["all", "models", "patterns"] @@ -56,6 +56,7 @@ class Measure(StrEnum): "to_netcdf": "benchmarks/test_netcdf.py::test_to_netcdf", "from_netcdf": "benchmarks/test_netcdf.py::test_from_netcdf", "to_solver": "benchmarks/test_to_solver.py", + "pipeline": "benchmarks/test_pipeline.py", } # pytest args that constitute a "smoke" run — quick sizes, no timings. diff --git a/benchmarks/cli/run.py b/benchmarks/cli/run.py index 6e42b990..f88b691c 100644 --- a/benchmarks/cli/run.py +++ b/benchmarks/cli/run.py @@ -163,6 +163,8 @@ def run( def _timing() -> None: args: list[str] = [] args.append(_PHASE_TEST_FILE[phase] if phase is not None else "benchmarks/") + if phase == "pipeline": + args.append("--pipeline") if quick: args.append("--quick") elif long: diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index eda9ed6c..2eb3be2a 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -2,10 +2,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import pytest from benchmarks.registry import BenchSpec, skip_reason +if TYPE_CHECKING: + from collections.abc import Callable + + from benchmarks.phases import PhaseCase + # Test modules the CodSpeed instruments measure (edit to change coverage). # build + the two export paths: to_lp (LP text) and to_solver (direct handoff, # which also exercises matrix-gen). matrices is dropped — a subset of to_solver; @@ -55,6 +62,16 @@ def pytest_addoption(parser: pytest.Parser) -> None: "--quick/--long for patterns, leaving models on the prevailing tier." ), ) + parser.addoption( + "--pipeline", + action="store_true", + default=False, + help=( + "Include the opt-in end-to-end pipeline benchmark (build → matrices " + "→ lp in one measured region). Off by default — it re-runs the " + "per-phase work and includes the build." + ), + ) def pytest_collection_modifyitems( @@ -63,6 +80,7 @@ def pytest_collection_modifyitems( """ ``--quick`` drops the PyPSA end-to-end test (~30s; minutes under cachegrind). ``--codspeed`` narrows the run to ``CODSPEED_MODULES`` (drops netcdf/matrices). + ``test_pipeline`` (end-to-end) is opt-in — deselected unless ``--pipeline``. """ if config.getoption("--quick"): skip = pytest.mark.skip(reason="--quick: pypsa end-to-end skipped") @@ -70,6 +88,12 @@ def pytest_collection_modifyitems( if "test_pypsa_carbon_management" in item.nodeid: item.add_marker(skip) + if not config.getoption("--pipeline"): + dropped = [i for i in items if i.path.stem == "test_pipeline"] + if dropped: + config.hook.pytest_deselected(items=dropped) + items[:] = [i for i in items if i.path.stem != "test_pipeline"] + if getattr(config.option, "codspeed", False): deselected = [i for i in items if i.path.stem not in CODSPEED_MODULES] if deselected: @@ -105,3 +129,22 @@ def maybe_skip(request: pytest.FixtureRequest, spec: BenchSpec, size: int) -> No ) if reason: pytest.skip(reason) + + +def run_case( + benchmark: Callable[..., object], + case: PhaseCase, + request: pytest.FixtureRequest, +) -> None: + """ + Shared pytest-benchmark driver body for one :class:`PhaseCase`. + + Honours the case's own ``skip`` (e.g. solver not installed) and the size + tiers (via :func:`maybe_skip`), then runs the case's measured action under + ``benchmark`` inside the case's setup/teardown context. + """ + if case.skip: + pytest.skip(case.skip) + maybe_skip(request, case.spec, case.value) + with case.run() as action: + benchmark(action) diff --git a/benchmarks/memory.py b/benchmarks/memory.py index 0f03de53..e22dfcc8 100644 --- a/benchmarks/memory.py +++ b/benchmarks/memory.py @@ -15,8 +15,8 @@ The per-phase peaks are *marginal* (each tracker sees only its own phase's allocations), so the end-to-end OOM ceiling can't be recovered from them: the opt-in ``pipeline`` phase (``--phase pipeline``) instead measures -build → matrices → to_lp under one tracker, keyed by a bare -``pipeline[-=]`` id. +build → matrices → to_lp under one tracker, keyed by the same node id as the +timing pipeline test (``test_pipeline.py::test_pipeline[...]``). """ from __future__ import annotations @@ -28,11 +28,11 @@ import subprocess import sys import tempfile -from collections.abc import Callable, Iterator +from collections.abc import Callable from pathlib import Path from typing import TYPE_CHECKING -from benchmarks.snapshot import spec_param_id, write_memory_snapshot +from benchmarks.snapshot import write_memory_snapshot if TYPE_CHECKING: from benchmarks.registry import BenchSpec @@ -73,28 +73,6 @@ def _require_memray() -> None: ALL_MEMORY_PHASES: tuple[str, ...] = (*MEMORY_PHASES, "pipeline") -def _phase_tag(phase: str) -> str: - """Map a phase name to the registry phase tag used by ``spec.applies_to``.""" - from benchmarks.registry import ( - BUILD, - FROM_NETCDF, - MATRICES, - TO_HIGHSPY, - TO_LP, - TO_NETCDF, - ) - - return { - "build": BUILD, - "matrices": MATRICES, - "to_lp": TO_LP, - "to_netcdf": TO_NETCDF, - "from_netcdf": FROM_NETCDF, - "to_solver": TO_HIGHSPY, # we always measure the highs handoff - "pipeline": BUILD, - }[phase] - - def measure_peak(action: Callable[[], object], repeats: int = 1) -> float: """ Run ``action()`` under ``memray.Tracker`` and return peak MiB. @@ -138,116 +116,14 @@ def measure_peak(action: Callable[[], object], repeats: int = 1) -> float: _measure_peak = measure_peak -def _measurements( - phase: str, spec: BenchSpec, size: int -) -> Iterator[tuple[str, Callable[[], object]]]: - """ - Yield ``(test_id, action)`` pairs for one ``(phase, spec, size)``. - - ``action`` is a zero-arg callable; the caller runs it inside a tracker. - For non-build phases, the model is built once up front (outside the - tracker) and the action closes over it so only the phase work is - counted. ``size`` is the swept value along ``spec.axis`` (model size or - pattern severity); the test ids match the shared phase drivers either way. - """ - name = spec.name - axis = spec.axis - - if phase == "build": - yield ( - f"benchmarks/test_build.py::test_build[{spec_param_id(name, axis, size)}]", - lambda: spec.build(size), - ) - return - - if phase == "pipeline": - from benchmarks.phases import touch_matrices, write_lp - - tmpdir = tempfile.TemporaryDirectory() - lp_path = Path(tmpdir.name) / "m.lp" - - def run_pipeline() -> None: - built = spec.build(size) - touch_matrices(built) - write_lp(built, lp_path) - +def _deps_available(spec: BenchSpec) -> bool: + """True if every module in ``spec.requires`` imports (e.g. pypsa).""" + for mod in spec.requires: try: - yield (f"pipeline[{spec_param_id(name, axis, size)}]", run_pipeline) - finally: - tmpdir.cleanup() - return - - m = spec.build(size) - - if phase == "matrices": - from benchmarks.phases import touch_matrices - - yield ( - f"benchmarks/test_matrices.py::test_matrices[{spec_param_id(name, axis, size)}]", - lambda: touch_matrices(m), - ) - - elif phase == "to_lp": - from benchmarks.phases import write_lp - - tmpdir = tempfile.TemporaryDirectory() - lp_path = Path(tmpdir.name) / "m.lp" - try: - yield ( - f"benchmarks/test_to_lp.py::test_to_lp[{spec_param_id(name, axis, size)}]", - lambda: write_lp(m, lp_path), - ) - finally: - tmpdir.cleanup() - - elif phase == "to_netcdf": - from benchmarks.phases import write_netcdf - - tmpdir = tempfile.TemporaryDirectory() - nc_path = Path(tmpdir.name) / "m.nc" - try: - yield ( - f"benchmarks/test_netcdf.py::test_to_netcdf[{spec_param_id(name, axis, size)}]", - lambda: write_netcdf(m, nc_path), - ) - finally: - tmpdir.cleanup() - - elif phase == "from_netcdf": - from benchmarks.phases import read_netcdf, write_netcdf - - tmpdir = tempfile.TemporaryDirectory() - nc_path = Path(tmpdir.name) / "m.nc" - write_netcdf(m, nc_path) # setup: written outside the tracker - try: - yield ( - f"benchmarks/test_netcdf.py::test_from_netcdf[{spec_param_id(name, axis, size)}]", - lambda: read_netcdf(nc_path), - ) - finally: - tmpdir.cleanup() - - elif phase == "to_solver": - from benchmarks.phases import SOLVER_HANDOFFS - - # Memory currently tracks only HiGHS — look it up by name so a - # reordering of SOLVER_HANDOFFS doesn't silently swap solvers. - # Older linopy releases without ``to_highspy`` skip the phase - # silently rather than emitting an id with no possible match. - highs = next((w for n, _, w in SOLVER_HANDOFFS if n == "highs"), None) - if highs is None: - return - - yield ( - ( - f"benchmarks/test_to_solver.py::test_to_solver" - f"[highs-{spec_param_id(name, axis, size)}]" - ), - lambda: highs(m), - ) - - else: - raise ValueError(f"unknown phase: {phase!r}") + __import__(mod) + except ImportError: + return False + return True def run_phase( @@ -260,67 +136,50 @@ def run_phase( severities: tuple[int, ...] = (), ) -> dict[str, float]: """ - Measure peak memory for every applicable ``(spec, size)`` under one phase. - - Returns a ``{test_id: peak_mib}`` mapping. Invoked once per phase as a - subprocess by :func:`measure` for isolation. ``repeats`` is forwarded to - :func:`measure_peak` so callers can dial up signal-to-noise. ``filter_expr`` - keeps only specs whose ``-=`` key contains it — e.g. - ``"nodal_balance"`` (one spec), ``"severity"`` (patterns), ``"n="`` (models). - Size selection (``quick`` / ``long`` / ``sizes`` / ``severities``) shares - :func:`benchmarks.registry.skip_reason` with pytest so the two never drift. + Measure peak memory for every applicable case under one phase. + + Returns a ``{test_id: peak_mib}`` mapping. The work, ids and size selection + come from :func:`benchmarks.phases.phase_cases` / ``skip_reason`` — the same + source the pytest drivers consume, so the two layers can't drift. Invoked + once per phase as a subprocess by :func:`measure` for isolation. + ``filter_expr`` keeps only cases whose id-suffix contains it (e.g. + ``"nodal_balance"``, ``"severity"``, ``"n="``); ``repeats`` is forwarded to + :func:`measure_peak`. """ _require_memray() - from benchmarks.registry import all_specs, skip_reason + from benchmarks.phases import PHASE_NODE, phase_cases + from benchmarks.registry import skip_reason - tag = _phase_tag(phase) + node = PHASE_NODE[phase] results: dict[str, float] = {} - for spec in all_specs(): - if not spec.applies_to(tag): + for case in phase_cases(phase): + if case.skip: + continue + if not _deps_available(case.spec): + continue + if skip_reason( + case.spec, + case.value, + quick=quick, + long=long, + sizes=sizes, + severities=severities, + ): + continue + if filter_expr and filter_expr not in case.id: continue - # Optional-dep gate (e.g. pypsa_scigrid needs pypsa). - for mod in spec.requires: - try: - __import__(mod) - except ImportError: - break - else: - for value in spec.sweep: - if skip_reason( - spec, - value, - quick=quick, - long=long, - sizes=sizes, - severities=severities, - ): - continue - key = spec_param_id(spec.name, spec.axis, value) - if filter_expr and filter_expr not in key: - continue - try: - for test_id, action in _measurements(phase, spec, value): - try: - results[test_id] = _measure_peak(action, repeats=repeats) - print( - f" {test_id} → {results[test_id]:.1f} MiB", - file=sys.stderr, - ) - except Exception as exc: # noqa: BLE001 - print( - f" skip {test_id}: {type(exc).__name__}: {exc}", - file=sys.stderr, - ) - except Exception as exc: # noqa: BLE001 - print( - f" setup failed {spec.name}/{value}: " - f"{type(exc).__name__}: {exc}", - file=sys.stderr, - ) - gc.collect() + test_id = f"{node}[{case.id}]" + try: + with case.run() as action: + peak = measure_peak(action, repeats=repeats) + results[test_id] = peak + print(f" {test_id} → {peak:.1f} MiB", file=sys.stderr) + except Exception as exc: # noqa: BLE001 + print(f" skip {test_id}: {type(exc).__name__}: {exc}", file=sys.stderr) + gc.collect() return results diff --git a/benchmarks/phases.py b/benchmarks/phases.py index 8a60fc81..6de89f55 100644 --- a/benchmarks/phases.py +++ b/benchmarks/phases.py @@ -14,13 +14,31 @@ from __future__ import annotations import inspect -from collections.abc import Callable +import tempfile +from collections.abc import Callable, Iterator +from contextlib import AbstractContextManager, contextmanager +from functools import partial from pathlib import Path +from typing import NamedTuple import linopy import linopy.io as lio -from benchmarks.registry import TO_GUROBIPY, TO_HIGHSPY, TO_MOSEK, TO_XPRESS +from benchmarks.registry import ( + BUILD, + FROM_NETCDF, + MATRICES, + TO_GUROBIPY, + TO_HIGHSPY, + TO_LP, + TO_MOSEK, + TO_NETCDF, + TO_XPRESS, + BenchSpec, + iter_params, + spec_param_id, +) from linopy import read_netcdf +from linopy.solvers import available_solvers # linopy <0.4.1's ``to_file`` doesn't accept ``progress``. Check once # at import so the benchmark loop stays branchless on the hot path. @@ -84,3 +102,121 @@ def write_netcdf(m: linopy.Model, path: Path) -> None: ) if wrapper is not None ) + + +Action = Callable[[], object] +CaseFactory = Callable[[], AbstractContextManager[Action]] + +PIPELINE = "pipeline" + +PHASE_NODE: dict[str, str] = { + BUILD: "benchmarks/test_build.py::test_build", + MATRICES: "benchmarks/test_matrices.py::test_matrices", + TO_LP: "benchmarks/test_to_lp.py::test_to_lp", + TO_NETCDF: "benchmarks/test_netcdf.py::test_to_netcdf", + FROM_NETCDF: "benchmarks/test_netcdf.py::test_from_netcdf", + "to_solver": "benchmarks/test_to_solver.py::test_to_solver", + PIPELINE: "benchmarks/test_pipeline.py::test_pipeline", +} + + +class PhaseCase(NamedTuple): + """One parametrization of a phase — what both drivers consume.""" + + spec: BenchSpec + value: int + id: str + run: CaseFactory + skip: str | None + + +@contextmanager +def _build_case(spec: BenchSpec, value: int) -> Iterator[Action]: + yield lambda: spec.build(value) + + +@contextmanager +def _matrices_case(spec: BenchSpec, value: int) -> Iterator[Action]: + m = spec.build(value) + yield lambda: touch_matrices(m) + + +@contextmanager +def _to_lp_case(spec: BenchSpec, value: int) -> Iterator[Action]: + m = spec.build(value) + with tempfile.TemporaryDirectory() as d: + path = Path(d) / "model.lp" + yield lambda: write_lp(m, path) + + +@contextmanager +def _to_netcdf_case(spec: BenchSpec, value: int) -> Iterator[Action]: + m = spec.build(value) + with tempfile.TemporaryDirectory() as d: + path = Path(d) / "model.nc" + yield lambda: write_netcdf(m, path) + + +@contextmanager +def _from_netcdf_case(spec: BenchSpec, value: int) -> Iterator[Action]: + m = spec.build(value) + with tempfile.TemporaryDirectory() as d: + path = Path(d) / "model.nc" + write_netcdf(m, path) + yield lambda: read_netcdf(path) + + +@contextmanager +def _solver_case( + spec: BenchSpec, value: int, wrapper: Callable[[linopy.Model], object] +) -> Iterator[Action]: + m = spec.build(value) + yield lambda: wrapper(m) + + +@contextmanager +def _pipeline_case(spec: BenchSpec, value: int) -> Iterator[Action]: + with tempfile.TemporaryDirectory() as d: + path = Path(d) / "model.lp" + + def action() -> None: + m = spec.build(value) + touch_matrices(m) + write_lp(m, path) + + yield action + + +_PHASE_CASE: dict[str, tuple[str, Callable[[BenchSpec, int], AbstractContextManager[Action]]]] = { + BUILD: (BUILD, _build_case), + MATRICES: (MATRICES, _matrices_case), + TO_LP: (TO_LP, _to_lp_case), + TO_NETCDF: (TO_NETCDF, _to_netcdf_case), + FROM_NETCDF: (FROM_NETCDF, _from_netcdf_case), + PIPELINE: (TO_LP, _pipeline_case), +} + + +def phase_cases(phase: str) -> Iterator[PhaseCase]: + """ + Yield every ``(spec, value)`` parametrization of one phase as a runnable + case — the single source of truth for "what runs + its id", shared by the + pytest drivers and the memray engine. + + ``to_solver`` expands to one case per available solver (the solver in the + id-suffix); every other phase yields one case per applicable ``(spec, + value)``. ``skip`` is set for solvers that aren't installed. + """ + if phase == "to_solver": + for name, tag, wrapper in SOLVER_HANDOFFS: + skip = None if name in available_solvers else f"{name} not installed" + for spec, value in iter_params(tag): + sfx = f"{name}-{spec_param_id(spec.name, spec.axis, value)}" + run = partial(_solver_case, spec, value, wrapper) + yield PhaseCase(spec, value, sfx, run, skip) + return + + tag, case = _PHASE_CASE[phase] + for spec, value in iter_params(tag): + sfx = spec_param_id(spec.name, spec.axis, value) + yield PhaseCase(spec, value, sfx, partial(case, spec, value), None) diff --git a/benchmarks/registry.py b/benchmarks/registry.py index a1ad3d41..87d5c5fe 100644 --- a/benchmarks/registry.py +++ b/benchmarks/registry.py @@ -244,9 +244,18 @@ def iter_params( ] -def param_ids(params: list[tuple[BenchSpec, int]]) -> list[str]: - from benchmarks.snapshot import spec_param_id +def spec_param_id(name: str, axis: str, value: object) -> str: + """ + The ``-=`` fragment that fills a test id's ``[...]``. + + Single source of truth for the parametrize-id shape — pytest param ids + (:func:`param_ids`), the memory grid's test ids, and the solver-handoff ids + all build on it; :func:`benchmarks.snapshot.parse_test_id` reads it back. + """ + return f"{name}-{axis}={value}" + +def param_ids(params: list[tuple[BenchSpec, int]]) -> list[str]: return [spec_param_id(spec.name, spec.axis, value) for spec, value in params] diff --git a/benchmarks/snapshot.py b/benchmarks/snapshot.py index 193c780e..163722fe 100644 --- a/benchmarks/snapshot.py +++ b/benchmarks/snapshot.py @@ -52,18 +52,6 @@ def parse_test_id(test_id: str) -> tuple[str, str, int | None, str]: return "other", "other", None, "other" -def spec_param_id(name: str, axis: str, value: object) -> str: - """ - The ``-=`` fragment that fills a test id's ``[...]``. - - The single source of truth for the parametrize-id shape — pytest param ids - (:func:`benchmarks.registry.param_ids`), the memory grid's test ids, and - the solver-handoff ids all build on it, and :func:`parse_test_id` reads it - back. Keep it in lock-step with ``_SIZE_RE``. - """ - return f"{name}-{axis}={value}" - - def synth_test_id( label: str, *, @@ -84,6 +72,8 @@ def synth_test_id( — still fine for ``compare``). A partial spec is ambiguous and rejected. """ if spec is not None and size is not None and phase is not None: + from benchmarks.registry import spec_param_id + return f"bench::{phase}[{spec_param_id(spec, axis, size)}]" if spec is not None or size is not None or phase is not None: raise ValueError( diff --git a/benchmarks/test_build.py b/benchmarks/test_build.py index 5bb3430b..119def51 100644 --- a/benchmarks/test_build.py +++ b/benchmarks/test_build.py @@ -6,18 +6,17 @@ import pytest -from benchmarks.conftest import maybe_skip -from benchmarks.registry import BUILD, ModelSpec, iter_params, param_ids +from benchmarks.conftest import run_case +from benchmarks.phases import PhaseCase, phase_cases +from benchmarks.registry import BUILD -_PARAMS = iter_params(BUILD) +_CASES = list(phase_cases(BUILD)) -@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +@pytest.mark.parametrize("case", _CASES, ids=[c.id for c in _CASES]) def test_build( benchmark: Callable[..., object], - spec: ModelSpec, - size: int, + case: PhaseCase, request: pytest.FixtureRequest, ) -> None: - maybe_skip(request, spec, size) - benchmark(spec.build, size) + run_case(benchmark, case, request) diff --git a/benchmarks/test_matrices.py b/benchmarks/test_matrices.py index f985aec3..d97b5230 100644 --- a/benchmarks/test_matrices.py +++ b/benchmarks/test_matrices.py @@ -6,20 +6,17 @@ import pytest -from benchmarks.conftest import maybe_skip -from benchmarks.phases import touch_matrices -from benchmarks.registry import MATRICES, ModelSpec, iter_params, param_ids +from benchmarks.conftest import run_case +from benchmarks.phases import PhaseCase, phase_cases +from benchmarks.registry import MATRICES -_PARAMS = iter_params(MATRICES) +_CASES = list(phase_cases(MATRICES)) -@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +@pytest.mark.parametrize("case", _CASES, ids=[c.id for c in _CASES]) def test_matrices( benchmark: Callable[..., object], - spec: ModelSpec, - size: int, + case: PhaseCase, request: pytest.FixtureRequest, ) -> None: - maybe_skip(request, spec, size) - m = spec.build(size) - benchmark(touch_matrices, m) + run_case(benchmark, case, request) diff --git a/benchmarks/test_netcdf.py b/benchmarks/test_netcdf.py index 072ba22e..ce48df7d 100644 --- a/benchmarks/test_netcdf.py +++ b/benchmarks/test_netcdf.py @@ -9,48 +9,30 @@ from __future__ import annotations from collections.abc import Callable -from pathlib import Path import pytest -from benchmarks.conftest import maybe_skip -from benchmarks.phases import read_netcdf, write_netcdf -from benchmarks.registry import ( - FROM_NETCDF, - TO_NETCDF, - ModelSpec, - iter_params, - param_ids, -) +from benchmarks.conftest import run_case +from benchmarks.phases import PhaseCase, phase_cases +from benchmarks.registry import FROM_NETCDF, TO_NETCDF -_WRITE_PARAMS = iter_params(TO_NETCDF) -_READ_PARAMS = iter_params(FROM_NETCDF) +_WRITE_CASES = list(phase_cases(TO_NETCDF)) +_READ_CASES = list(phase_cases(FROM_NETCDF)) -@pytest.mark.parametrize("spec,size", _WRITE_PARAMS, ids=param_ids(_WRITE_PARAMS)) +@pytest.mark.parametrize("case", _WRITE_CASES, ids=[c.id for c in _WRITE_CASES]) def test_to_netcdf( benchmark: Callable[..., object], - spec: ModelSpec, - size: int, + case: PhaseCase, request: pytest.FixtureRequest, - tmp_path: Path, ) -> None: - maybe_skip(request, spec, size) - m = spec.build(size) - out = tmp_path / "model.nc" - benchmark(write_netcdf, m, out) + run_case(benchmark, case, request) -@pytest.mark.parametrize("spec,size", _READ_PARAMS, ids=param_ids(_READ_PARAMS)) +@pytest.mark.parametrize("case", _READ_CASES, ids=[c.id for c in _READ_CASES]) def test_from_netcdf( benchmark: Callable[..., object], - spec: ModelSpec, - size: int, + case: PhaseCase, request: pytest.FixtureRequest, - tmp_path: Path, ) -> None: - maybe_skip(request, spec, size) - m = spec.build(size) - out = tmp_path / "model.nc" - write_netcdf(m, out) - benchmark(read_netcdf, out) + run_case(benchmark, case, request) diff --git a/benchmarks/test_pipeline.py b/benchmarks/test_pipeline.py new file mode 100644 index 00000000..77a3bc2b --- /dev/null +++ b/benchmarks/test_pipeline.py @@ -0,0 +1,29 @@ +""" +End-to-end pipeline benchmark: build → matrices → LP write in one region. + +Opt-in (deselected unless ``--pipeline``): it re-runs the per-phase work and, +unlike the individual phase benchmarks, *includes the model build* — so it +captures the end-to-end cost/peak a real build-then-export session hits, which +can't be recovered by summing the marginal per-phase numbers. The memory side +measures the same thing via ``... --metric memory --phase pipeline``. +""" + +from __future__ import annotations + +from collections.abc import Callable + +import pytest + +from benchmarks.conftest import run_case +from benchmarks.phases import PIPELINE, PhaseCase, phase_cases + +_CASES = list(phase_cases(PIPELINE)) + + +@pytest.mark.parametrize("case", _CASES, ids=[c.id for c in _CASES]) +def test_pipeline( + benchmark: Callable[..., object], + case: PhaseCase, + request: pytest.FixtureRequest, +) -> None: + run_case(benchmark, case, request) diff --git a/benchmarks/test_to_lp.py b/benchmarks/test_to_lp.py index de05e5b1..5adfe686 100644 --- a/benchmarks/test_to_lp.py +++ b/benchmarks/test_to_lp.py @@ -3,26 +3,20 @@ from __future__ import annotations from collections.abc import Callable -from pathlib import Path import pytest -from benchmarks.conftest import maybe_skip -from benchmarks.phases import write_lp -from benchmarks.registry import TO_LP, ModelSpec, iter_params, param_ids +from benchmarks.conftest import run_case +from benchmarks.phases import PhaseCase, phase_cases +from benchmarks.registry import TO_LP -_PARAMS = iter_params(TO_LP) +_CASES = list(phase_cases(TO_LP)) -@pytest.mark.parametrize("spec,size", _PARAMS, ids=param_ids(_PARAMS)) +@pytest.mark.parametrize("case", _CASES, ids=[c.id for c in _CASES]) def test_to_lp( benchmark: Callable[..., object], - spec: ModelSpec, - size: int, + case: PhaseCase, request: pytest.FixtureRequest, - tmp_path: Path, ) -> None: - maybe_skip(request, spec, size) - m = spec.build(size) - lp_file = tmp_path / "model.lp" - benchmark(write_lp, m, lp_file) + run_case(benchmark, case, request) diff --git a/benchmarks/test_to_solver.py b/benchmarks/test_to_solver.py index edc852e7..17da275a 100644 --- a/benchmarks/test_to_solver.py +++ b/benchmarks/test_to_solver.py @@ -18,40 +18,16 @@ import pytest -from benchmarks.conftest import maybe_skip -from benchmarks.phases import SOLVER_HANDOFFS -from benchmarks.registry import ModelSpec, iter_params -from benchmarks.snapshot import spec_param_id -from linopy.solvers import available_solvers - - -def _make_params() -> list[object]: - out: list[object] = [] - for solver_name, phase, wrapper in SOLVER_HANDOFFS: - for spec, size in iter_params(phase): - out.append( - pytest.param( - solver_name, - wrapper, - spec, - size, - id=f"{solver_name}-{spec_param_id(spec.name, spec.axis, size)}", - ) - ) - return out - - -@pytest.mark.parametrize("solver_name,wrapper,spec,size", _make_params()) +from benchmarks.conftest import run_case +from benchmarks.phases import PhaseCase, phase_cases + +_CASES = list(phase_cases("to_solver")) + + +@pytest.mark.parametrize("case", _CASES, ids=[c.id for c in _CASES]) def test_to_solver( benchmark: Callable[..., object], - solver_name: str, - wrapper: Callable[..., object], - spec: ModelSpec, - size: int, + case: PhaseCase, request: pytest.FixtureRequest, ) -> None: - if solver_name not in available_solvers: - pytest.skip(f"{solver_name} not installed") - maybe_skip(request, spec, size) - model = spec.build(size) - benchmark(wrapper, model) + run_case(benchmark, case, request)