diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f8f779b5..e7750e98 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,3 +12,30 @@ updates: github-actions: patterns: - '*' + +# Pinned ``[benchmarks]`` extra in pyproject.toml. One PR per dep bump +# → CodSpeed CI runs and attributes any perf delta to that specific +# bump. Keeps the cross-version ``sweep`` baseline (lockfile-pinned) +# stable while still surfacing upstream perf changes per-PR with +# eyes-open review. Loose ``[project.dependencies]`` (numpy, scipy, ...) +# have no version specifier so Dependabot leaves them alone — only the +# ``==`` pins in ``[benchmarks]`` produce PRs. +- package-ecosystem: pip + directory: / + schedule: + interval: monthly + open-pull-requests-limit: 5 + groups: + # Measurement scaffolding + CLI/notebook tooling. Perf-irrelevant — + # they don't move CodSpeed signal, so batching into one PR cuts + # review noise. Perf-relevant deps (numpy, xarray, highspy, …) stay + # un-grouped so each gets its own attributed CodSpeed delta. + benchmark-tooling: + patterns: + - pytest + - pytest-benchmark + - pytest-memray + - pytest-codspeed + - nbconvert + - typer + - plotly diff --git a/.github/workflows/benchmark-smoke.yml b/.github/workflows/benchmark-smoke.yml new file mode 100644 index 00000000..689eaf35 --- /dev/null +++ b/.github/workflows/benchmark-smoke.yml @@ -0,0 +1,39 @@ +name: Benchmark smoke + +# Builds every spec and fires every phase once (--benchmark-disable): +# a "did a refactor break a spec?" check, not timing. 
+ +on: + push: + branches: [ master ] + pull_request: + branches: [ '*' ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + smoke: + name: Benchmark smoke (quick) + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # setuptools_scm + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install package and benchmark dependencies + run: | + python -m pip install uv + uv pip install --system -e ".[dev,benchmarks]" + + - name: Run benchmark smoke + # Every spec builds at one size and every phase fires once, no timings. + run: | + pytest benchmarks/ --benchmark-disable -q diff --git a/.github/workflows/codspeed-macro.yml b/.github/workflows/codspeed-macro.yml new file mode 100644 index 00000000..2d6ae3dd --- /dev/null +++ b/.github/workflows/codspeed-macro.yml @@ -0,0 +1,62 @@ +name: CodSpeed (walltime macro) + +# Wall-clock benchmarks on CodSpeed's dedicated bare-metal macro runners — the +# mode that reflects the real cost of dense-vs-sparse work (cache, allocation, +# native numpy/scipy), which instruction counting under-weights. +# +# Master push (updates the walltime baseline) + manual dispatch + opt-in per-PR +# via the ``trigger:benchmark`` label. Off every *unlabelled* PR: macro-runner +# minutes are metered (600/month free), and self-hosted bare-metal shouldn't run +# arbitrary PR code — the label is a maintainer-controlled gate, so only apply it +# to trusted (same-repo) PRs. +# +# Requires the repo under a GitHub org (macro runners are org-only) with the +# CodSpeed app connected to the repo (OIDC auth — no token secret needed). 
+ +on: + push: + branches: [ master ] + pull_request: + types: [ labeled, synchronize ] + branches: [ master ] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + macro: + name: CodSpeed walltime (macro runner) + # Always on master push / dispatch; on PRs only when explicitly labelled. + if: >- + ${{ github.event_name != 'pull_request' || + contains(github.event.pull_request.labels.*.name, 'trigger:benchmark') }} + runs-on: codspeed-macro + # Non-gating until the CodSpeed app is connected to the repo (OIDC auth). + continue-on-error: true + permissions: + contents: read # actions/checkout + id-token: write # OIDC auth with CodSpeed — no token secret + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # setuptools_scm + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install pinned benchmark environment + # Pinned ``[benchmarks]`` extra so Dependabot bumps → one CodSpeed delta each. + run: | + python -m pip install uv + uv pip install --system -e ".[dev,benchmarks]" + + - name: Run benchmarks under CodSpeed (walltime) + uses: CodSpeedHQ/action@v4 + with: + mode: walltime + run: | + pytest benchmarks/ --codspeed diff --git a/.github/workflows/codspeed-memory.yml b/.github/workflows/codspeed-memory.yml new file mode 100644 index 00000000..25df1b33 --- /dev/null +++ b/.github/workflows/codspeed-memory.yml @@ -0,0 +1,48 @@ +name: CodSpeed (memory) + +# Heap-allocation tracking — the always-on signal for this sparsity/memory fork. +# Fast (~2 min) and free on a GitHub runner, so it runs on master (baseline) and +# every PR. A solo instrument on ubuntu: its one upload per (commit, env) never +# clashes with the walltime run, which is a separate bare-metal environment. 
+ +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + memory: + name: CodSpeed memory + runs-on: ubuntu-latest + # Non-gating: informational, never blocks a merge. + continue-on-error: true + permissions: + contents: read # actions/checkout + id-token: write # OIDC auth with CodSpeed — no token secret + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # setuptools_scm + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install pinned benchmark environment + run: | + python -m pip install uv + uv pip install --system -e ".[dev,benchmarks]" + + - name: Run benchmarks under CodSpeed (memory) + uses: CodSpeedHQ/action@v4 + with: + mode: memory + run: | + pytest benchmarks/ --codspeed diff --git a/.gitignore b/.gitignore index 8b369aea..7e6d63e2 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,10 @@ benchmark/scripts/__pycache__ benchmark/scripts/benchmarks-pypsa-eur/__pycache__ benchmark/scripts/leftovers/ +# Benchmarks (internal suite): regenerable .ipynb viewing artifacts +benchmarks/walkthrough.ipynb +benchmarks/.ipynb_checkpoints/ + # IDE .idea/ diff --git a/benchmarks/README.md b/benchmarks/README.md index 22ac73ce..1362cb84 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,94 +1,74 @@ # Internal Performance Benchmarks -Measures linopy's own performance (build time, LP write speed, memory usage) across problem sizes using [pytest-benchmark](https://pytest-benchmark.readthedocs.io/) and [pytest-memray](https://pytest-memray.readthedocs.io/). Use these to check whether a code change introduces a regression or improvement. +End-to-end performance tracking for `linopy` — build → matrix generation → +LP / netCDF (de)serialization → solver handoff → a fixed PyPSA model. Solver +algorithm runtime is out of scope. 
-> **Note:** The `benchmark/` directory (singular) contains *external* benchmarks comparing linopy against other modeling frameworks. This directory (`benchmarks/`) is for *internal* performance tracking only. +The suite is a set of `pytest-benchmark` tests driven by a model registry. +**CodSpeed** measures them in CI (walltime on dedicated runners, memory on every +PR); locally you just run `pytest`. -## Setup +> `benchmark/` (singular) is the legacy external-framework suite. +> `benchmarks/` (plural) is this internal suite. -```bash -pip install -e ".[benchmarks]" -``` +## Models vs patterns -## Running benchmarks +Two kinds of benchmark spec, same harness and same phases, distinguished by +their sweep axis: -```bash -# Quick smoke test (small sizes only) -pytest benchmarks/ --quick +- **Models** (`models/`, `REGISTRY`) — whole `linopy.Model`s swept over + `size` (axis `n`): "how does cost scale with the problem?" +- **Patterns** (`patterns/`, `PATTERNS`) — fragments of realistic modelling + code (a balance constraint, a KVL contraction) swept over `severity` + (0–100, axis `severity`): "how does cost respond as one data shape goes + from benign to pathological?" -# Full timing benchmarks -pytest benchmarks/test_build.py benchmarks/test_lp_write.py benchmarks/test_matrices.py +Both kinds build a complete `linopy.Model`, so both run the **same phases** and +share the phase drivers (`test_build.py`, `test_matrices.py`, …) — they're just +more `(spec, value)` rows, tagged by `axis`. There is no separate pattern +driver. Running a pattern through `build` *and* `to_lp` shows whether a +dense-`_term` blow-up propagates to export or collapses. -# Run a specific model -pytest benchmarks/test_build.py -k basic -``` +Patterns target the operations where the dense-`_term` representation forces +materialisation — `groupby().sum()` padding, sparse `@` densification — so a +`severity` sweep draws the cost cliff. 
Adding either kind is one file: drop it +in `models/` or `patterns/`, call `register(...)` / `register_pattern(...)`. -## Comparing timing between branches +## Install ```bash -# Save baseline results on master -git checkout master -pytest benchmarks/test_build.py --benchmark-save=master - -# Switch to feature branch and compare -git checkout my-feature -pytest benchmarks/test_build.py --benchmark-save=my-feature --benchmark-compare=0001_master - -# Compare saved results without re-running -pytest-benchmark compare 0001_master 0002_my-feature --columns=median,iqr +uv sync --extra dev --extra benchmarks +source .venv/bin/activate ``` -Results are stored in `.benchmarks/` (gitignored). +`pypsa` is optional — `pypsa_scigrid` and `test_pypsa_carbon_management.py` +skip gracefully without it. To enable them: `uv pip install pypsa`. -## Memory benchmarks +The `[benchmarks]` extra in `pyproject.toml` pins every direct dep that affects +measurement (`numpy`, `scipy`, `xarray`, `pandas`, `polars`, `dask`, …) so +run-to-run deltas reflect linopy changes, not dependency bumps. -`memory.py` runs each test in a separate process with pytest-memray to get accurate per-test peak memory (including C/numpy allocations). Results are saved as JSON and can be compared across branches. - -By default, only the build phase (`test_build.py`) is measured. Unlike timing benchmarks where `benchmark()` isolates the measured function, memray tracks all allocations within a test — including model construction in setup. This means LP write and matrix tests would report build + phase memory combined, making the phase-specific contribution impossible to isolate. Since model construction dominates memory usage, measuring build alone gives the most actionable numbers. 
+## Running ```bash -# Save baseline on master -git checkout master -python benchmarks/memory.py save master - -# Save feature branch -git checkout my-feature -python benchmarks/memory.py save my-feature - -# Compare -python benchmarks/memory.py compare master my-feature - -# Quick mode (smaller sizes, faster) -python benchmarks/memory.py save master --quick - -# Measure a specific phase (includes build overhead) -python benchmarks/memory.py save master --test-path benchmarks/test_lp_write.py +pytest benchmarks/ # the suite +pytest benchmarks/ --benchmark-disable -q # smoke: every spec builds once +pytest benchmarks/ --pipeline # + the opt-in end-to-end pipeline test ``` -Results are stored in `.benchmarks/memory/` (gitignored). Requires Linux or macOS (memray is not available on Windows). - -> **Note:** Small tests (~5 MiB) are near the import-overhead floor and may show noise of ~1 MiB between runs. Focus on larger tests for meaningful memory comparisons. Do not combine `--memray` with timing benchmarks — memray adds ~2x overhead that invalidates timing results. - -## Models - -| Model | Description | Sizes | -|-------|-------------|-------| -| `basic` | Dense N*N model, 2*N^2 vars/cons | 10 — 1600 | -| `knapsack` | N binary variables, 1 constraint | 100 — 1M | -| `expression_arithmetic` | Broadcasting, scaling, summation across dims | 10 — 1000 | -| `sparse_network` | Ring network with mismatched bus/line coords | 10 — 1000 | -| `pypsa_scigrid` | Real power system (requires `pypsa`) | 10 — 200 snapshots | - -## Phases +Each spec declares one `sizes` (models) / `severities` (patterns) tuple — a +small representative set, kept tight because CodSpeed measures it on every PR. +Need a scaling curve? That's a local pytest-benchmem job, not this suite. 
-| Phase | File | What it measures | -|-------|------|------------------| -| Build | `test_build.py` | Model construction (add_variables, add_constraints, add_objective) | -| LP write | `test_lp_write.py` | Writing the model to an LP file | -| Matrices | `test_matrices.py` | Generating sparse matrices (A, b, c, bounds) from the model | +## CI -## Adding a new model +- **Smoke** (`benchmark-smoke.yml`) — every PR: every spec builds and every + phase fires once under `--benchmark-disable`. A "did a refactor break a + spec?" check, not timing. +- **CodSpeed memory** (`codspeed-memory.yml`) — every PR: heap-allocation + tracking, informational, non-gating. +- **CodSpeed walltime** (`codspeed-macro.yml`) — on `master` or a PR labelled + `trigger:benchmark`: wall-clock on dedicated bare-metal runners. -1. Create `benchmarks/models/my_model.py` with a `build_my_model(n)` function and a `SIZES` list -2. Add parametrized tests in the relevant `test_*.py` files -3. Add a quick threshold in `conftest.py` +Activating CodSpeed upstream needs a maintainer to connect the repo to the +CodSpeed app (OIDC auth, no token secret); the workflows are already wired. diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py index 6bf202cc..48c26ef0 100644 --- a/benchmarks/__init__.py +++ b/benchmarks/__init__.py @@ -1 +1,15 @@ -"""Linopy benchmark suite — run with ``pytest benchmarks/`` (use ``--quick`` for smaller sizes).""" +""" +Linopy benchmark suite — run with ``pytest benchmarks/``. + +The model registry it drives is reusable on its own:: + + from benchmarks import REGISTRY + model = REGISTRY["basic"].build(100) +""" + +# Importing the models / patterns packages triggers each module's +# ``register(...)`` / ``register_pattern(...)`` call at import time. 
+from benchmarks import models, patterns # noqa: F401 +from benchmarks.registry import PATTERNS, REGISTRY + +__all__ = ["PATTERNS", "REGISTRY"] diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index 6f9a9467..b9ef6014 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -1,30 +1,104 @@ -"""Benchmark configuration and shared fixtures.""" +"""Benchmark configuration and shared test helpers.""" from __future__ import annotations +from typing import TYPE_CHECKING + import pytest -QUICK_THRESHOLD = { - "basic": 100, - "knapsack": 10_000, - "pypsa_scigrid": 50, - "expression_arithmetic": 100, - "sparse_network": 100, -} +from benchmarks.registry import iter_params, spec_param_id + +if TYPE_CHECKING: + import linopy + from benchmarks.registry import BenchSpec +# Test modules the CodSpeed instruments measure (edit to change coverage). +# build + the two export paths: to_lp (LP text) and to_solver (direct handoff, +# which also exercises matrix-gen). matrices is dropped — a subset of to_solver; +# netcdf excluded — disk I/O, noisy. All still run under the smoke job. +CODSPEED_MODULES = ( + "test_build", + "test_to_lp", + "test_to_solver", +) -def pytest_addoption(parser): + +def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( - "--quick", + "--pipeline", action="store_true", default=False, - help="Use smaller problem sizes for quick benchmarking", + help=( + "Include the opt-in end-to-end pipeline benchmark (build → matrices " + "→ lp in one measured region). Off by default — it re-runs the " + "per-phase work and includes the build." + ), + ) + + +def pytest_collection_modifyitems( + config: pytest.Config, items: list[pytest.Item] +) -> None: + """ + ``test_pipeline`` (end-to-end) is opt-in — deselected unless ``--pipeline``. + ``--codspeed`` narrows the run to ``CODSPEED_MODULES`` (drops netcdf/matrices). 
+ """ + if not config.getoption("--pipeline"): + dropped = [i for i in items if i.path.stem == "test_pipeline"] + if dropped: + config.hook.pytest_deselected(items=dropped) + items[:] = [i for i in items if i.path.stem != "test_pipeline"] + + if getattr(config.option, "codspeed", False): + deselected = [i for i in items if i.path.stem not in CODSPEED_MODULES] + if deselected: + config.hook.pytest_deselected(items=deselected) + items[:] = [i for i in items if i.path.stem in CODSPEED_MODULES] + + +def cases(phase: str) -> pytest.MarkDecorator: + """Parametrize a phase driver over every ``(spec, n)`` that phase runs.""" + params = iter_params(phase) + return pytest.mark.parametrize( + ("spec", "n"), + params, + ids=[spec_param_id(s.name, s.axis, v) for s, v in params], ) -def skip_if_quick(request, model: str, size: int): - """Skip large sizes when --quick is passed.""" - if request.config.getoption("--quick"): - threshold = QUICK_THRESHOLD.get(model, float("inf")) - if size > threshold: - pytest.skip(f"--quick: skipping {model} size {size}") +def require(spec: BenchSpec) -> None: + """``importorskip`` a spec's optional dependencies before it runs.""" + for mod in spec.requires: + pytest.importorskip(mod) + + +def build_model(spec: BenchSpec, n: int) -> linopy.Model: + """Build ``spec`` at ``n`` — the untimed setup, after the requires-skip.""" + require(spec) + return spec.build(n) + + +@pytest.fixture(autouse=True) +def _benchmem_dims(request: pytest.FixtureRequest, benchmark: object) -> None: + """ + Mirror each case's ``spec``/``phase``/``axis`` into pytest-benchmark + ``extra_info`` as analysis dims, so a ``--benchmark-json`` run plots cleanly + under pytest-benchmem — which reads dims from ``params``/``extra_info`` and + can see neither the (unserialisable) spec param nor the phase, which lives in + the test-function name. The numeric ``n`` is already a clean param. No-op + under CodSpeed, whose fixture carries no ``extra_info``. 
+ """ + callspec = getattr(request.node, "callspec", None) + info = getattr(benchmark, "extra_info", None) + func = getattr(request, "function", None) + if ( + callspec is None + or info is None + or func is None + or "spec" not in callspec.params + ): + return + spec = callspec.params["spec"] + info.update( + spec=spec.name, phase=func.__name__.removeprefix("test_"), axis=spec.axis + ) diff --git a/benchmarks/memory.py b/benchmarks/memory.py deleted file mode 100644 index 20af4b8a..00000000 --- a/benchmarks/memory.py +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/env python -""" -Measure and compare peak memory using pytest-memray. - -Usage: - # Save a baseline (on master) - python benchmarks/memory.py save master - - # Save current branch - python benchmarks/memory.py save my-feature - - # Compare two saved runs - python benchmarks/memory.py compare master my-feature - - # Quick mode (smaller sizes) - python benchmarks/memory.py save master --quick - -Results are stored in .benchmarks/memory/. -""" - -from __future__ import annotations - -import argparse -import json -import platform -import re -import subprocess -import sys -from pathlib import Path - -if platform.system() == "Windows": - raise RuntimeError( - "memory.py requires pytest-memray which is not available on Windows. " - "Run memory benchmarks on Linux or macOS." - ) - -RESULTS_DIR = Path(".benchmarks/memory") -MEMORY_RE = re.compile( - r"Allocation results for (.+?) at the high watermark\s+" - r"📦 Total memory allocated: ([\d.]+)(MiB|KiB|GiB|B)", -) -# Only the build phase is measured by default. Unlike timing benchmarks (where -# pytest-benchmark isolates the measured function), memray tracks all allocations -# within a test — including model construction in setup. This means LP write and -# matrix tests would report build + phase memory combined, making the phase-specific -# contribution hard to isolate. 
Since model construction dominates memory usage, -# measuring build alone gives the most accurate and actionable numbers. -DEFAULT_TEST_PATHS = [ - "benchmarks/test_build.py", -] - - -def _to_mib(value: float, unit: str) -> float: - factors = {"B": 1 / 1048576, "KiB": 1 / 1024, "MiB": 1, "GiB": 1024} - return value * factors[unit] - - -def _collect_test_ids(test_paths: list[str], quick: bool) -> list[str]: - """Collect test IDs without running them.""" - cmd = [ - sys.executable, - "-m", - "pytest", - *test_paths, - "--collect-only", - "-q", - ] - if quick: - cmd.append("--quick") - result = subprocess.run(cmd, capture_output=True, text=True) - return [ - line.strip() - for line in result.stdout.splitlines() - if "::" in line and not line.startswith(("=", "-", " ")) - ] - - -def save(label: str, quick: bool = False, test_paths: list[str] | None = None) -> Path: - """Run each benchmark in a separate process for accurate memory measurement.""" - if test_paths is None: - test_paths = DEFAULT_TEST_PATHS - test_ids = _collect_test_ids(test_paths, quick) - if not test_ids: - print("No tests collected.", file=sys.stderr) - sys.exit(1) - - print(f"Running {len(test_ids)} tests (each in a separate process)...") - entries = {} - for i, test_id in enumerate(test_ids, 1): - short = test_id.split("::")[-1] - print(f" [{i}/{len(test_ids)}] {short}...", end=" ", flush=True) - - cmd = [ - sys.executable, - "-m", - "pytest", - test_id, - "--memray", - "--benchmark-disable", - "-v", - "--tb=short", - "-q", - ] - result = subprocess.run(cmd, capture_output=True, text=True) - output = result.stdout + result.stderr - - match = MEMORY_RE.search(output) - if match: - value = float(match.group(2)) - unit = match.group(3) - mib = round(_to_mib(value, unit), 3) - entries[test_id] = mib - print(f"{mib:.1f} MiB") - elif "SKIPPED" in output or "skipped" in output: - print("skipped") - else: - print( - "WARNING: no memray data (pytest-memray output format may have changed)", - file=sys.stderr, 
- ) - - if not entries: - print("No memray results found. Is pytest-memray installed?", file=sys.stderr) - sys.exit(1) - - RESULTS_DIR.mkdir(parents=True, exist_ok=True) - out_path = RESULTS_DIR / f"{label}.json" - out_path.write_text(json.dumps({"label": label, "peak_mib": entries}, indent=2)) - print(f"\nSaved {len(entries)} results to {out_path}") - return out_path - - -def compare(label_a: str, label_b: str) -> None: - """Compare two saved memory results.""" - path_a = RESULTS_DIR / f"{label_a}.json" - path_b = RESULTS_DIR / f"{label_b}.json" - for p in (path_a, path_b): - if not p.exists(): - print(f"Not found: {p}. Run 'save {p.stem}' first.", file=sys.stderr) - sys.exit(1) - - data_a = json.loads(path_a.read_text())["peak_mib"] - data_b = json.loads(path_b.read_text())["peak_mib"] - - all_tests = sorted(set(data_a) | set(data_b)) - - print(f"\n{'Test':<60} {label_a:>10} {label_b:>10} {'Change':>10}") - print("-" * 94) - - for test in all_tests: - a = data_a.get(test) - b = data_b.get(test) - a_str = f"{a:.1f}" if a is not None else "—" - b_str = f"{b:.1f}" if b is not None else "—" - if a is not None and b is not None and a > 0: - pct = (b - a) / a * 100 - change = f"{pct:+.1f}%" - else: - change = "—" - # Shorten test name for readability - short = test.split("::")[-1] if "::" in test else test - print(f"{short:<60} {a_str:>10} {b_str:>10} {change:>10}") - - print() - - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - sub = parser.add_subparsers(dest="cmd", required=True) - - p_save = sub.add_parser("save", help="Run benchmarks and save memory results") - p_save.add_argument( - "label", help="Label for this run (e.g. 
'master', 'my-feature')" - ) - p_save.add_argument( - "--quick", action="store_true", help="Use smaller problem sizes" - ) - p_save.add_argument( - "--test-path", - nargs="+", - default=None, - help="Test file(s) to run (default: all phases)", - ) - - p_cmp = sub.add_parser("compare", help="Compare two saved runs") - p_cmp.add_argument("label_a", help="First run label (baseline)") - p_cmp.add_argument("label_b", help="Second run label") - - args = parser.parse_args() - if args.cmd == "save": - save(args.label, quick=args.quick, test_paths=args.test_path) - elif args.cmd == "compare": - compare(args.label_a, args.label_b) - - -if __name__ == "__main__": - main() diff --git a/benchmarks/models/__init__.py b/benchmarks/models/__init__.py index fcff9caf..66c9a7c7 100644 --- a/benchmarks/models/__init__.py +++ b/benchmarks/models/__init__.py @@ -1,21 +1,25 @@ -"""Model builders for benchmarks.""" +""" +Model builders for benchmarks. -from benchmarks.models.basic import SIZES as BASIC_SIZES -from benchmarks.models.basic import build_basic -from benchmarks.models.expression_arithmetic import SIZES as EXPR_SIZES -from benchmarks.models.expression_arithmetic import build_expression_arithmetic -from benchmarks.models.knapsack import SIZES as KNAPSACK_SIZES -from benchmarks.models.knapsack import build_knapsack -from benchmarks.models.sparse_network import SIZES as SPARSE_SIZES -from benchmarks.models.sparse_network import build_sparse_network +Importing this package triggers every submodule's ``register(...)`` call, +populating :data:`benchmarks.registry.REGISTRY`. Each submodule exposes a +``build_<name>(size) -> linopy.Model`` callable and a module-level ``SPEC`` +:class:`~benchmarks.registry.BenchSpec`. The documented access path is +``REGISTRY["<name>"]``; submodule re-exports are intentionally not exposed +here so that adding a new model is one new file plus one import below. 
+""" -__all__ = [ - "BASIC_SIZES", - "EXPR_SIZES", - "KNAPSACK_SIZES", - "SPARSE_SIZES", - "build_basic", - "build_expression_arithmetic", - "build_knapsack", - "build_sparse_network", -] +# Side-effect imports — each module calls ``register(...)`` at import time. +from benchmarks.models import ( # noqa: F401 + basic, + expression_arithmetic, + knapsack, + masked, + milp, + piecewise, + pypsa_scigrid, + qp, + sos, + sparse_network, + storage, +) diff --git a/benchmarks/models/basic.py b/benchmarks/models/basic.py index 2aea49d9..554ad05e 100644 --- a/benchmarks/models/basic.py +++ b/benchmarks/models/basic.py @@ -1,10 +1,11 @@ -"""Basic benchmark model: 2*N^2 variables and constraints.""" +"""Basic benchmark model: 2*N^2 variables and constraints (continuous LP).""" from __future__ import annotations import linopy +from benchmarks.registry import BenchSpec, register -SIZES = [10, 50, 100, 250, 500, 1000, 1600] +SIZES = (10, 250) def build_basic(n: int) -> linopy.Model: @@ -16,3 +17,12 @@ def build_basic(n: int) -> linopy.Model: m.add_constraints(x - y >= -5, name="lower") m.add_objective(x.sum() + 2 * y.sum()) return m + + +SPEC = register( + BenchSpec( + name="basic", + build=build_basic, + sweep=SIZES, + ) +) diff --git a/benchmarks/models/expression_arithmetic.py b/benchmarks/models/expression_arithmetic.py index 339c651d..0d5af581 100644 --- a/benchmarks/models/expression_arithmetic.py +++ b/benchmarks/models/expression_arithmetic.py @@ -5,8 +5,9 @@ import numpy as np import linopy +from benchmarks.registry import BenchSpec, register -SIZES = [10, 50, 100, 250, 500, 1000] +SIZES = (10, 250) def build_expression_arithmetic(n: int) -> linopy.Model: @@ -28,3 +29,12 @@ def build_expression_arithmetic(n: int) -> linopy.Model: m.add_constraints(expr1.sum("j") >= -10, name="row_sum") m.add_objective(combined.sum()) return m + + +SPEC = register( + BenchSpec( + name="expression_arithmetic", + build=build_expression_arithmetic, + sweep=SIZES, + ) +) diff --git 
a/benchmarks/models/knapsack.py b/benchmarks/models/knapsack.py index 83ce7394..fe01ad8b 100644 --- a/benchmarks/models/knapsack.py +++ b/benchmarks/models/knapsack.py @@ -1,12 +1,13 @@ -"""Knapsack benchmark model: N binary variables, 1 constraint.""" +"""Knapsack benchmark model: N binary variables, 1 constraint (MILP, binary).""" from __future__ import annotations import numpy as np import linopy +from benchmarks.registry import DEFAULT_PHASES, BenchSpec, register -SIZES = [100, 1_000, 10_000, 100_000, 1_000_000] +SIZES = (100, 10_000) def build_knapsack(n: int) -> linopy.Model: @@ -21,3 +22,13 @@ def build_knapsack(n: int) -> linopy.Model: m.add_constraints((x * weights).sum() <= capacity, name="capacity") m.add_objective(-(x * values).sum()) return m + + +SPEC = register( + BenchSpec( + name="knapsack", + build=build_knapsack, + sweep=SIZES, + phases=DEFAULT_PHASES, # HiGHS handles binary; matrices handles MILP + ) +) diff --git a/benchmarks/models/masked.py b/benchmarks/models/masked.py new file mode 100644 index 00000000..eb9255fb --- /dev/null +++ b/benchmarks/models/masked.py @@ -0,0 +1,86 @@ +""" +Masked-variables benchmark: transportation with sparse allowed routes. + +A standard transportation LP, but only a sparse subset of (origin, dest) pairs +are valid routes. The ``mask=`` keyword on ``add_variables`` skips the rest, +keeping the variable count sub-quadratic. + +Decision variables: + x[origin, dest] >= 0 continuous, only created for allowed routes + +Constraints: + sum_dest x[o, .] <= supply[o] + sum_orig x[., d] == demand[d] + +Objective: + minimize sum cost[o, d] * x[o, d] + +The mask is dense at small sizes and sparser at large sizes, mimicking +real-world transport networks where each origin only serves a fixed +fan-out regardless of total node count. 
+""" + +from __future__ import annotations + +import numpy as np +import xarray as xr + +import linopy +from benchmarks.registry import ( + DEFAULT_PHASES, + BenchSpec, + register, +) + +SIZES = (10, 100) + + +def build_masked(n: int) -> linopy.Model: + rng = np.random.default_rng(42) + origins = np.arange(n) + dests = np.arange(n) + + # Each origin serves exactly min(20, n) distinct destinations. + fan_out = min(20, n) + mask_np = np.zeros((n, n), dtype=bool) + for o in range(n): + # Deterministic fan-out so size determines connectivity. + targets = rng.choice(n, size=fan_out, replace=False) + mask_np[o, targets] = True + + mask = xr.DataArray(mask_np, coords=[("origin", origins), ("dest", dests)]) + cost = xr.DataArray( + rng.uniform(1, 10, size=(n, n)), + coords=[("origin", origins), ("dest", dests)], + ) + + # Supply scaled so the problem stays feasible at any size: + # each origin can ship up to ``demand_per_dest * n`` units (total demand). + demand_per_dest = 5.0 + supply_per_origin = demand_per_dest * n # plenty of slack + supply = xr.DataArray(np.full(n, supply_per_origin), coords=[("origin", origins)]) + demand = xr.DataArray(np.full(n, demand_per_dest), coords=[("dest", dests)]) + + m = linopy.Model() + x = m.add_variables( + lower=0, + coords=[("origin", origins), ("dest", dests)], + mask=mask, + name="x", + ) + + m.add_constraints(x.sum("dest") <= supply, name="supply", mask=mask.any("dest")) + m.add_constraints(x.sum("origin") == demand, name="demand", mask=mask.any("origin")) + + m.add_objective((cost * x).sum()) + return m + + +SPEC = register( + BenchSpec( + name="masked", + build=build_masked, + sweep=SIZES, + phases=DEFAULT_PHASES, + ) +) diff --git a/benchmarks/models/milp.py b/benchmarks/models/milp.py new file mode 100644 index 00000000..f6058cc8 --- /dev/null +++ b/benchmarks/models/milp.py @@ -0,0 +1,75 @@ +""" +MILP benchmark: capacitated facility location with general integers. 
+ +Decision variables: + y_f in {0,1,...,K} integer "modules" to open at facility f + x_{f,c} >= 0 continuous flow from facility f to customer c + +Constraints: + sum_c x_{f,c} <= cap * y_f (capacity per facility) + sum_f x_{f,c} == d_c (demand at each customer) + +Objective: + minimize sum_{f,c} t_{f,c} * x_{f,c} + sum_f f_f * y_f + +The general-integer ``y`` exercises the matrix accessor's MIP integer-section +path and the LP-writer's general-integer block — neither the binary knapsack +nor the continuous LPs hit those paths. +""" + +from __future__ import annotations + +import numpy as np + +import linopy +from benchmarks.registry import ( + DEFAULT_PHASES, + BenchSpec, + register, +) + +SIZES = (10, 50) + + +def build_milp(n: int) -> linopy.Model: + rng = np.random.default_rng(42) + facilities = np.arange(n) + customers = np.arange(n) + + cap = 100.0 # capacity per module + Y_MAX = 5 # max modules per facility + transport = rng.uniform(1, 20, size=(n, n)) # per-unit shipping cost + fixed = rng.uniform(50, 200, size=n) # cost per facility module + demand = rng.uniform(20, 80, size=n) # demand at each customer + + m = linopy.Model() + y = m.add_variables( + lower=0, + upper=Y_MAX, + coords=[facilities], + dims=["facility"], + integer=True, + name="y", + ) + x = m.add_variables( + lower=0, + coords=[facilities, customers], + dims=["facility", "customer"], + name="x", + ) + + m.add_constraints(x.sum("customer") - cap * y <= 0, name="capacity") + m.add_constraints(x.sum("facility") == demand, name="demand") + + m.add_objective((transport * x).sum() + (fixed * y).sum()) + return m + + +SPEC = register( + BenchSpec( + name="milp", + build=build_milp, + sweep=SIZES, + phases=DEFAULT_PHASES, + ) +) diff --git a/benchmarks/models/piecewise.py b/benchmarks/models/piecewise.py new file mode 100644 index 00000000..895e854a --- /dev/null +++ b/benchmarks/models/piecewise.py @@ -0,0 +1,89 @@ +""" +Piecewise-linear benchmark: generation with piecewise fuel-cost curves. 
+ +Each generator has a piecewise fuel cost curve pinned via +``add_piecewise_formulation``. The default ``method="auto"`` picks an +SOS2 or incremental expansion, generating auxiliary variables and +constraints — that overhead is what we want to measure. + +Decision variables: + power[gen] in [0, 100] (continuous) + fuel[gen] in [0, inf) (continuous, pinned to piecewise curve) + +Constraints: + sum_gen power[gen] >= demand + piecewise: fuel[gen] = f(power[gen]) for each gen + +Objective: + minimize sum_gen fuel[gen] +""" + +from __future__ import annotations + +import warnings + +import linopy +from benchmarks.registry import ( + DEFAULT_PHASES, + BenchSpec, + register, +) + +SIZES = (10, 1_000) + +_API_AVAILABLE = hasattr(linopy.Model, "add_piecewise_formulation") and hasattr( + linopy, "EvolvingAPIWarning" +) + + +def build_piecewise(n_gens: int) -> linopy.Model: + # Shared breakpoints, broadcast across generators. + x_pts = [0.0, 30.0, 60.0, 100.0] + y_pts = [0.0, 36.0, 84.0, 170.0] # convex-ish fuel curve + + m = linopy.Model() + power = m.add_variables( + lower=0, + upper=100, + coords=[range(n_gens)], + dims=["gen"], + name="power", + ) + fuel = m.add_variables( + lower=0, + coords=[range(n_gens)], + dims=["gen"], + name="fuel", + ) + + demand = 0.5 * n_gens * x_pts[-1] + m.add_constraints(power.sum() >= demand, name="demand") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=linopy.EvolvingAPIWarning) + m.add_piecewise_formulation( + (power, x_pts), + (fuel, y_pts), + ) + + m.add_objective(fuel.sum()) + return m + + +# ``add_piecewise_formulation`` is a recent (still-evolving) API. Skip +# registration silently on older linopy so the rest of the suite stays usable. 
+SPEC: BenchSpec | None +if _API_AVAILABLE: + SPEC = register( + BenchSpec( + name="piecewise", + build=build_piecewise, + sweep=SIZES, + # Monotonic breakpoints + ``method="auto"`` → incremental + # reformulation (pure MILP with binaries), which every supported + # solver handles. + phases=DEFAULT_PHASES, + ) + ) +else: + SPEC = None diff --git a/benchmarks/models/pypsa_scigrid.py b/benchmarks/models/pypsa_scigrid.py index 2fcce217..bb6e8653 100644 --- a/benchmarks/models/pypsa_scigrid.py +++ b/benchmarks/models/pypsa_scigrid.py @@ -1,20 +1,36 @@ -"""PyPSA SciGrid-DE benchmark model.""" +"""PyPSA SciGrid-DE benchmark model (requires pypsa).""" from __future__ import annotations from typing import TYPE_CHECKING +from benchmarks.registry import BenchSpec, register + if TYPE_CHECKING: import linopy -SIZES = [10, 50, 100, 200] +SIZES = (10, 50) # small networks — PyPSA import already dominates the cost def build_pypsa_scigrid(snapshots: int = 100) -> linopy.Model: """Build PyPSA SciGrid model. Requires pypsa to be installed.""" import pypsa + import pytest - n = pypsa.examples.scigrid_de() + try: + n = pypsa.examples.scigrid_de() + except Exception as exc: # network / example-data drift, not a linopy signal + pytest.skip(f"pypsa example data unavailable: {exc}") n.set_snapshots(n.snapshots[:snapshots]) - n.optimize.create_model() + n.optimize.create_model() # the linopy build under benchmark — unguarded return n.model + + +SPEC = register( + BenchSpec( + name="pypsa_scigrid", + build=build_pypsa_scigrid, + sweep=SIZES, + requires=("pypsa",), + ) +) diff --git a/benchmarks/models/qp.py b/benchmarks/models/qp.py new file mode 100644 index 00000000..50e39e7b --- /dev/null +++ b/benchmarks/models/qp.py @@ -0,0 +1,61 @@ +""" +QP benchmark: continuous quadratic objective on a portfolio-style model. 
+ +Decision variables: + x_i >= 0 (weight on asset i, continuous) + +Constraints: + sum_i x_i == 1 + x_i <= 0.3 (no asset > 30% of portfolio) + +Objective: + minimize sum_i q_i * x_i^2 - sum_i r_i * x_i + +A pure diagonal quadratic — enough to exercise the QP build / write / matrix +paths without paying for cross-terms. Cross-term coupling needs single-term +factors on both sides (see ``LinearExpression._multiply_by_linear_expression``), +which is awkward to set up cleanly via the public API. +""" + +from __future__ import annotations + +import numpy as np + +import linopy +from benchmarks.registry import ( + DEFAULT_PHASES, + BenchSpec, + register, +) + +SIZES = (10, 1_000) + + +def build_qp(n_assets: int) -> linopy.Model: + rng = np.random.default_rng(42) + q = rng.uniform(0.5, 2.0, size=n_assets) + r = rng.uniform(0.05, 0.15, size=n_assets) + + m = linopy.Model() + x = m.add_variables( + lower=0, + upper=0.3, + coords=[range(n_assets)], + dims=["asset"], + name="x", + ) + + m.add_constraints(x.sum() == 1, name="budget") + + m.add_objective((q * x**2).sum() - (r * x).sum()) + return m + + +SPEC = register( + BenchSpec( + name="qp", + build=build_qp, + sweep=SIZES, + phases=DEFAULT_PHASES, + ) +) diff --git a/benchmarks/models/sos.py b/benchmarks/models/sos.py new file mode 100644 index 00000000..3c1e2db8 --- /dev/null +++ b/benchmarks/models/sos.py @@ -0,0 +1,96 @@ +""" +SOS1 benchmark: multi-mode generation with at-most-one-mode-per-generator. + +Each generator has ``n_modes`` operating modes (different cap/cost tradeoff). +SOS1 over the ``mode`` dimension enforces that each generator picks at most +one mode. + +Decision variables: + y[gen, mode] >= 0 continuous output per (generator, mode) + +Constraints: + y[gen, mode] <= cap[mode] + sum_{gen,mode} y >= demand_total + SOS1 over "mode" for each gen + +This benchmark exercises ``Model.add_sos_constraints`` (commits be6d3a3 / +8aa8d0c) and the LP-writer's SOS section. 
In linopy, native SOS support is +declared by Gurobi / Cplex / Xpress only (see ``SolverFeature.SOS_CONSTRAINTS``). +HiGHS and Mosek would need ``apply_sos_reformulation()`` first. +""" + +from __future__ import annotations + +import numpy as np +import xarray as xr + +import linopy +from benchmarks.registry import ( + BUILD, + FROM_NETCDF, + MATRICES, + TO_GUROBIPY, + TO_LP, + TO_NETCDF, + TO_XPRESS, + BenchSpec, + register, +) + +SIZES = (10, 1_000) + +_N_MODES = 5 +_API_AVAILABLE = hasattr(linopy.Model, "add_sos_constraints") + + +def build_sos(n_gens: int) -> linopy.Model: + modes = np.arange(_N_MODES) + cap = xr.DataArray(np.linspace(20.0, 100.0, _N_MODES), coords=[("mode", modes)]) + cost = xr.DataArray(np.linspace(1.0, 8.0, _N_MODES), coords=[("mode", modes)]) + + m = linopy.Model() + y = m.add_variables( + lower=0, + upper=float(cap.max()), + coords=[range(n_gens), modes], + dims=["gen", "mode"], + name="y", + ) + + m.add_constraints(y <= cap, name="mode_cap") + demand_total = 0.4 * n_gens * float(cap.max()) + m.add_constraints(y.sum() >= demand_total, name="demand") + + m.add_sos_constraints(y, sos_type=1, sos_dim="mode") + + m.add_objective((cost * y).sum()) + return m + + +# ``add_sos_constraints`` is a recent API. On older linopy we silently skip +# registering this model — the rest of the suite stays usable. +SPEC: BenchSpec | None +if _API_AVAILABLE: + SPEC = register( + BenchSpec( + name="sos", + build=build_sos, + sweep=SIZES, + # HiGHS / Mosek lack native SOS in linopy — would need + # ``reformulate_sos=True``, which mutates the model and defeats + # the benchmark. Only solvers with native SOS appear here. 
+ phases=frozenset( + { + BUILD, + MATRICES, + TO_LP, + TO_NETCDF, + FROM_NETCDF, + TO_GUROBIPY, + TO_XPRESS, + } + ), + ) + ) +else: + SPEC = None diff --git a/benchmarks/models/sparse_network.py b/benchmarks/models/sparse_network.py index afc6be06..13d6c3ad 100644 --- a/benchmarks/models/sparse_network.py +++ b/benchmarks/models/sparse_network.py @@ -7,8 +7,9 @@ import xarray as xr import linopy +from benchmarks.registry import BenchSpec, register -SIZES = [10, 50, 100, 250, 500, 1000] +SIZES = (10, 250) def build_sparse_network(n_buses: int) -> linopy.Model: @@ -48,3 +49,12 @@ def build_sparse_network(n_buses: int) -> linopy.Model: m.add_objective(gen.sum()) return m + + +SPEC = register( + BenchSpec( + name="sparse_network", + build=build_sparse_network, + sweep=SIZES, + ) +) diff --git a/benchmarks/models/storage.py b/benchmarks/models/storage.py new file mode 100644 index 00000000..5e841728 --- /dev/null +++ b/benchmarks/models/storage.py @@ -0,0 +1,53 @@ +""" +Storage state-of-charge model — intertemporal coupling via ``.shift()``. + +A fleet of storage units, each with a bidiagonal SoC recursion +``soc[t] - decay*soc[t-1] - eff*charge[t] + discharge[t]/eff == 0`` built with +``soc.shift(time=1)`` (``t=0`` falls off as the boundary). This is the one op +family no other model exercises — the ``.shift()``/``.isel()`` intertemporal +coupling that PyPSA's SoC and flixopt's ``charge_state.isel`` recursion lean on. + +It is a *model*, not a pattern: each balance row has a fixed ~4 terms regardless +of horizon or unit count, so it scales with ``size`` (units × timesteps) and has +no benign→worst data-shape dial. ``size`` is the number of storage units. 
+""" + +from __future__ import annotations + +import pandas as pd + +import linopy +from benchmarks.registry import BenchSpec, register + +SIZES = (10, 250) +N_TIME = 168 +DECAY = 0.99 +ETA = 0.95 + + +def build_storage(n_storage: int) -> linopy.Model: + storages = pd.RangeIndex(n_storage, name="storage") + time = pd.RangeIndex(N_TIME, name="time") + + m = linopy.Model() + soc = m.add_variables(lower=0, upper=100, coords=[storages, time], name="soc") + charge = m.add_variables(lower=0, upper=50, coords=[storages, time], name="charge") + discharge = m.add_variables( + lower=0, upper=50, coords=[storages, time], name="discharge" + ) + + prev = soc.shift(time=1) # soc[t-1]; t=0 shifted out (initial-SoC boundary) + m.add_constraints( + soc - DECAY * prev - ETA * charge + discharge / ETA == 0, name="soc_balance" + ) + m.add_objective((charge + discharge).sum()) + return m + + +SPEC = register( + BenchSpec( + name="storage", + build=build_storage, + sweep=SIZES, + ) +) diff --git a/benchmarks/patterns/__init__.py b/benchmarks/patterns/__init__.py new file mode 100644 index 00000000..09097674 --- /dev/null +++ b/benchmarks/patterns/__init__.py @@ -0,0 +1,22 @@ +""" +Benchmark *patterns* — realistic modelling idioms swept over a severity dial. + +A pattern is a fragment of real modelling code (a balance constraint, a KVL +contraction), not a whole model and not an isolated method call. Each is +measured the same way a model is — time and peak memory, through the shared +phases — but parametrised by ``severity`` (0–100, how pathological the data +shape is) instead of ``size``. See :class:`benchmarks.registry.BenchSpec`. + +Importing this package registers every idiom into +:data:`benchmarks.registry.PATTERNS` (mirrors :mod:`benchmarks.models`); adding +a pattern is one new file plus one import below. +""" + +# Side-effect imports — each module calls ``register_pattern(...)`` at import. 
+from benchmarks.patterns import ( # noqa: F401 + cumsum, + kvl_cycles, + merge_balance, + nodal_balance, + rolling, +) diff --git a/benchmarks/patterns/cumsum.py b/benchmarks/patterns/cumsum.py new file mode 100644 index 00000000..212e96e7 --- /dev/null +++ b/benchmarks/patterns/cumsum.py @@ -0,0 +1,44 @@ +""" +Cumulative-sum fold — ``.cumsum(dim)`` stacks a growing window into ``_term``. + +A running total over time — cumulative energy, a rolling budget: +``(1 * x).cumsum("time")``. linopy currently routes ``cumsum`` through +``rolling(window=full_dim)`` (``expressions.py``), so its ``_term`` grows +triangularly to the dim size. It is benchmarked as its own op — not folded into +``rolling`` — because it is a distinct public op and a natural de-densification +target (a prefix sum need not materialise the triangle), so this is the +instrument that would show such a kernel change land. ``severity`` dials the +size of the cumulated dimension. +""" + +from __future__ import annotations + +import pandas as pd + +import linopy +from benchmarks.registry import SEVERITIES, BenchSpec, register_pattern + +N_ROW = 64 # broadcast/volume dim — the triangular fold is on t, not row +DIM_MAX = 200 + + +def build_cumsum(severity: int) -> linopy.Model: + rows = pd.RangeIndex(N_ROW, name="row") + n = max(2, round(severity / 100 * DIM_MAX)) + + m = linopy.Model() + x = m.add_variables(coords=[rows, pd.RangeIndex(n, name="t")], name="x") + running = (1 * x).cumsum("t") # (row, t); _term grows triangularly to n + m.add_constraints(running == 0, name="cumulative") + m.add_objective((1 * x).sum()) + return m + + +SPEC = register_pattern( + BenchSpec( + name="cumsum", + build=build_cumsum, + sweep=SEVERITIES, + axis="severity", + ) +) diff --git a/benchmarks/patterns/kvl_cycles.py b/benchmarks/patterns/kvl_cycles.py new file mode 100644 index 00000000..5657eedd --- /dev/null +++ b/benchmarks/patterns/kvl_cycles.py @@ -0,0 +1,73 @@ +""" +KVL-cycles pattern — sparse ``@`` densifies the 
result to a full ``_term`` (#748). + +The idiom: contract a per-branch flow against a (branch × cycle) cycle matrix — +Kirchhoff's voltage law, ``flow @ C``. ``__matmul__`` is ``(flow * C).sum(...)``, +which stacks *every* branch into ``_term`` regardless of whether ``C`` is zero +there. ``severity`` dials ``C``'s sparsity: at 0 it is dense (every branch in +every cycle — nothing to gain), at 100 only ~3 branches per cycle carry a +nonzero (the real grid shape), yet the current kernel still produces +``_term == n_branch``. So the *cost is flat* across severity on today's kernel +— the win from a sparse-aware ``@`` is what grows with it. +""" + +from __future__ import annotations + +import numpy as np +import pandas as pd +import xarray as xr + +import linopy +from benchmarks.registry import SEVERITIES, BenchSpec, register_pattern + +N_BRANCH = 300 +N_CYCLE = 100 +N_TIME = 168 # snapshot horizon — sets the always-paid flat level (the +# densification width is branch; severity dials C's sparsity, which today's +# kernel ignores, so memory stays flat across severity) +MIN_PER_CYCLE = 3 + + +def _cycle_matrix(severity: int, branches: pd.Index, cycles: pd.Index) -> xr.DataArray: + """ + Branch×cycle incidence whose density falls as ``severity`` rises. + + - ``severity == 0`` → dense: every branch participates in every cycle. + - ``severity == 100`` → ~``MIN_PER_CYCLE`` branches per cycle (real KVL). + + Entries are ±1. The number of nonzeros per cycle interpolates linearly + between ``N_BRANCH`` (dense) and ``MIN_PER_CYCLE`` (sparse). 
+ """ + rng = np.random.default_rng(0) + n_branch = len(branches) + per_cycle = round(n_branch - severity / 100 * (n_branch - MIN_PER_CYCLE)) + per_cycle = max(MIN_PER_CYCLE, per_cycle) + c_mat = np.zeros((n_branch, len(cycles))) + for col in range(len(cycles)): + idx = rng.choice(n_branch, size=per_cycle, replace=False) + c_mat[idx, col] = rng.choice([-1.0, 1.0], size=per_cycle) + return xr.DataArray(c_mat, coords=[branches, cycles]) + + +def build_kvl_cycles(severity: int) -> linopy.Model: + branches = pd.RangeIndex(N_BRANCH, name="branch") + cycles = pd.RangeIndex(N_CYCLE, name="cycle") + time = pd.RangeIndex(N_TIME, name="time") + + m = linopy.Model() + flow = m.add_variables(lower=-100, upper=100, coords=[time, branches], name="flow") + cycle_matrix = _cycle_matrix(severity, branches, cycles) + kvl = (flow * cycle_matrix).sum("branch") + m.add_constraints(kvl == 0.0, name="kvl") + m.add_objective(flow.sum()) + return m + + +SPEC = register_pattern( + BenchSpec( + name="kvl_cycles", + build=build_kvl_cycles, + sweep=SEVERITIES, + axis="severity", + ) +) diff --git a/benchmarks/patterns/merge_balance.py b/benchmarks/patterns/merge_balance.py new file mode 100644 index 00000000..84bb1d91 --- /dev/null +++ b/benchmarks/patterns/merge_balance.py @@ -0,0 +1,57 @@ +""" +Ragged merge — concat of mixed-width blocks pads all to the global max (#749). + +The documented build peak: a balance assembled by merging sub-expressions of +*different* ``_term`` widths along a shared dim. PyPSA's nodal balance does +``merge(gen + storage + lines + links, join="outer")`` (the single largest +allocation in a SciGRID build); flixopt's bus balance is the sibling +``sum([flow_rate for flow in flows])``. Merging along a non-``_term`` dim makes +linopy align the ``_term`` axes by padding every block to the widest one — so +one fat block leaves the narrow blocks mostly fill. ``severity`` dials the +widest block's term count. 
+""" + +from __future__ import annotations + +import pandas as pd + +import linopy +from benchmarks.registry import SEVERITIES, BenchSpec, register_pattern + +N_BLOCKS = 30 +N_ROW = 128 # broadcast/volume dim — the ragged padding is on _term, not row +NARROW = 3 +WIDE = 200 + + +def _block( + m: linopy.Model, rows: pd.Index, name: str, width: int +) -> linopy.LinearExpression: + """A ``(row,)`` expression with ``width`` terms (a ``(row, k)`` var folded over ``k``).""" + k = pd.RangeIndex(width, name=f"k_{name}") + x = m.add_variables(coords=[rows, k], name=name) + return (1 * x).sum(f"k_{name}") + + +def build_merge_balance(severity: int) -> linopy.Model: + rows = pd.RangeIndex(N_ROW, name="row") + widest = max(NARROW, round(NARROW + severity / 100 * (WIDE - NARROW))) + + m = linopy.Model() + blocks = [_block(m, rows, f"narrow{i}", NARROW) for i in range(N_BLOCKS - 1)] + blocks.append(_block(m, rows, "wide", widest)) + + lhs = linopy.merge(blocks, dim="block", join="outer") + m.add_constraints(lhs == 0, name="balance") + m.add_objective(blocks[0]) + return m + + +SPEC = register_pattern( + BenchSpec( + name="merge_balance", + build=build_merge_balance, + sweep=SEVERITIES, + axis="severity", + ) +) diff --git a/benchmarks/patterns/nodal_balance.py b/benchmarks/patterns/nodal_balance.py new file mode 100644 index 00000000..458df39a --- /dev/null +++ b/benchmarks/patterns/nodal_balance.py @@ -0,0 +1,72 @@ +""" +Nodal-balance pattern — grouped-sum padding under bus-connectivity skew (#745). + +The idiom: sum each bus's generators (``groupby(bus).sum()``) and balance the +result against demand. ``LinearExpression.groupby(...).sum()`` pads every group +to the largest group's term count, so as generators concentrate on one hub the +result's ``_term`` axis blows up — most of it fill. ``severity`` dials that +skew; the build's peak memory is expected to climb steeply with it on the +current (dense) kernel. 
+""" + +from __future__ import annotations + +import numpy as np +import pandas as pd +import xarray as xr + +import linopy +from benchmarks.registry import SEVERITIES, BenchSpec, register_pattern + +N_GEN = 2000 +N_BUS = 50 +N_TIME = 8 # broadcast/volume dim — the groupby pathology is on gen, not time + + +def _bus_of_gen(severity: int) -> np.ndarray: + """ + Assign each generator to a bus, skewed toward one hub by ``severity``. + + - ``severity == 0`` → round-robin: every bus holds ~``N_GEN / N_BUS``. + - ``severity == 100`` → bus 0 holds almost all generators. + + The first ``N_BUS`` generators anchor one bus each, so no bus is ever empty + — the constraint *shape* (``N_BUS`` rows) is fixed across the sweep and only + the per-group term count (the padding) varies. + """ + rng = np.random.default_rng(0) + bus = np.arange(N_GEN) % N_BUS # uniform baseline + anchor = np.zeros(N_GEN, dtype=bool) + anchor[:N_BUS] = True # pin one generator per bus + move = (~anchor) & (rng.random(N_GEN) < severity / 100) + bus[move] = 0 # reassign a severity-fraction of the rest onto the hub + return bus + + +def build_nodal_balance(severity: int) -> linopy.Model: + gens = pd.RangeIndex(N_GEN, name="gen") + time = pd.RangeIndex(N_TIME, name="time") + buses = pd.RangeIndex(N_BUS, name="bus") + rng = np.random.default_rng(1) + + m = linopy.Model() + gen = m.add_variables(lower=0, coords=[gens, time], name="gen") + + bus_of_gen = pd.Series(_bus_of_gen(severity), index=gens, name="bus") + supply = (1 * gen).groupby(bus_of_gen).sum() + demand = xr.DataArray( + rng.uniform(10.0, 100.0, size=(N_BUS, N_TIME)), coords=[buses, time] + ) + m.add_constraints(supply == demand, name="balance") + m.add_objective(gen.sum()) + return m + + +SPEC = register_pattern( + BenchSpec( + name="nodal_balance", + build=build_nodal_balance, + sweep=SEVERITIES, + axis="severity", + ) +) diff --git a/benchmarks/patterns/rolling.py b/benchmarks/patterns/rolling.py new file mode 100644 index 00000000..30065179 --- 
/dev/null +++ b/benchmarks/patterns/rolling.py @@ -0,0 +1,46 @@ +""" +Rolling-window coupling — ``rolling(K).sum()`` stacks K terms into ``_term``. + +The *windowed* form of intertemporal coupling (unlike the 1-step storage SoC, +this one has a real density dial): minimum up/down time and windowed energy / +ramp limits sum a variable over a sliding window of K timesteps +(PyPSA ``status.rolling(K).sum()`` for min-up-time, ``constraints.py:450``). +``rolling(K).sum()`` builds a result with **K terms per row** — so the window +width is a clean severity dial. ``severity`` dials K from a single step to the +full horizon. +""" + +from __future__ import annotations + +import pandas as pd + +import linopy +from benchmarks.registry import SEVERITIES, BenchSpec, register_pattern + +N_UNIT = 8 # broadcast dim — the window densification is on time, not unit +N_TIME = 1000 +MIN_WINDOW = 1 + + +def build_rolling(severity: int) -> linopy.Model: + units = pd.RangeIndex(N_UNIT, name="unit") + time = pd.RangeIndex(N_TIME, name="time") + window = max(MIN_WINDOW, round(MIN_WINDOW + severity / 100 * (N_TIME - MIN_WINDOW))) + + m = linopy.Model() + status = m.add_variables(lower=0, upper=1, coords=[units, time], name="status") + # min-up-time style: every K-step window carries at most K active steps. + windowed = status.rolling(time=window).sum() + m.add_constraints(windowed <= window, name="window_limit") + m.add_objective(status.sum()) + return m + + +SPEC = register_pattern( + BenchSpec( + name="rolling", + build=build_rolling, + sweep=SEVERITIES, + axis="severity", + ) +) diff --git a/benchmarks/phases.py b/benchmarks/phases.py new file mode 100644 index 00000000..983fb9c1 --- /dev/null +++ b/benchmarks/phases.py @@ -0,0 +1,74 @@ +""" +The measured operations — what each benchmark phase *does to a model*. + +The ``test_*.py`` drivers wrap these verbs in ``benchmark(...)``; setup +(building the model, scratch files) stays in the driver, only the verb itself +lives here. 
+""" + +from __future__ import annotations + +import inspect +from collections.abc import Callable +from pathlib import Path + +import linopy +import linopy.io as lio +from benchmarks.registry import TO_GUROBIPY, TO_HIGHSPY, TO_MOSEK, TO_XPRESS +from linopy import read_netcdf + +# linopy <0.4.1's ``to_file`` doesn't accept ``progress``. Checked once at import +# so the suite stays runnable against older linopy (e.g. cross-version sweeps), +# and the benchmark loop stays branchless. +_TO_FILE_HAS_PROGRESS = "progress" in inspect.signature(linopy.Model.to_file).parameters + +# Re-export so a driver can ``from benchmarks.phases import read_netcdf``. +__all__ = [ + "SOLVER_HANDOFFS", + "read_netcdf", + "touch_matrices", + "write_lp", + "write_netcdf", +] + + +def touch_matrices(m: linopy.Model) -> None: + """Force every matrix block to materialise — the thing we measure.""" + mats = m.matrices + for attr in ("A", "b", "c", "lb", "ub", "sense", "vlabels", "clabels"): + getattr(mats, attr) + if m.is_quadratic: + mats.Q + + +def write_lp(m: linopy.Model, path: Path) -> None: + """ + Write the model as an LP file. + + Where supported, ``progress=False`` is pinned so the progress bar's overhead + doesn't leak into the measurement; linopy <0.4.1 doesn't accept the kwarg. + """ + if _TO_FILE_HAS_PROGRESS: + m.to_file(path, progress=False) + else: + m.to_file(path) + + +def write_netcdf(m: linopy.Model, path: Path) -> None: + m.to_netcdf(path) + + +# (solver_name, registry phase tag, wrapper) — consumed by test_to_solver.py. +# Each wrapper is fetched via ``getattr`` so the tuple silently drops any wrapper +# missing from the installed linopy (e.g. ``to_xpress`` is absent before linopy +# 0.7.1) — keeping the suite runnable on older releases for cross-version sweeps. +SOLVER_HANDOFFS: tuple[tuple[str, str, Callable[[linopy.Model], object]], ...] 
= tuple( + (name, tag, wrapper) + for name, tag, wrapper in ( + ("highs", TO_HIGHSPY, getattr(lio, "to_highspy", None)), + ("gurobi", TO_GUROBIPY, getattr(lio, "to_gurobipy", None)), + ("mosek", TO_MOSEK, getattr(lio, "to_mosek", None)), + ("xpress", TO_XPRESS, getattr(lio, "to_xpress", None)), + ) + if wrapper is not None +) diff --git a/benchmarks/registry.py b/benchmarks/registry.py new file mode 100644 index 00000000..5f3f98ef --- /dev/null +++ b/benchmarks/registry.py @@ -0,0 +1,137 @@ +""" +Registry of benchmark models and patterns. + +A :class:`BenchSpec` declares how to build a model and which values (sizes for a +model, ``axis="n"``; severities for a pattern, ``axis="severity"``) and phases +it runs; ``register`` / ``register_pattern`` add it to :data:`REGISTRY` / +:data:`PATTERNS`:: + + from benchmarks import REGISTRY + model = REGISTRY["basic"].build(100) +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from dataclasses import dataclass + +import linopy + +# --- Phase tags ------------------------------------------------------------- + +BUILD = "build" +MATRICES = "matrices" +TO_LP = "to_lp" +TO_NETCDF = "to_netcdf" +FROM_NETCDF = "from_netcdf" +TO_HIGHSPY = "to_highspy" +TO_GUROBIPY = "to_gurobipy" +TO_MOSEK = "to_mosek" +TO_XPRESS = "to_xpress" + +ALL_PHASES = frozenset( + { + BUILD, + MATRICES, + TO_LP, + TO_NETCDF, + FROM_NETCDF, + TO_HIGHSPY, + TO_GUROBIPY, + TO_MOSEK, + TO_XPRESS, + } +) + +# The default phase set; a spec overrides with a narrower one when the default +# solvers can't ingest it natively (e.g. native SOS for HiGHS). +DEFAULT_PHASES = ALL_PHASES + +# The severity sweep every pattern runs (axis "severity"). +SEVERITIES: tuple[int, ...] = (0, 50, 100) + + +@dataclass(frozen=True, repr=False) +class BenchSpec: + """ + One benchmark spec. A model is swept over ``sweep`` sizes (``axis="n"``); a + pattern over a 0–100 severity dial (``axis="severity"``). 
Both build a + :class:`linopy.Model` from one integer and run the same ``phases`` — the + model-vs-pattern distinction lives in :func:`register` vs + :func:`register_pattern` (and the ``models/`` vs ``patterns/`` dirs). + """ + + name: str + build: Callable[[int], linopy.Model] + sweep: tuple[int, ...] + axis: str = "n" + phases: frozenset[str] = DEFAULT_PHASES + requires: tuple[str, ...] = () + + def applies_to(self, phase: str) -> bool: + return phase in self.phases + + def __repr__(self) -> str: + return f"BenchSpec({self.name!r}, axis={self.axis!r}, sweep={self.sweep})" + + +REGISTRY: dict[str, BenchSpec] = {} +PATTERNS: dict[str, BenchSpec] = {} + + +def _validate(spec: BenchSpec, registry: dict[str, BenchSpec], kind: str) -> None: + if spec.name in registry: + raise ValueError(f"{kind} {spec.name!r} already registered") + unknown = spec.phases - ALL_PHASES + if unknown: + raise ValueError(f"{kind} {spec.name!r}: unknown phases {sorted(unknown)}") + + +def register(spec: BenchSpec) -> BenchSpec: + """Add a model ``spec`` to :data:`REGISTRY`. Returns it for chaining.""" + _validate(spec, REGISTRY, "model") + REGISTRY[spec.name] = spec + return spec + + +def register_pattern(spec: BenchSpec) -> BenchSpec: + """Add a pattern ``spec`` (``axis="severity"``) to :data:`PATTERNS`.""" + _validate(spec, PATTERNS, "pattern") + if spec.axis != "severity" or not all(0 <= s <= 100 for s in spec.sweep): + raise ValueError( + f"pattern {spec.name!r}: needs axis='severity' and sweep in [0, 100], " + f"got axis={spec.axis!r} sweep={spec.sweep}" + ) + PATTERNS[spec.name] = spec + return spec + + +def all_specs() -> list[BenchSpec]: + """Every spec in the suite — models then patterns.""" + return [*REGISTRY.values(), *PATTERNS.values()] + + +def iter_params( + phase: str, specs: Iterable[BenchSpec] | None = None +) -> list[tuple[BenchSpec, int]]: + """ + Flatten ``(spec, value)`` pairs for one phase — the pytest parametrize + source. 
``specs`` defaults to every model and pattern in the suite. + """ + specs = all_specs() if specs is None else specs + return [ + (spec, value) + for spec in specs + if spec.applies_to(phase) + for value in spec.sweep + ] + + +def spec_param_id(name: str, axis: str, value: object) -> str: + """ + The ``<name>-<axis>=<value>`` fragment that fills a test id's ``[...]``. + + Single source of truth for the parametrize-id shape — the pytest param + ids and the solver-handoff ids all build on it. + """ + return f"{name}-{axis}={value}" diff --git a/benchmarks/test_build.py b/benchmarks/test_build.py index f657715e..8d6e536a 100644 --- a/benchmarks/test_build.py +++ b/benchmarks/test_build.py @@ -2,52 +2,17 @@ from __future__ import annotations -import pytest +from collections.abc import Callable +from typing import TYPE_CHECKING -from benchmarks.conftest import skip_if_quick -from benchmarks.models import ( - BASIC_SIZES, - EXPR_SIZES, - KNAPSACK_SIZES, - SPARSE_SIZES, - build_basic, - build_expression_arithmetic, - build_knapsack, - build_sparse_network, -) -from benchmarks.models.pypsa_scigrid import SIZES as PYPSA_SIZES +from benchmarks.conftest import cases, require +from benchmarks.registry import BUILD +if TYPE_CHECKING: + from benchmarks.registry import BenchSpec -@pytest.mark.parametrize("n", BASIC_SIZES, ids=[f"n={n}" for n in BASIC_SIZES]) -def test_build_basic(benchmark, n, request): - skip_if_quick(request, "basic", n) - benchmark(build_basic, n) - -@pytest.mark.parametrize("n", KNAPSACK_SIZES, ids=[f"n={n}" for n in KNAPSACK_SIZES]) -def test_build_knapsack(benchmark, n, request): - skip_if_quick(request, "knapsack", n) - benchmark(build_knapsack, n) - - -@pytest.mark.parametrize("n", EXPR_SIZES, ids=[f"n={n}" for n in EXPR_SIZES]) -def test_build_expression_arithmetic(benchmark, n, request): - skip_if_quick(request, "expression_arithmetic", n) - benchmark(build_expression_arithmetic, n) - - -@pytest.mark.parametrize("n", SPARSE_SIZES, ids=[f"n={n}" for n in SPARSE_SIZES]) -def 
test_build_sparse_network(benchmark, n, request): - skip_if_quick(request, "sparse_network", n) - benchmark(build_sparse_network, n) - - -@pytest.mark.parametrize( - "snapshots", PYPSA_SIZES, ids=[f"snapshots={s}" for s in PYPSA_SIZES] -) -def test_build_pypsa_scigrid(benchmark, snapshots, request): - pytest.importorskip("pypsa") - skip_if_quick(request, "pypsa_scigrid", snapshots) - from benchmarks.models.pypsa_scigrid import build_pypsa_scigrid - - benchmark(build_pypsa_scigrid, snapshots) +@cases(BUILD) +def test_build(benchmark: Callable[..., object], spec: BenchSpec, n: int) -> None: + require(spec) + benchmark(lambda: spec.build(n)) diff --git a/benchmarks/test_lp_write.py b/benchmarks/test_lp_write.py deleted file mode 100644 index 6442ccd6..00000000 --- a/benchmarks/test_lp_write.py +++ /dev/null @@ -1,63 +0,0 @@ -"""Benchmarks for LP file writing speed.""" - -from __future__ import annotations - -import pytest - -from benchmarks.conftest import skip_if_quick -from benchmarks.models import ( - BASIC_SIZES, - EXPR_SIZES, - KNAPSACK_SIZES, - SPARSE_SIZES, - build_basic, - build_expression_arithmetic, - build_knapsack, - build_sparse_network, -) -from benchmarks.models.pypsa_scigrid import SIZES as PYPSA_SIZES - - -@pytest.mark.parametrize("n", BASIC_SIZES, ids=[f"n={n}" for n in BASIC_SIZES]) -def test_lp_write_basic(benchmark, n, request, tmp_path): - skip_if_quick(request, "basic", n) - m = build_basic(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize("n", KNAPSACK_SIZES, ids=[f"n={n}" for n in KNAPSACK_SIZES]) -def test_lp_write_knapsack(benchmark, n, request, tmp_path): - skip_if_quick(request, "knapsack", n) - m = build_knapsack(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize("n", EXPR_SIZES, ids=[f"n={n}" for n in EXPR_SIZES]) -def test_lp_write_expression_arithmetic(benchmark, n, request, tmp_path): - 
skip_if_quick(request, "expression_arithmetic", n) - m = build_expression_arithmetic(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize("n", SPARSE_SIZES, ids=[f"n={n}" for n in SPARSE_SIZES]) -def test_lp_write_sparse_network(benchmark, n, request, tmp_path): - skip_if_quick(request, "sparse_network", n) - m = build_sparse_network(n) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) - - -@pytest.mark.parametrize( - "snapshots", PYPSA_SIZES, ids=[f"snapshots={s}" for s in PYPSA_SIZES] -) -def test_lp_write_pypsa_scigrid(benchmark, snapshots, request, tmp_path): - pytest.importorskip("pypsa") - skip_if_quick(request, "pypsa_scigrid", snapshots) - from benchmarks.models.pypsa_scigrid import build_pypsa_scigrid - - m = build_pypsa_scigrid(snapshots) - lp_file = tmp_path / "model.lp" - benchmark(m.to_file, lp_file, progress=False) diff --git a/benchmarks/test_matrices.py b/benchmarks/test_matrices.py index 352844fb..a7e61b05 100644 --- a/benchmarks/test_matrices.py +++ b/benchmarks/test_matrices.py @@ -2,48 +2,18 @@ from __future__ import annotations -import pytest +from collections.abc import Callable +from typing import TYPE_CHECKING -from benchmarks.conftest import skip_if_quick -from benchmarks.models import ( - BASIC_SIZES, - EXPR_SIZES, - SPARSE_SIZES, - build_basic, - build_expression_arithmetic, - build_sparse_network, -) +from benchmarks.conftest import build_model, cases +from benchmarks.phases import touch_matrices +from benchmarks.registry import MATRICES +if TYPE_CHECKING: + from benchmarks.registry import BenchSpec -def _access_matrices(m): - """Access all matrix properties to force computation.""" - matrices = m.matrices - _ = matrices.A - _ = matrices.b - _ = matrices.c - _ = matrices.lb - _ = matrices.ub - _ = matrices.sense - _ = matrices.vlabels - _ = matrices.clabels - -@pytest.mark.parametrize("n", BASIC_SIZES, ids=[f"n={n}" for n in BASIC_SIZES]) -def 
test_matrices_basic(benchmark, n, request): - skip_if_quick(request, "basic", n) - m = build_basic(n) - benchmark(_access_matrices, m) - - -@pytest.mark.parametrize("n", EXPR_SIZES, ids=[f"n={n}" for n in EXPR_SIZES]) -def test_matrices_expression_arithmetic(benchmark, n, request): - skip_if_quick(request, "expression_arithmetic", n) - m = build_expression_arithmetic(n) - benchmark(_access_matrices, m) - - -@pytest.mark.parametrize("n", SPARSE_SIZES, ids=[f"n={n}" for n in SPARSE_SIZES]) -def test_matrices_sparse_network(benchmark, n, request): - skip_if_quick(request, "sparse_network", n) - m = build_sparse_network(n) - benchmark(_access_matrices, m) +@cases(MATRICES) +def test_matrices(benchmark: Callable[..., object], spec: BenchSpec, n: int) -> None: + m = build_model(spec, n) + benchmark(lambda: touch_matrices(m)) diff --git a/benchmarks/test_netcdf.py b/benchmarks/test_netcdf.py new file mode 100644 index 00000000..3764e31d --- /dev/null +++ b/benchmarks/test_netcdf.py @@ -0,0 +1,39 @@ +""" +Benchmarks for the netCDF persistence round-trip. + +We track ``to_netcdf`` and ``read_netcdf`` separately because the cost split +matters in practice: distributed workflows tend to do many reads of a single +written artifact. 
+""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import TYPE_CHECKING + +from benchmarks.conftest import build_model, cases +from benchmarks.phases import read_netcdf, write_netcdf +from benchmarks.registry import FROM_NETCDF, TO_NETCDF + +if TYPE_CHECKING: + from pathlib import Path + + from benchmarks.registry import BenchSpec + + +@cases(TO_NETCDF) +def test_to_netcdf( + benchmark: Callable[..., object], spec: BenchSpec, n: int, tmp_path: Path +) -> None: + m = build_model(spec, n) + benchmark(lambda: write_netcdf(m, tmp_path / "model.nc")) + + +@cases(FROM_NETCDF) +def test_from_netcdf( + benchmark: Callable[..., object], spec: BenchSpec, n: int, tmp_path: Path +) -> None: + m = build_model(spec, n) + path = tmp_path / "model.nc" + write_netcdf(m, path) # setup — untimed + benchmark(lambda: read_netcdf(path)) diff --git a/benchmarks/test_pipeline.py b/benchmarks/test_pipeline.py new file mode 100644 index 00000000..1033ad2e --- /dev/null +++ b/benchmarks/test_pipeline.py @@ -0,0 +1,38 @@ +""" +End-to-end pipeline benchmark: build → matrices → LP write in one region. + +Opt-in (deselected unless ``--pipeline``): it re-runs the per-phase work and, +unlike the individual phase benchmarks, *includes the model build* — so it +captures the end-to-end cost/peak a real build-then-export session hits, which +can't be recovered by summing the marginal per-phase numbers. Parametrized over +the ``to_lp`` specs (it ends in an LP write). 
+""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import TYPE_CHECKING + +from benchmarks.conftest import cases, require +from benchmarks.phases import touch_matrices, write_lp +from benchmarks.registry import TO_LP + +if TYPE_CHECKING: + from pathlib import Path + + from benchmarks.registry import BenchSpec + + +@cases(TO_LP) +def test_pipeline( + benchmark: Callable[..., object], spec: BenchSpec, n: int, tmp_path: Path +) -> None: + require(spec) + path = tmp_path / "model.lp" + + def pipeline() -> None: + m = spec.build(n) + touch_matrices(m) + write_lp(m, path) + + benchmark(pipeline) diff --git a/benchmarks/test_pypsa_carbon_management.py b/benchmarks/test_pypsa_carbon_management.py index 7f29a52e..209416ba 100644 --- a/benchmarks/test_pypsa_carbon_management.py +++ b/benchmarks/test_pypsa_carbon_management.py @@ -1,43 +1,60 @@ -import pypsa +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + import pytest import linopy as lp +# pypsa is an optional benchmark dep. Skip the whole module if it's missing +# so the rest of the suite stays collectable without it. 
+pypsa = pytest.importorskip("pypsa") + @pytest.fixture(scope="module") -def network(): - return pypsa.examples.carbon_management() +def network() -> Any: + try: + return pypsa.examples.carbon_management() + except Exception as exc: # network / example-data drift, not a linopy signal + pytest.skip(f"pypsa example data unavailable: {exc}") -def test_create_model_frozen(benchmark, network): +def test_create_model_frozen(benchmark: Callable[..., object], network: Any) -> None: benchmark(network.optimize.create_model, freeze_constraints=True) -def test_create_model_mutable(benchmark, network): +def test_create_model_mutable(benchmark: Callable[..., object], network: Any) -> None: benchmark(network.optimize.create_model, freeze_constraints=False) @pytest.fixture(scope="module") -def model_frozen(network): +def model_frozen(network: Any) -> Any: return network.optimize.create_model(freeze_constraints=True) @pytest.fixture(scope="module") -def model_mutable(network): +def model_mutable(network: Any) -> Any: return network.optimize.create_model(freeze_constraints=False) -def test_to_highspy_frozen(benchmark, model_frozen): +def test_to_highspy_frozen(benchmark: Callable[..., object], model_frozen: Any) -> None: benchmark(lp.io.to_highspy, model_frozen) -def test_to_highspy_mutable(benchmark, model_mutable): +def test_to_highspy_mutable( + benchmark: Callable[..., object], model_mutable: Any +) -> None: benchmark(lp.io.to_highspy, model_mutable) -def test_to_highspy_mutable_no_names(benchmark, model_mutable): +def test_to_highspy_mutable_no_names( + benchmark: Callable[..., object], model_mutable: Any +) -> None: benchmark(lp.io.to_highspy, model_mutable, set_names=False) -def test_to_highspy_frozen_no_names(benchmark, model_frozen): +def test_to_highspy_frozen_no_names( + benchmark: Callable[..., object], model_frozen: Any +) -> None: benchmark(lp.io.to_highspy, model_frozen, set_names=False) diff --git a/benchmarks/test_to_lp.py b/benchmarks/test_to_lp.py new file mode 
100644 index 00000000..2303d7cb --- /dev/null +++ b/benchmarks/test_to_lp.py @@ -0,0 +1,24 @@ +"""Benchmarks for LP file writing speed.""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import TYPE_CHECKING + +from benchmarks.conftest import build_model, cases +from benchmarks.phases import write_lp +from benchmarks.registry import TO_LP + +if TYPE_CHECKING: + from pathlib import Path + + from benchmarks.registry import BenchSpec + + +@cases(TO_LP) +def test_to_lp( + benchmark: Callable[..., object], spec: BenchSpec, n: int, tmp_path: Path +) -> None: + m = build_model(spec, n) + path = tmp_path / "model.lp" + benchmark(lambda: write_lp(m, path)) diff --git a/benchmarks/test_to_solver.py b/benchmarks/test_to_solver.py new file mode 100644 index 00000000..defb14d2 --- /dev/null +++ b/benchmarks/test_to_solver.py @@ -0,0 +1,50 @@ +""" +Benchmarks for solver handoff (model -> native solver instance). + +Times each ``linopy.io.to_*`` wrapper. These wrappers delegate to the +same direct-API build path as the new stateful Solver API +(``Solver.from_name(name, model, io_api="direct")``), so the numbers serve +double duty: regression tracking for the wrappers, *and* for the underlying +``Solver._build_direct`` paths. They've also been available for many releases +— using them keeps the suite runnable on older linopy versions. + +The actual ``Solver.solve()`` runtime (i.e. solver-side algorithm time) is +intentionally not benchmarked. +""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import TYPE_CHECKING + +import pytest + +from benchmarks.conftest import build_model +from benchmarks.phases import SOLVER_HANDOFFS +from benchmarks.registry import iter_params, spec_param_id +from linopy.solvers import available_solvers + +if TYPE_CHECKING: + from benchmarks.registry import BenchSpec + +# One case per (available solver wrapper) × (spec, value) it applies to. 
+_PARAMS = [ + (name, wrapper, spec, n) + for name, tag, wrapper in SOLVER_HANDOFFS + for spec, n in iter_params(tag) +] +_IDS = [f"{name}-{spec_param_id(s.name, s.axis, v)}" for name, _w, s, v in _PARAMS] + + +@pytest.mark.parametrize(("name", "wrapper", "spec", "n"), _PARAMS, ids=_IDS) +def test_to_solver( + benchmark: Callable[..., object], + name: str, + wrapper: Callable[..., object], + spec: BenchSpec, + n: int, +) -> None: + if name not in available_solvers: + pytest.skip(f"{name} not installed") + m = build_model(spec, n) + benchmark(lambda: wrapper(m)) diff --git a/pyproject.toml b/pyproject.toml index 19d0abb3..90434190 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,11 +82,21 @@ dev = [ "highspy", "jupyter", ] +# Perf-relevant deps pinned exactly so run-to-run deltas reflect linopy +# changes, not dependency bumps. benchmarks = [ - "pytest-benchmark", - "pypsa", - "highspy>=1.7.1", - "pytest-memray", + "highspy==1.13.1", + "netcdf4==1.7.4", + "numpy==1.26.4", + "scipy==1.16.3", + "xarray==2025.1.2", + "pandas==2.3.3", + "polars==1.35.2", + "dask==2025.11.0", + "pytest==9.0.3", + "pytest-benchmark==5.2.3", + "pytest-memray==1.8.0", + "pytest-codspeed==5.0.3", ] solvers = [ "gurobipy", @@ -139,7 +149,7 @@ omit = ["test/*"] exclude_also = ["if TYPE_CHECKING:"] [tool.mypy] -exclude = ['dev/*', 'examples/*', 'benchmark/*', 'benchmarks/*', 'doc/*'] +exclude = ['dev/*', 'examples/*', '^benchmark/', 'doc/*'] ignore_missing_imports = true no_implicit_optional = true warn_unused_ignores = true