Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ This README only covers install and how to open the walkthrough.

## Models vs patterns

Two kinds of benchmark spec, same harness (time + peak memory, same phases),
distinguished by their sweep axis:
Two kinds of benchmark spec share one harness (same phases; time *or* peak
memory, selected by the `--metric` flag on `run`/`sweep`) and are distinguished by their sweep axis:

- **Models** (`models/`, `REGISTRY`) — whole `linopy.Model`s swept over
`size` (axis `n`): "how does cost scale with the problem?"
Expand Down
1 change: 0 additions & 1 deletion benchmarks/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from benchmarks.cli import sweep # noqa: F401
from benchmarks.cli import compare # noqa: F401
from benchmarks.cli import plot # noqa: F401
from benchmarks.cli import memory # noqa: F401

# isort: on

Expand Down
27 changes: 18 additions & 9 deletions benchmarks/cli/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,37 @@
Shared app object, types, and helpers for the benchmark CLI.

The command groups (``introspect``, ``run``, ``sweep``, ``compare``,
``plot``, ``memory``) all register onto the single ``app`` defined here, so
the user-facing command surface stays flat (``python -m benchmarks run`` etc.).
``plot``) all register onto the single ``app`` defined here, so the
user-facing command surface stays flat (``python -m benchmarks run`` etc.).
Time vs memory is a ``--metric`` flag on ``run``/``sweep``, not a sub-app.

Note on colour: ``typer.secho`` strips colour automatically when stdout isn't
a TTY, so piping any command into ``grep`` still yields plain text.
"""

from __future__ import annotations

from enum import StrEnum
from typing import Literal

import typer

from benchmarks.snapshot import discover_snapshots


class Measure(StrEnum):
"""
What a measuring command records — orthogonal to the workflow.

``time`` runs pytest-benchmark (wall clock); ``memory`` tracks peak RSS
via memray; ``both`` runs them sequentially (never concurrently — memray's
overhead would skew the wall-clock numbers).
"""

time = "time"
memory = "memory"
both = "both"

app = typer.Typer(
help=(
"Linopy internal benchmark suite — a thin layer over pytest plus "
Expand All @@ -26,13 +42,6 @@
rich_markup_mode="rich",
)

memory_app = typer.Typer(
help="Peak-RSS memory snapshots (pytest-memray under the hood).",
no_args_is_help=True,
)
app.add_typer(memory_app, name="memory")


# Static type for a phase name: a closed set of string literals, so a type
# checker rejects any other spelling.
# NOTE(review): presumably these mirror the phases the benchmark specs
# declare (``spec.phases``) — confirm and keep the two in sync.
PhaseName = Literal[
    "build", "matrices", "to_lp", "to_netcdf", "from_netcdf", "to_solver"
]
Expand Down
45 changes: 39 additions & 6 deletions benchmarks/cli/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
)
def compare(ctx: typer.Context) -> None:
"""
Compare timing snapshots side-by-side via ``pytest-benchmark compare``.
Compare two snapshots side-by-side — timing or memory, auto-detected.

Thin wrapper around the upstream tool so the whole suite stays under
one entry point. Pass the snapshot paths first, then any pytest-benchmark
flags::
Timing snapshots wrap ``pytest-benchmark compare``; memory snapshots
(``peak_mib`` key) get a peak-RSS table. Pass the snapshot paths first,
then any pytest-benchmark flags (timing only)::

python -m benchmarks compare a.json b.json
python -m benchmarks compare a.json b.json --group-by=name
Expand All @@ -29,8 +29,9 @@ def compare(ctx: typer.Context) -> None:
With no arguments (or missing paths), prints what snapshots exist
under ``.benchmarks/`` so you can copy-paste the path you want.

For memory snapshots use ``memory compare`` instead — different format,
different tool.
Memory snapshots (``peak_mib`` key) are auto-detected and diffed with a
peak-RSS table; timing snapshots go through pytest-benchmark. The two
can't be mixed in one call.

Implementation note: typer/click don't have a clean idiom for "list-typed
positional + pass-through", so this command parses ``ctx.args`` by hand
Expand Down Expand Up @@ -62,6 +63,38 @@ def compare(ctx: typer.Context) -> None:
_suggest_snapshots(f"missing snapshots: {[str(p) for p in missing]}")
raise typer.Exit(code=2)

# Auto-detect the metric from the snapshots (memory snapshots load as MiB,
# timing as s) and route accordingly — no ``memory compare`` needed.
# ``load_snapshot`` validates each file, so a malformed/unreadable one
# fails here with a clear, file-named message instead of a raw traceback.
from benchmarks.snapshot import load_snapshot

try:
units = [load_snapshot(p)[2] for p in snapshots]
except ValueError as exc:
typer.secho(str(exc), fg=typer.colors.RED, err=True)
raise typer.Exit(code=2) from exc
is_memory = [u == "MiB" for u in units]
if any(is_memory):
if not all(is_memory):
typer.secho(
"can't compare memory and timing snapshots together",
fg=typer.colors.RED,
err=True,
)
raise typer.Exit(code=2)
if len(snapshots) != 2:
typer.secho(
"memory compare takes exactly 2 snapshots",
fg=typer.colors.RED,
err=True,
)
raise typer.Exit(code=2)
from benchmarks.memory import compare_snapshots

compare_snapshots(snapshots[0], snapshots[1])
return

# Override pytest-benchmark's wide default table: ``--group-by=fullname``
# gives each test its own (baseline, candidate) mini-table and
# ``--columns=min,iqr`` shows the noise-floor time plus spread. Applied
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/cli/introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _row(label: str, value: object) -> None:
_row("sizes:", spec.sizes)
_row("features:", sorted(spec.features))
_row("phases:", sorted(spec.phases))
_row("quick_threshold:", spec.quick_threshold)
_row("quick:", spec.quick_subset)
_row("long_threshold:", spec.long_threshold)
if spec.requires:
_row("requires:", list(spec.requires))
Expand All @@ -105,7 +105,7 @@ def _row(label: str, value: object) -> None:
_row("severities:", pattern.severities)
_row("description:", pattern.description)
_row("phases:", sorted(pattern.phases))
_row("quick_threshold:", pattern.quick_threshold)
_row("quick:", pattern.quick_subset)
_row("long_threshold:", pattern.long_threshold)
if pattern.requires:
_row("requires:", list(pattern.requires))
Expand Down
169 changes: 0 additions & 169 deletions benchmarks/cli/memory.py

This file was deleted.

Loading
Loading