From 8ae34695b0036ba3eab9cddf9ad65ebf48930454 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Thu, 9 Apr 2026 21:54:23 -0400 Subject: [PATCH 01/13] use dask size calculation --- src/lbench/pytest/fixtures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lbench/pytest/fixtures.py b/src/lbench/pytest/fixtures.py index 630da67..1d25670 100644 --- a/src/lbench/pytest/fixtures.py +++ b/src/lbench/pytest/fixtures.py @@ -1,6 +1,8 @@ from pathlib import Path import sys +from dask.sizeof import sizeof + import pytest from pytest import fixture from distributed import Client @@ -57,7 +59,7 @@ def collection_benchmark_func(collection): graph = collection.dask graph_len = len(graph) - graph_size = sum(sys.getsizeof(graph[key]) for key in graph.keys()) + graph_size = sizeof(graph) lbench_dask(run_func) benchmark.extra_info["dask"]["dask_graph_len"] = graph_len From 4b7625a71f54bb8e5c6f3d5bcfbdc2361253b999 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Thu, 9 Apr 2026 23:45:39 -0400 Subject: [PATCH 02/13] disable size calc for performance --- src/lbench/pytest/fixtures.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lbench/pytest/fixtures.py b/src/lbench/pytest/fixtures.py index 1d25670..e32ae0a 100644 --- a/src/lbench/pytest/fixtures.py +++ b/src/lbench/pytest/fixtures.py @@ -59,10 +59,8 @@ def collection_benchmark_func(collection): graph = collection.dask graph_len = len(graph) - graph_size = sizeof(graph) lbench_dask(run_func) benchmark.extra_info["dask"]["dask_graph_len"] = graph_len - benchmark.extra_info["dask"]["dask_graph_size_bytes"] = graph_size return collection_benchmark_func From 44bbc65b50729cd2fe8ecef63f5693614dc494c2 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 12:14:33 -0400 Subject: [PATCH 03/13] update metric types --- src/lbench/dashboard/metrics/groups/dask_group.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/lbench/dashboard/metrics/groups/dask_group.py b/src/lbench/dashboard/metrics/groups/dask_group.py index 9eae655..02f8879 100644 --- a/src/lbench/dashboard/metrics/groups/dask_group.py +++ b/src/lbench/dashboard/metrics/groups/dask_group.py @@ -1,6 +1,7 @@ from typing import Optional from lbench.dashboard.metrics import Metric, DurationMetric, MemoryMetric +from lbench.dashboard.metrics.groups.execution_group import CountMetric from lbench.dashboard.metrics.metric_group import MetricGroup @@ -15,7 +16,7 @@ def get_dask_stats(self, benchmark_data: dict) -> Optional[dict]: return None -class DaskTaskCount(DaskMetric): +class DaskTaskCount(DaskMetric, CountMetric): """Number of Dask tasks.""" def __init__(self): @@ -30,11 +31,6 @@ def extract(self, benchmark_data: dict) -> Optional[float]: pass return None - def format_value(self, value: Optional[float]) -> str: - if value is None: - return "-" - return str(int(value)) - class DaskTotalTime(DaskMetric, DurationMetric): """Total Dask execution time.""" @@ -73,7 +69,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]: return None -class DaskGraphLength(DaskMetric): +class DaskGraphLength(DaskMetric, CountMetric): """Size of dask graph""" def __init__(self): From 9e20e93d8c7e64d4d9e3da576c3784b9bec878bb Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 12:21:01 -0400 Subject: [PATCH 04/13] add size computation --- src/lbench/pytest/fixtures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lbench/pytest/fixtures.py b/src/lbench/pytest/fixtures.py index e32ae0a..5cd09f9 100644 --- 
a/src/lbench/pytest/fixtures.py +++ b/src/lbench/pytest/fixtures.py @@ -54,7 +54,7 @@ def dask_benchmark_func(func, *args, **kwargs): @fixture def lbench_dask_collection(lbench_dask, benchmark): - def collection_benchmark_func(collection): + def collection_benchmark_func(collection, measure_memory=True): run_func = lambda: collection.compute() graph = collection.dask @@ -62,5 +62,8 @@ def collection_benchmark_func(collection): lbench_dask(run_func) benchmark.extra_info["dask"]["dask_graph_len"] = graph_len + if measure_memory: + size = sizeof(graph) + benchmark.extra_info["dask"]["dask_graph_size_bytes"] = size return collection_benchmark_func From cf2192d48288e23494d89926a88e6ddb0a36e2c9 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 16:23:06 -0400 Subject: [PATCH 05/13] add barchart plot and use dash store for metrics --- src/lbench/dashboard/context.py | 53 ++---- src/lbench/dashboard/layout.py | 15 +- src/lbench/dashboard/layouts/sidebar.py | 6 +- src/lbench/dashboard/layouts/trends.py | 223 ++++++++++++++++++++---- 4 files changed, 219 insertions(+), 78 deletions(-) diff --git a/src/lbench/dashboard/context.py b/src/lbench/dashboard/context.py index 434fc0b..f041519 100644 --- a/src/lbench/dashboard/context.py +++ b/src/lbench/dashboard/context.py @@ -7,12 +7,13 @@ from lbench.dashboard.metrics.benchmark_collection import BenchmarkCollection from lbench.dashboard.metrics.groups import stats_group, execution_group, dask_group, profiling_group -"""Registry for available metrics""" +# Registry for available metrics — constant, built once at startup registry = MetricRegistry() for group in [stats_group, execution_group, dask_group, profiling_group]: registry.register_group(group) -"""Load information about runs""" +# Root directory where benchmark runs are stored — constant +ROOT_DIR = get_lbench_root_dir() def load_run_json(run_dir): @@ -39,58 +40,34 @@ def load_all_runs(root_dir): return dict(sorted(runs.items(), key=lambda kv: kv[1].get("datetime", ""), reverse=True)) +def get_collection(run_data: dict) -> BenchmarkCollection: + """Build a BenchmarkCollection from raw run data (e.g. from run-data-store).""" + return BenchmarkCollection(run_data or {}, registry) + + def rename_run(old_name, new_name): """Rename a benchmark run folder. 
- Args: - old_name: Current folder name - new_name: New folder name - Returns: - tuple: (success: bool, message: str, new_run_data: dict, new_collection: BenchmarkCollection) + tuple: (success: bool, message: str, new_run_data: dict) """ - global RUN_DATA, BENCHMARK_COLLECTION - - # Validate names if not old_name or not new_name: - return False, "Names cannot be empty", RUN_DATA, BENCHMARK_COLLECTION + return False, "Names cannot be empty", None if old_name == new_name: - return False, "New name is the same as old name", RUN_DATA, BENCHMARK_COLLECTION + return False, "New name is the same as old name", None old_path = ROOT_DIR / old_name new_path = ROOT_DIR / new_name - # Check if old path exists if not old_path.exists(): - return False, f"Run '{old_name}' not found", RUN_DATA, BENCHMARK_COLLECTION + return False, f"Run '{old_name}' not found", None - # Check if new path already exists if new_path.exists(): - return False, f"Run '{new_name}' already exists", RUN_DATA, BENCHMARK_COLLECTION + return False, f"Run '{new_name}' already exists", None try: - # Rename the folder shutil.move(str(old_path), str(new_path)) - - # Reload all run data - new_run_data = load_all_runs(ROOT_DIR) - new_collection = BenchmarkCollection(new_run_data, registry) - - # Update globals - RUN_DATA = new_run_data - BENCHMARK_COLLECTION = new_collection - - return True, f"Successfully renamed '{old_name}' to '{new_name}'", new_run_data, new_collection + return True, f"Successfully renamed '{old_name}' to '{new_name}'", load_all_runs(ROOT_DIR) except Exception as e: - return False, f"Error renaming run: {str(e)}", RUN_DATA, BENCHMARK_COLLECTION - - -# Root directory where benchmark runs are stored -ROOT_DIR = get_lbench_root_dir() - -# Global run data (needs to be defined before importing pages) -RUN_DATA = load_all_runs(ROOT_DIR) - -# Initialize metrics collection -BENCHMARK_COLLECTION = BenchmarkCollection(RUN_DATA, registry) + return False, f"Error renaming run: {str(e)}", None diff --git a/src/lbench/dashboard/layout.py b/src/lbench/dashboard/layout.py index d3a7d55..65993cd 100644 --- a/src/lbench/dashboard/layout.py +++ b/src/lbench/dashboard/layout.py @@ -1,7 +1,7 @@ from dash import html, dcc, Input, Output, callback import dash_bootstrap_components as dbc -from lbench.dashboard.context import RUN_DATA +from lbench.dashboard.context import load_all_runs, ROOT_DIR from lbench.dashboard.layouts.sidebar import sidebar_panel, rename_modal from lbench.dashboard.layouts.tables import tables_panel from lbench.dashboard.layouts.trends import trends_panel @@ -19,8 +19,9 @@ def _navbar(): def _container(): return dbc.Container([ + dcc.Location(id="url", refresh=False), dcc.Store(id="date-filter-store", data={}), - dcc.Store(id="run-data-store", data=RUN_DATA), + dcc.Store(id="run-data-store", data={}), dcc.Store(id="rename-old-name", data=""), dcc.Store(id="right-panel-view", data="tables"), rename_modal(), @@ -50,6 +51,16 @@ def _container(): style={"height": "100vh", "overflow": "hidden", "display": "flex", "flexDirection": "column"}, ) + +@callback( + Output("run-data-store", "data", allow_duplicate=True), + Input("url", "pathname"), + prevent_initial_call="initial_duplicate", +) +def reload_on_page_load(_pathname): + return load_all_runs(ROOT_DIR) + + @callback( Output("tables-view", "style"), Output("trends-view", "style"), diff --git a/src/lbench/dashboard/layouts/sidebar.py b/src/lbench/dashboard/layouts/sidebar.py index 00a95cb..63ee1e6 100644 --- a/src/lbench/dashboard/layouts/sidebar.py +++ 
b/src/lbench/dashboard/layouts/sidebar.py @@ -3,7 +3,7 @@ from dash import html, Input, Output, State, dcc, callback, no_update import dash_bootstrap_components as dbc -from lbench.dashboard.context import RUN_DATA, rename_run +from lbench.dashboard.context import rename_run from lbench.dashboard.layouts.tables import benchmarks_to_tables @@ -120,7 +120,7 @@ def sidebar_panel(): ), html.Div( id="sidebar-container", - children=create_sidebar(RUN_DATA), + children=create_sidebar({}), style={"overflowY": "auto", "flex": "1", "minHeight": "0"}, ), ], @@ -256,7 +256,7 @@ def handle_rename(edit_clicks, cancel_clicks, confirm_clicks, old_name, new_name return False, "", "", "", no_update if triggered_id == "rename-confirm-btn" and confirm_clicks: - success, message, new_run_data, _ = rename_run(old_name, new_name) + success, message, new_run_data = rename_run(old_name, new_name) if success: return False, "", "", "", new_run_data return no_update, no_update, no_update, message, no_update diff --git a/src/lbench/dashboard/layouts/trends.py b/src/lbench/dashboard/layouts/trends.py index 152b226..f9dd58f 100644 --- a/src/lbench/dashboard/layouts/trends.py +++ b/src/lbench/dashboard/layouts/trends.py @@ -3,7 +3,7 @@ from dash import dcc, html, Input, Output, callback import dash_bootstrap_components as dbc -from lbench.dashboard.context import registry, BENCHMARK_COLLECTION +from lbench.dashboard.context import registry, get_collection def trends_panel(): @@ -17,7 +17,7 @@ def trends_panel(): html.Label("Select benchmarks:", className="fw-bold"), dcc.Dropdown( id="benchmark-selector", - options=[{"label": b, "value": b} for b in BENCHMARK_COLLECTION.get_benchmark_names()], + options=[], placeholder="Select one or more benchmarks", multi=True, ), @@ -26,20 +26,57 @@ def trends_panel(): ), dbc.Col( [ - html.Label("Select metric:", className="fw-bold"), + html.Label("Select metrics:", className="fw-bold"), dcc.Dropdown( id="metric-selector", - options=[{"label": m.display_name, "value": m.name} for m in BENCHMARK_COLLECTION.get_common_metrics()], - value="mean", - placeholder="Select a metric", + options=[], + value=["mean"], + placeholder="Select one or more metrics", + multi=True, ), ], - width=6, + width=4, + ), + dbc.Col( + [ + html.Label("Chart type:", className="fw-bold"), + dbc.RadioItems( + id="chart-type-selector", + options=[ + {"label": "Line", "value": "line"}, + {"label": "Bar", "value": "bar"}, + ], + value="line", + inline=True, + ), + ], + width=2, + className="d-flex flex-column justify-content-start", ), ], className="mb-3", style={"flexShrink": "0"}, ), + dbc.Row( + [ + dbc.Col( + [ + html.Label("Select runs:", className="fw-bold"), + dcc.Dropdown( + id="bar-run-selector", + options=[], + value=[], + multi=True, + placeholder="Select runs to include", + ), + ], + width=12, + ), + ], + id="bar-run-selector-row", + className="mb-3", + style={"display": "none", "flexShrink": "0"}, + ), dcc.Graph( id="trend-plot", figure={"layout": {"title": "Select a benchmark and metric to view trends"}}, @@ -66,47 +103,146 @@ def _apply_date_filter(df: pd.DataFrame, date_filter: dict) -> pd.DataFrame: return df[mask] +@callback( + Output("benchmark-selector", "options"), + Output("metric-selector", "options"), + Output("bar-run-selector", "options"), + Output("bar-run-selector", "value"), + Input("run-data-store", "data"), +) +def refresh_trend_options(run_data): + collection = get_collection(run_data) + benchmark_options = [{"label": b, "value": b} for b in collection.get_benchmark_names()] + 
metric_options = [{"label": m.display_name, "value": m.name} for m in collection.get_common_metrics()] + run_ids = list((run_data or {}).keys()) + run_options = [{"label": r, "value": r} for r in run_ids] + return benchmark_options, metric_options, run_options, run_ids + + +@callback( + Output("bar-run-selector-row", "style"), + Input("chart-type-selector", "value"), +) +def toggle_run_selector(chart_type): + if chart_type == "bar": + return {"flexShrink": "0"} + return {"display": "none", "flexShrink": "0"} + + @callback( Output("trend-plot", "figure"), Input("benchmark-selector", "value"), Input("metric-selector", "value"), Input("date-filter-store", "data"), + Input("chart-type-selector", "value"), + Input("bar-run-selector", "value"), + Input("run-data-store", "data"), ) -def update_trend_plot(selected_benchmarks, selected_metric_name, date_filter): - if not selected_benchmarks or not selected_metric_name: - return {"layout": {"title": "Select one or more benchmarks and a metric to view trends"}} +def update_trend_plot(selected_benchmarks, selected_metric_names, date_filter, chart_type, selected_runs, run_data): + if not selected_benchmarks or not selected_metric_names: + return {"layout": {"title": "Select one or more benchmarks and metrics to view trends"}} + + if isinstance(selected_metric_names, str): + selected_metric_names = [selected_metric_names] + + collection = get_collection(run_data) + + # Build (metric, series, scale, unit) tuples, skipping metrics with no data + metrics_data = [] + for metric_name in selected_metric_names: + metric = registry.get(metric_name) + if not metric: + continue + series = { + b: _apply_date_filter(collection.get_metric_series(b, metric), date_filter) + for b in selected_benchmarks + } + series = {b: df for b, df in series.items() if not df.empty} + if not series: + continue + all_values = pd.concat([df["value"] for df in series.values()]) + scale, unit = metric.get_plot_scale_and_unit(all_values) + metrics_data.append((metric, series, scale, unit)) + + if not metrics_data: + return {"layout": {"title": "No data available for the selected benchmarks and metrics"}} + + # Assign y-axes: group metrics by unit, up to 2 axes (left/right) + unit_to_axis: dict[str, int] = {} + metric_axis: dict[str, int] = {} + for metric, series, scale, unit in metrics_data: + if unit not in unit_to_axis: + unit_to_axis[unit] = min(len(unit_to_axis) + 1, 2) + metric_axis[metric.name] = unit_to_axis[unit] - metric = registry.get(selected_metric_name) - if not metric: - return {"layout": {"title": f"Metric '{selected_metric_name}' not found"}} + axis_labels: dict[int, list[str]] = {} + for metric, _, _, unit in metrics_data: + ax = metric_axis[metric.name] + label = metric.display_name + (f" ({unit})" if unit else "") + if ax not in axis_labels: + axis_labels[ax] = [] + if label not in axis_labels[ax]: + axis_labels[ax].append(label) + multi_metric = len(metrics_data) > 1 fig = go.Figure() - series = { - b: _apply_date_filter(BENCHMARK_COLLECTION.get_metric_series(b, metric), date_filter) - for b in selected_benchmarks + for metric, series, scale, unit in metrics_data: + # Bar charts don't support multiple y-axes with grouped bars — always use y1 + ax = metric_axis[metric.name] if chart_type == "line" else 1 + yaxis_ref = "y" if ax == 1 else "y2" + + if chart_type == "bar": + traces = _make_bar_traces(series, scale, selected_runs, metric, multi_metric, yaxis_ref) + else: + traces = _make_line_traces(series, scale, metric, date_filter, multi_metric, yaxis_ref, collection) 
+ + for trace in traces: + fig.add_trace(trace) + + metric_names_str = " / ".join(m.display_name for m, *_ in metrics_data) + title_prefix = "Comparison" if chart_type == "bar" else "Trends" + + layout_kwargs = { + "title": f"{title_prefix}: {metric_names_str}", + "xaxis_title": "Run", + "yaxis": {"title": " / ".join(axis_labels.get(1, []))}, + "legend": {"orientation": "h", "yanchor": "top", "y": -0.1, "xanchor": "center", "x": 0.5}, } - series = {b: df for b, df in series.items() if not df.empty} - if not series: - return {"layout": {"title": "No data available for the selected benchmarks and metric"}} + if chart_type == "line" and 2 in axis_labels: + layout_kwargs["yaxis2"] = { + "title": " / ".join(axis_labels[2]), + "overlaying": "y", + "side": "right", + } + + if chart_type == "line": + layout_kwargs["hovermode"] = "x unified" + if chart_type == "bar": + layout_kwargs["barmode"] = "group" + + fig.update_layout(**layout_kwargs) + return fig - all_values = pd.concat([df["value"] for df in series.values()]) - scale, plot_unit = metric.get_plot_scale_and_unit(all_values) +def _make_line_traces(series, scale, metric, date_filter, multi_metric, yaxis_ref, collection): + traces = [] for benchmark, df in series.items(): + name = f"{benchmark} ({metric.display_name})" if multi_metric else benchmark trace_kwargs = { "x": df["timestamp"], "y": df["value"] / scale, "mode": "lines+markers", - "name": benchmark, + "name": name, + "yaxis": yaxis_ref, } error_bar_config = metric.get_error_bar_config() if error_bar_config: error_bar_metric = error_bar_config["metric"] error_df = _apply_date_filter( - BENCHMARK_COLLECTION.get_metric_series(benchmark, error_bar_metric), date_filter + collection.get_metric_series(benchmark, error_bar_metric), date_filter ) if not error_df.empty: merged = df.merge(error_df, on=["run_id", "timestamp"], suffixes=("", "_error")) @@ -115,17 +251,34 @@ def update_trend_plot(selected_benchmarks, selected_metric_name, date_filter): trace_kwargs["x"] = merged["timestamp"] trace_kwargs["y"] = merged["value"] / scale - fig.add_trace(go.Scatter(**trace_kwargs)) + traces.append(go.Scatter(**trace_kwargs)) + return traces - y_axis_label = metric.display_name - if plot_unit: - y_axis_label += f" ({plot_unit})" - fig.update_layout( - xaxis_title="Run", - yaxis_title=y_axis_label, - hovermode="x unified", - title=f"Trends: {metric.display_name}", - legend={"orientation": "h", "yanchor": "top", "y": -0.1, "xanchor": "center", "x": 0.5}, - ) - return fig +def _make_bar_traces(series, scale, selected_runs, metric, multi_metric, yaxis_ref): + run_ids_in_data = set() + for df in series.values(): + run_ids_in_data.update(df["run_id"].tolist()) + + if selected_runs: + run_ids = [r for r in selected_runs if r in run_ids_in_data] + else: + run_ids = sorted(run_ids_in_data) + + if not run_ids: + return [] + + traces = [] + for benchmark, df in series.items(): + name = f"{benchmark} ({metric.display_name})" if multi_metric else benchmark + df_filtered = df[df["run_id"].isin(run_ids)] + df_filtered = df_filtered.set_index("run_id").reindex(run_ids).reset_index() + traces.append( + go.Bar( + x=df_filtered["run_id"], + y=df_filtered["value"] / scale, + name=name, + yaxis=yaxis_ref, + ) + ) + return traces From 425eb940f43500c2741db3994a27d6a32deb9ca1 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 17:05:54 -0400 Subject: [PATCH 06/13] hydrate with ppt --- .copier-answers.yml | 26 +++ .git_archival.txt | 4 + .gitattributes | 24 +++ .github/ISSUE_TEMPLATE/0-general_issue.md | 8 + 
.github/ISSUE_TEMPLATE/1-bug_report.md | 28 +++ .github/ISSUE_TEMPLATE/2-feature_request.md | 18 ++ .github/ISSUE_TEMPLATE/README.md | 9 + .github/dependabot.yml | 10 + .github/pull_request_template.md | 13 ++ .github/workflows/README.md | 9 + .github/workflows/build-documentation.yml | 41 +++++ .github/workflows/pre-commit-ci.yml | 37 ++++ .github/workflows/publish-to-pypi.yml | 38 ++++ .github/workflows/smoke-test.yml | 43 +++++ .github/workflows/testing-and-coverage.yml | 40 ++++ .gitignore | 151 +++++++++++++++ .pre-commit-config.yaml | 85 +++++++++ .readthedocs.yml | 24 +++ .setup_dev.sh | 51 ++++++ LICENSE | 21 +++ Untitled.ipynb | 68 +++++++ docs/Makefile | 31 ++++ docs/conf.py | 58 ++++++ docs/index.rst | 53 ++++++ docs/notebooks.rst | 6 + docs/notebooks/README.md | 25 +++ docs/notebooks/intro_notebook.ipynb | 84 +++++++++ docs/pre_executed/README.md | 16 ++ docs/requirements.txt | 10 + pyproject.toml | 92 +++++++++- requirements.txt | 0 src/lbench/__init__.py | 3 +- src/lbench/notebook/__init__.py | 7 + src/lbench/notebook/magic.py | 192 ++++++++++++++++++++ tests/lbench/conftest.py | 0 tests/lbench/test_packaging.py | 6 + 36 files changed, 1327 insertions(+), 4 deletions(-) create mode 100644 .copier-answers.yml create mode 100644 .git_archival.txt create mode 100644 .gitattributes create mode 100644 .github/ISSUE_TEMPLATE/0-general_issue.md create mode 100644 .github/ISSUE_TEMPLATE/1-bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/2-feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/README.md create mode 100644 .github/dependabot.yml create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/README.md create mode 100644 .github/workflows/build-documentation.yml create mode 100644 .github/workflows/pre-commit-ci.yml create mode 100644 .github/workflows/publish-to-pypi.yml create mode 100644 .github/workflows/smoke-test.yml create mode 100644 .github/workflows/testing-and-coverage.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .readthedocs.yml create mode 100755 .setup_dev.sh create mode 100644 LICENSE create mode 100644 Untitled.ipynb create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/notebooks.rst create mode 100644 docs/notebooks/README.md create mode 100644 docs/notebooks/intro_notebook.ipynb create mode 100644 docs/pre_executed/README.md create mode 100644 docs/requirements.txt create mode 100644 requirements.txt create mode 100644 src/lbench/notebook/__init__.py create mode 100644 src/lbench/notebook/magic.py create mode 100644 tests/lbench/conftest.py create mode 100644 tests/lbench/test_packaging.py diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 0000000..212da10 --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,26 @@ +# Changes here will be overwritten by Copier +_commit: v2.2.0 +_src_path: gh:lincc-frameworks/python-project-template +author_email: seanmcgu@andrew.cmu.edu +author_name: LINCC Frameworks +create_example_module: false +custom_install: custom +enforce_style: +- ruff_lint +- ruff_format +failure_notification: [] +include_benchmarks: false +include_docs: true +include_notebooks: true +mypy_type_checking: none +package_name: lbench +project_description: Benchmarking tools for LSDB +project_license: MIT +project_name: lbench +project_organization: lincc-frameworks +python_versions: +- '3.10' +- '3.11' +- '3.12' +- '3.13' +test_lowest_version: none diff --git a/.git_archival.txt 
b/.git_archival.txt new file mode 100644 index 0000000..b1a286b --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..343a755 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,24 @@ +# For explanation of this file and uses see +# https://git-scm.com/docs/gitattributes +# https://developer.lsst.io/git/git-lfs.html#using-git-lfs-enabled-repositories +# https://lincc-ppt.readthedocs.io/en/latest/practices/git-lfs.html +# +# Used by https://github.com/lsst/afwdata.git +# *.boost filter=lfs diff=lfs merge=lfs -text +# *.dat filter=lfs diff=lfs merge=lfs -text +# *.fits filter=lfs diff=lfs merge=lfs -text +# *.gz filter=lfs diff=lfs merge=lfs -text +# +# apache parquet files +# *.parq filter=lfs diff=lfs merge=lfs -text +# +# sqlite files +# *.sqlite3 filter=lfs diff=lfs merge=lfs -text +# +# gzip files +# *.gz filter=lfs diff=lfs merge=lfs -text +# +# png image files +# *.png filter=lfs diff=lfs merge=lfs -text + +.git_archival.txt export-subst \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/0-general_issue.md b/.github/ISSUE_TEMPLATE/0-general_issue.md new file mode 100644 index 0000000..84bb0d7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/0-general_issue.md @@ -0,0 +1,8 @@ +--- +name: General issue +about: Quickly create a general issue +title: '' +labels: '' +assignees: '' + +--- \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/1-bug_report.md b/.github/ISSUE_TEMPLATE/1-bug_report.md new file mode 100644 index 0000000..eaa7049 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Tell us about a problem to fix +title: 'Short description' +labels: 'bug' +assignees: '' + +--- +**Bug report** + + +**Environment Information** + + +
+Traceback + +FILL IN YOUR STACK TRACE HERE + +
+ +**Before submitting** +Please check the following: + +- [ ] I have described the situation in which the bug arose, including what code was executed, and any applicable data others will need to reproduce the problem. +- [ ] I have included information about my environment, including the version of this package (e.g. `lbench.__version__`) +- [ ] I have included available evidence of the unexpected behavior (including error messages, screenshots, and/or plots) as well as a description of what I expected instead. +- [ ] If I have a solution in mind, I have provided an explanation and/or pseudocode and/or task list. diff --git a/.github/ISSUE_TEMPLATE/2-feature_request.md b/.github/ISSUE_TEMPLATE/2-feature_request.md new file mode 100644 index 0000000..908ff72 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-feature_request.md @@ -0,0 +1,18 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: 'Short description' +labels: 'enhancement' +assignees: '' + +--- + +**Feature request** + + +**Before submitting** +Please check the following: + +- [ ] I have described the purpose of the suggested change, specifying what I need the enhancement to accomplish, i.e. what problem it solves. +- [ ] I have included any relevant links, screenshots, environment information, and data relevant to implementing the requested feature, as well as pseudocode for how I want to access the new functionality. +- [ ] If I have ideas for how the new feature could be implemented, I have provided explanations and/or pseudocode and/or task lists for the steps. diff --git a/.github/ISSUE_TEMPLATE/README.md b/.github/ISSUE_TEMPLATE/README.md new file mode 100644 index 0000000..46dc08e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/README.md @@ -0,0 +1,9 @@ +# Configurations + +Templates for various different issue types are defined in this directory +and a pull request template is defined as ``../pull_request_template.md``. Adding, +removing, and modifying these templates to suit the needs of your project is encouraged. + +For more information about these templates, look here: https://lincc-ppt.readthedocs.io/en/latest/practices/issue_pr_templating.html + +Or if you still have questions contact us: https://lincc-ppt.readthedocs.io/en/latest/source/contact.html \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..3b5ca19 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..5f341fd --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,13 @@ +## Change Description + + +## Solution Description + + +## Code Quality +- [ ] I have read the Contribution Guide and agree to the Code of Conduct +- [ ] My code follows the code style of this project +- [ ] My code builds (or compiles) cleanly without any errors or warnings +- [ ] My code contains relevant comments and necessary documentation diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..e34a71f --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,9 @@ +# Workflows + +The .yml files in this directory are used to define the various continuous +integration scripts that will be run on your behalf e.g. 
nightly as a smoke check, +or when you create a new PR. + +For more information about CI and workflows, look here: https://lincc-ppt.readthedocs.io/en/latest/practices/ci.html + +Or if you still have questions contact us: https://lincc-ppt.readthedocs.io/en/latest/source/contact.html \ No newline at end of file diff --git a/.github/workflows/build-documentation.yml b/.github/workflows/build-documentation.yml new file mode 100644 index 0000000..e848269 --- /dev/null +++ b/.github/workflows/build-documentation.yml @@ -0,0 +1,41 @@ + +# This workflow will install Python dependencies, build the package and then build the documentation. + +name: Build documentation + + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + - name: Set up Python 3.11 + uses: actions/setup-python@v6 + with: + python-version: '3.11' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: | + sudo apt-get update + uv pip install --system -e . + if [ -f docs/requirements.txt ]; then uv pip install --system -r docs/requirements.txt; fi + if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi + - name: Install notebook requirements + run: | + sudo apt-get install pandoc + - name: Build docs + run: | + sphinx-build -T -E -b html -d docs/build/doctrees ./docs docs/build/html diff --git a/.github/workflows/pre-commit-ci.yml b/.github/workflows/pre-commit-ci.yml new file mode 100644 index 0000000..392feed --- /dev/null +++ b/.github/workflows/pre-commit-ci.yml @@ -0,0 +1,37 @@ + +# This workflow runs pre-commit hooks on pushes and pull requests to main +# to enforce coding style. To ensure correct configuration, please refer to: +# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_precommit.html +name: Run pre-commit hooks + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + pre-commit-ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: | + sudo apt-get update + uv pip install --system .[dev] + if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi + - uses: pre-commit/action@v3.0.1 + with: + extra_args: --all-files --verbose + env: + SKIP: "check-lincc-frameworks-template-version,no-commit-to-branch,check-added-large-files,validate-pyproject,sphinx-build,pytest-check" + - uses: pre-commit-ci/lite-action@v1.1.0 + if: failure() && github.event_name == 'pull_request' && github.event.pull_request.draft == false \ No newline at end of file diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml new file mode 100644 index 0000000..8bfbcbc --- /dev/null +++ b/.github/workflows/publish-to-pypi.yml @@ -0,0 +1,38 @@ + +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://github.com/pypa/gh-action-pypi-publish#trusted-publishing + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/checkout@v6 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml new file mode 100644 index 0000000..763208a --- /dev/null +++ b/.github/workflows/smoke-test.yml @@ -0,0 +1,43 @@ +# This workflow will run daily at 06:45. +# It will install Python dependencies and run tests with a variety of Python versions. +# See documentation for help debugging smoke test issues: +# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_testing.html#version-culprit + +name: Unit test smoke test + +on: + + # Runs this workflow automatically + schedule: + - cron: 45 6 * * * + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + + steps: + - uses: actions/checkout@v6 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: | + sudo apt-get update + uv pip install --system -e .[dev] + if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi + - name: List dependencies + run: | + pip list + - name: Run unit tests with pytest + run: | + python -m pytest \ No newline at end of file diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml new file mode 100644 index 0000000..bb1fd39 --- /dev/null +++ b/.github/workflows/testing-and-coverage.yml @@ -0,0 +1,40 @@ + +# This workflow will install Python dependencies, run tests and report code coverage with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Unit test and code coverage + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + + steps: + - uses: actions/checkout@v6 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: | + sudo apt-get update + uv pip install --system -e .[dev] + if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi + - name: Run unit tests with pytest + run: | + python -m pytest --cov=lbench --cov-report=xml + - name: Upload coverage report to codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 796ded3..36c8240 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,154 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ 
+lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ *.egg-info/ +.installed.cfg +*.egg +MANIFEST +_version.py + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +_readthedocs/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# vscode +.vscode/ + +# dask +dask-worker-space/ + +# tmp directory +tmp/ + +# Mac OS +.DS_Store + +# Airspeed Velocity performance results +_results/ +_html/ + +# Project initialization script +.initialize_new_project.sh + *.cpython*.pyc *cprofile*.prof *pytest-benchmark.json @@ -7,4 +157,5 @@ __pycache__/ .idea/ .ipynb_checkpoints/ dask_performance_report*.html + lbench_results/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..74b691a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,85 @@ + +repos: + # Compare the local template version to the latest remote template version + # This hook should always pass. It will print a message if the local version + # is out of date. + - repo: https://github.com/lincc-frameworks/pre-commit-hooks + rev: v0.2.2 + hooks: + - id: check-lincc-frameworks-template-version + name: Check template version + description: Compare current template version against latest + verbose: true + # Clear output from jupyter notebooks so that only the input cells are committed. + - repo: local + hooks: + - id: jupyter-nb-clear-output + name: Clear output from Jupyter notebooks + description: Clear output from Jupyter notebooks. + files: \.ipynb$ + exclude: ^docs/pre_executed + stages: [pre-commit] + language: system + entry: jupyter nbconvert --clear-output + # Prevents committing directly branches named 'main' and 'master'. + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: no-commit-to-branch + name: Prevent main branch commits + description: Prevent the user from committing directly to the primary branch. + - id: check-added-large-files + name: Check for large files + description: Prevent the user from committing very large files. 
+ args: ['--maxkb=500'] + # Verify that pyproject.toml is well formed + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.24.1 + hooks: + - id: validate-pyproject + name: Validate pyproject.toml + description: Verify that pyproject.toml adheres to the established schema. + # Verify that GitHub workflows are well formed + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.28.0 + hooks: + - id: check-github-workflows + args: ["--verbose"] + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.2.1 + hooks: + - id: ruff + name: Lint code using ruff; sort and organize imports + types_or: [ python, pyi ] + args: ["--fix"] + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.2.1 + hooks: + - id: ruff-format + name: Format code using ruff + types_or: [ python, pyi, jupyter ] + - repo: https://github.com/lincc-frameworks/pre-commit-hooks + rev: v0.2.2 + hooks: + - id: pre-executed-nb-never-execute + name: Check pre-executed notebooks + files: ^docs/pre_executed/.*\.ipynb$ + verbose: true + args: + ["docs/pre_executed/"] + # Run unit tests, verify that they pass. Note that coverage is run against + # the ./src directory here because that is what will be committed. In the + # github workflow script, the coverage is run against the installed package + # and uploaded to Codecov by calling pytest like so: + # `python -m pytest --cov= --cov-report=xml` + - repo: local + hooks: + - id: pytest-check + name: Run unit tests + description: Run unit tests with pytest. + entry: bash -c "if python -m pytest --co -qq; then python -m pytest --cov=./src --cov-report=html; fi" + language: system + pass_filenames: false + always_run: true diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..3d200a7 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,24 @@ + +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: docs/requirements.txt + - requirements: requirements.txt + - method: pip + path: . diff --git a/.setup_dev.sh b/.setup_dev.sh new file mode 100755 index 0000000..5286e41 --- /dev/null +++ b/.setup_dev.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# Bash Unofficial strict mode (http://redsymbol.net/articles/unofficial-bash-strict-mode/) +# and (https://disconnected.systems/blog/another-bash-strict-mode/) +set -o nounset # Any uninitialized variable is an error +set -o errexit # Exit the script on the failure of any command to execute without error +set -o pipefail # Fail command pipelines on the failure of any individual step +IFS=$'\n\t' #set internal field separator to avoid iteration errors +# Trap all exits and output something helpful +trap 's=$?; echo "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR + +# This script should be run by new developers to install this package in +# editable mode and configure their local environment + +echo "Checking virtual environment" +if [ "${VIRTUAL_ENV:-missing}" = "missing" ] && [ "${CONDA_PREFIX:-missing}" = "missing" ]; then + echo 'No virtual environment detected: none of $VIRTUAL_ENV or $CONDA_PREFIX is set.' 
+ echo + echo "=== This script is going to install the project in the system python environment ===" + echo "Proceed? [y/N]" + read -r RESPONCE + if [ "${RESPONCE}" != "y" ]; then + echo "See https://lincc-ppt.readthedocs.io/ for details." + echo "Exiting." + exit 1 + fi + +fi + +echo "Checking pip version" +MINIMUM_PIP_VERSION=22 +pipversion=( $(python -m pip --version | awk '{print $2}' | sed 's/\./\n\t/g') ) +if let "${pipversion[0]}<${MINIMUM_PIP_VERSION}"; then + echo "Insufficient version of pip found. Requires at least version ${MINIMUM_PIP_VERSION}." + echo "See https://lincc-ppt.readthedocs.io/ for details." + exit 1 +fi + +echo "Installing package and runtime dependencies in local environment" +python -m pip install -e . > /dev/null + +echo "Installing developer dependencies in local environment" +python -m pip install -e .'[dev]' > /dev/null +if [ -f docs/requirements.txt ]; then python -m pip install -r docs/requirements.txt > /dev/null; fi + +echo "Installing pre-commit" +pre-commit install > /dev/null + +####################################################### +# Include any additional configurations below this line +####################################################### diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f96e87e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 LINCC Frameworks + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..12b5193 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,68 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "63a26342-fa6f-496a-9341-cadc74641be9", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'IPythonKernel' object has no attribute 'do_one_iteration'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlbench\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdashboard\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mapp\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m run_dashboard\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/code/lsdb-benchmarking/src/lbench/dashboard/app.py:25\u001b[39m\n\u001b[32m 21\u001b[39m ROOT_DIR = get_lbench_root_dir()\n\u001b[32m 23\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mdash\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m jupyter_dash\n\u001b[32m---> \u001b[39m\u001b[32m25\u001b[39m \u001b[43mjupyter_dash\u001b[49m\u001b[43m.\u001b[49m\u001b[43minfer_jupyter_proxy_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 28\u001b[39m \u001b[38;5;66;03m# --- Load and cache runs ---\u001b[39;00m\n\u001b[32m 29\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_run_json\u001b[39m(run_dir):\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/lsdb-benchmarking/lib/python3.12/site-packages/dash/_jupyter.py:266\u001b[39m, in \u001b[36mJupyterDash.infer_jupyter_proxy_config\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 264\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[32m 265\u001b[39m \u001b[38;5;66;03m# Assume classic notebook or JupyterLab\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m266\u001b[39m \u001b[43m_request_jupyter_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/lsdb-benchmarking/lib/python3.12/site-packages/dash/_jupyter.py:205\u001b[39m, in \u001b[36m_request_jupyter_config\u001b[39m\u001b[34m(timeout)\u001b[39m\n\u001b[32m 202\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m _jupyter_comm_response_received():\n\u001b[32m 203\u001b[39m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m205\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m asyncio.iscoroutinefunction(\u001b[43mkernel\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdo_one_iteration\u001b[49m):\n\u001b[32m 206\u001b[39m loop = asyncio.get_event_loop()\n\u001b[32m 207\u001b[39m nest_asyncio.apply(loop)\n", + "\u001b[31mAttributeError\u001b[39m: 'IPythonKernel' object has no attribute 'do_one_iteration'" + ] + } + ], + "source": [ + "from lbench.dashboard.app import run_dashboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68e2d178-0807-4e96-980b-ea257eceffae", + "metadata": {}, + "outputs": [], + "source": [ + "run_dashboard()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a106998-bc24-4099-8538-8b7739fa6de9", + "metadata": {}, + "outputs": [], + "source": [] + } + 
], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..a5622f1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,31 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= -T -E -d _build/doctrees -D language=en +EXCLUDENB ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints" +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = ../_readthedocs/ + +.PHONY: help clean Makefile no-nb no-notebooks + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +# Build all Sphinx docs locally, except the notebooks +no-nb no-notebooks: + @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(EXCLUDENB) $(O) + +# Cleans up files generated by the build process +clean: + rm -r "_build/doctrees" + rm -r "$(BUILDDIR)" + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..8adaec8 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,58 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + + +import os +import sys +from importlib.metadata import version + +# Define path to the code to be documented **relative to where conf.py (this file) is kept** +sys.path.insert(0, os.path.abspath("../src/")) + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "lbench" +copyright = "2025, LINCC Frameworks" +author = "LINCC Frameworks" +release = version("lbench") +# for example take major/minor +version = ".".join(release.split(".")[:2]) + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.viewcode"] + +extensions.append("autoapi.extension") +extensions.append("nbsphinx") + +# -- sphinx-copybutton configuration ---------------------------------------- +extensions.append("sphinx_copybutton") +## sets up the expected prompt text from console blocks, and excludes it from +## the text that goes into the clipboard. +copybutton_exclude = ".linenos, .gp" +copybutton_prompt_text = ">> " + +## lets us suppress the copy button on select code blocks. 
+copybutton_selector = "div:not(.no-copybutton) > div.highlight > pre" + +templates_path = [] +exclude_patterns = ["_build", "**.ipynb_checkpoints"] + +# This assumes that sphinx-build is called from the root directory +master_doc = "index" +# Remove 'view source code' from top of page (for html, not python) +html_show_sourcelink = False +# Remove namespaces from class/method signatures +add_module_names = False + +autoapi_type = "python" +autoapi_dirs = ["../src"] +autoapi_ignore = ["*/__main__.py", "*/_version.py"] +autoapi_add_toc_tree_entry = False +autoapi_member_order = "bysource" + +html_theme = "sphinx_rtd_theme" diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..390012a --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,53 @@ + +.. lbench documentation main file. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to lbench's documentation! +======================================================================================== + +Benchmarking tools for LSDB + +Dev Guide - Getting Started +--------------------------- + +Before installing any dependencies or writing code, it's a great idea to create a +virtual environment. LINCC-Frameworks engineers primarily use `conda` to manage virtual +environments. If you have conda installed locally, you can run the following to +create and activate a new environment. + +.. code-block:: console + + >> conda create env -n python=3.11 + >> conda activate + + +Once you have created a new environment, you can install this project for local +development using the following commands: + +.. code-block:: console + + >> pip install -e .'[dev]' + >> pre-commit install + >> conda install pandoc + + +Notes: + +1) The single quotes around ``'[dev]'`` may not be required for your operating system. +2) ``pre-commit install`` will initialize pre-commit for this local repository, so + that a set of tests will be run prior to completing a local commit. For more + information, see the Python Project Template documentation on + `pre-commit `_. +3) Installing ``pandoc`` allows you to verify that automatic rendering of Jupyter notebooks + into documentation for ReadTheDocs works as expected. For more information, see + the Python Project Template documentation on + `Sphinx and Python Notebooks `_. + + +.. toctree:: + :hidden: + + Home page + API Reference + Notebooks diff --git a/docs/notebooks.rst b/docs/notebooks.rst new file mode 100644 index 0000000..7f7e544 --- /dev/null +++ b/docs/notebooks.rst @@ -0,0 +1,6 @@ +Notebooks +======================================================================================== + +.. toctree:: + + Introducing Jupyter Notebooks diff --git a/docs/notebooks/README.md b/docs/notebooks/README.md new file mode 100644 index 0000000..2b4fb45 --- /dev/null +++ b/docs/notebooks/README.md @@ -0,0 +1,25 @@ +# Jupyter notebooks to run on-demand. + +Jupyter notebooks in this directory will be run each time you render your documentation. + +This means they should be able to be run with the resources in the repo, and in various environments: + +- any other developer's machine +- github CI runners +- ReadTheDocs doc generation + +This is great for notebooks that can run in a few minutes, on smaller datasets. 
+ +If you would like to include these notebooks in automatically generated documentation +simply add the notebook name to the ``../notebooks.rst`` file, and include a markdown +cell at the beginning of your notebook with ``# Title`` that will be used as the text +in the table of contents in the documentation. + +Be aware that you may also need to update the ``../requirements.txt`` file if +your notebooks have dependencies that are not specified in ``../pyproject.toml``. + +For notebooks that require large datasets, access to third party APIs, large CPU or GPU requirements, put them in `./pre_executed` instead. + +For more information look here: https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html#python-notebooks + +Or if you still have questions contact us: https://lincc-ppt.readthedocs.io/en/latest/source/contact.html \ No newline at end of file diff --git a/docs/notebooks/intro_notebook.ipynb b/docs/notebooks/intro_notebook.ipynb new file mode 100644 index 0000000..0589b29 --- /dev/null +++ b/docs/notebooks/intro_notebook.ipynb @@ -0,0 +1,84 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "textblock1", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Introducing Jupyter Notebooks in Sphinx\n", + "\n", + "This notebook showcases very basic functionality of rendering your jupyter notebooks as tutorials inside your sphinx documentation.\n", + "\n", + "As part of the LINCC Frameworks python project template, your notebooks will be executed AND rendered at document build time.\n", + "\n", + "You can read more about Sphinx, ReadTheDocs, and building notebooks in [LINCC's documentation](https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "codeblock1", + "metadata": {}, + "outputs": [], + "source": [ + "def sierpinsky(order):\n", + " \"\"\"Define a method that will create a Sierpinsky triangle of given order,\n", + " and will print it out.\"\"\"\n", + " triangles = [\"*\"]\n", + " for i in range(order):\n", + " spaces = \" \" * (2**i)\n", + " triangles = [spaces + triangle + spaces for triangle in triangles] + [\n", + " triangle + \" \" + triangle for triangle in triangles\n", + " ]\n", + " print(f\"Printing order {order} triangle\")\n", + " print(\"\\n\".join(triangles))" + ] + }, + { + "cell_type": "markdown", + "id": "textblock2", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "Then, call our method a few times. This will happen on the fly during notebook rendering." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "codeblock2", + "metadata": {}, + "outputs": [], + "source": [ + "for order in range(3):\n", + " sierpinsky(order)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "codeblock3", + "metadata": {}, + "outputs": [], + "source": [ + "sierpinsky(4)" + ] + } + ], + "metadata": { + "jupytext": { + "cell_markers": "\"\"\"" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/pre_executed/README.md b/docs/pre_executed/README.md new file mode 100644 index 0000000..fb3cc7c --- /dev/null +++ b/docs/pre_executed/README.md @@ -0,0 +1,16 @@ +# Pre-executed Jupyter notebooks + +Jupyter notebooks in this directory will NOT be run in the docs workflows, and will be rendered with +the provided output cells as-is. 
+ +This is useful for notebooks that require large datasets, access to third party APIs, large CPU or GPU requirements. + +Where possible, instead write smaller notebooks that can be run as part of a github worker, and within the ReadTheDocs rendering process. + +To ensure that the notebooks are not run by the notebook conversion process, you can add the following metadata block to the notebook: + +``` + "nbsphinx": { + "execute": "never" + }, +``` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..423ef3f --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,10 @@ + +ipykernel +ipython +jupytext +nbconvert +nbsphinx +sphinx +sphinx-autoapi +sphinx-copybutton +sphinx-rtd-theme>=3.0 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4fa5e9f..41b46d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,21 @@ [project] name = "lbench" -version = "0.0.1" +license = "MIT" +license-files = ["LICENSE"] +readme = "README.md" +authors = [ + { name = "LINCC Frameworks", email = "seanmcgu@andrew.cmu.edu" } +] +description = "Benchmarking tools for LSDB" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: OS Independent", + "Programming Language :: Python", +] +dynamic = ["version"] +requires-python = ">=3.10" dependencies = [ "pytest", "pytest-benchmark", @@ -12,8 +27,23 @@ dependencies = [ "memray" ] +[project.urls] +"Source Code" = "https://github.com/lincc-frameworks/lbench" + +# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes) +[project.optional-dependencies] +dev = [ + "jupyter", # Clears output from Jupyter notebooks + "pre-commit", # Used to run checks before finalizing a git commit + "pytest-cov", # Used to report total code coverage + "ruff", # Used for static linting of files +] + [build-system] -requires = ["setuptools"] +requires = [ + "setuptools>=62", # Used to build and package the Python project + "setuptools_scm>=6.2", # Gets release version from git. 
Makes it available programmatically +] build-backend = "setuptools.build_meta" [project.scripts] @@ -22,7 +52,63 @@ lbench = "lbench.cli.lbench:app" [project.entry-points.pytest11] lbench = "lbench.pytest" +[tool.setuptools_scm] +write_to = "src/lbench/_version.py" + [tool.pytest.ini_options] markers = [ "lbench_memory: mark test to capture memory usage" -] \ No newline at end of file +] +testpaths = [ + "tests", + "src", + "docs", +] +addopts = "--doctest-modules --doctest-glob=*.rst" + +[tool.ruff] +line-length = 110 +[tool.ruff.lint] +select = [ + # pycodestyle + "E", + "W", + # Pyflakes + "F", + # pep8-naming + "N", + # pyupgrade + "UP", + # flake8-bugbear + "B", + # flake8-simplify + "SIM", + # isort + "I", + # docstrings + "D101", + "D102", + "D103", + "D106", + "D206", + "D207", + "D208", + "D300", + "D417", + "D419", + # Numpy v2.0 compatibility + "NPY201", +] +ignore = [ + "UP006", # Allow non standard library generics in type hints + "UP007", # Allow Union in type hints + "SIM114", # Allow if with same arms + "B028", # Allow default warning level + "SIM117", # Allow nested with + "UP015", # Allow redundant open parameters + "UP028", # Allow yield in for loop +] + + +[tool.coverage.run] +omit = ["src/lbench/_version.py"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/lbench/__init__.py b/src/lbench/__init__.py index 176dace..ecafe60 100644 --- a/src/lbench/__init__.py +++ b/src/lbench/__init__.py @@ -1,3 +1,4 @@ from .dashboard.app import run_dashboard +from ._version import __version__ -__all__ = ["run_dashboard"] +__all__ = ["run_dashboard", "__version__"] diff --git a/src/lbench/notebook/__init__.py b/src/lbench/notebook/__init__.py new file mode 100644 index 0000000..f357dbe --- /dev/null +++ b/src/lbench/notebook/__init__.py @@ -0,0 +1,7 @@ +"""Jupyter integration for lbench. Load with: %load_ext lbench.notebook""" + +from lbench.notebook.magic import LbenchMagics + + +def load_ipython_extension(ip): + ip.register_magics(LbenchMagics) diff --git a/src/lbench/notebook/magic.py b/src/lbench/notebook/magic.py new file mode 100644 index 0000000..e191d6f --- /dev/null +++ b/src/lbench/notebook/magic.py @@ -0,0 +1,192 @@ +""" +Jupyter cell magic for lbench benchmarks. 
+ +Usage +----- +Load the extension once per notebook:: + + %load_ext lbench.notebook + +Then use the cell magic (similar to %%timeit):: + + %%lbench + my_expensive_function() + +Options:: + + %%lbench --rounds 10 --warmup --memory --profile --dask --name my_bench + my_dask_function() + + # Also capture Dask graph stats from a collection variable: + %%lbench --dask --collection my_df + my_df.compute() + +Options +------- +--rounds / -r Number of timed rounds (default: 5) +--warmup / -w Run one un-timed warmup round first +--memory / -m Track peak memory with memray +--profile / -p Capture a cProfile .prof file +--dask / -d Collect Dask metrics (task stream, memory, performance report) +--collection VAR Name of a Dask collection variable; also records graph size/length +--name / -n Name for this benchmark entry (default: auto-generated) +""" + +from __future__ import annotations + +import sys +from datetime import datetime +from pathlib import Path +from typing import Optional + +from IPython.core.magic import Magics, cell_magic, magics_class +from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring + +from lbench.cli.env import get_lbench_root_dir +from lbench.runner import ( + make_benchmark_entry, + run_cprofile, + run_dask_benchmark, + run_memray, + time_function, + write_benchmark_json, +) + +# -- session state ----------------------------------------------------------- + +_run_dir: Optional[Path] = None + + +def _get_run_dir() -> Path: + global _run_dir + if _run_dir is None: + root = get_lbench_root_dir() + run_id = datetime.now().strftime("%Y%m%d-%H%M%S") + _run_dir = root / run_id + _run_dir.mkdir(parents=True, exist_ok=True) + return _run_dir + + +def reset_session(): + """Start a fresh run directory for this notebook session.""" + global _run_dir + _run_dir = None + + +# -- display helpers --------------------------------------------------------- + +def _fmt_time(seconds: float) -> str: + if seconds >= 1: + return f"{seconds:.3f} s" + if seconds >= 1e-3: + return f"{seconds * 1e3:.3f} ms" + if seconds >= 1e-6: + return f"{seconds * 1e6:.3f} µs" + return f"{seconds * 1e9:.3f} ns" + + +def _fmt_memory(nbytes: int) -> str: + for unit, scale in [("GiB", 2 ** 30), ("MiB", 2 ** 20), ("KiB", 2 ** 10)]: + if nbytes >= scale: + return f"{nbytes / scale:.2f} {unit}" + return f"{nbytes} B" + + +# -- magic class ------------------------------------------------------------- + +@magics_class +class LbenchMagics(Magics): + """Provides the %%lbench cell magic.""" + + @cell_magic + @magic_arguments() + @argument("--rounds", "-r", type=int, default=5, + help="Number of timed rounds (default: 5)") + @argument("--warmup", "-w", action="store_true", + help="Run one un-timed warmup round before measuring") + @argument("--memory", "-m", action="store_true", + help="Track peak memory usage with memray") + @argument("--profile", "-p", action="store_true", + help="Capture a cProfile .prof file") + @argument("--dask", "-d", action="store_true", + help="Collect Dask metrics (task stream, memory sampler, performance report)") + @argument("--collection", type=str, default=None, metavar="VAR", + help="Name of a Dask collection variable; also records graph size and length") + @argument("--name", "-n", type=str, default=None, + help="Name for this benchmark entry") + def lbench(self, line: str, cell: str): + """Benchmark a cell's code and save results to a lbench-compatible JSON log.""" + args = parse_argstring(self.lbench, line) + + ip = self.shell + ns = ip.user_ns + + name = 
args.name or f"cell_{datetime.now().strftime('%H%M%S')}" + fullname = f"notebook::{name}" + run_dir = _get_run_dir() + + code = compile(cell, f"", "exec") + + def run_cell(): + exec(code, ns) # noqa: S102 – intentional notebook execution + + # --- time ----------------------------------------------------------- + data = time_function(run_cell, rounds=args.rounds, warmup=args.warmup) + + # --- optional profiling --------------------------------------------- + extra_info: dict = {} + + if args.profile: + extra_info["cprofile_path"] = run_cprofile(run_cell, run_dir) + + if args.memory: + extra_info["peak_memory_bytes"] = run_memray(run_cell, run_dir) + + # --- optional dask metrics ------------------------------------------ + if args.dask: + dask_info = run_dask_benchmark(run_cell, run_dir) + + if args.collection: + collection = ns.get(args.collection) + if collection is None: + raise NameError( + f"--collection: variable {args.collection!r} not found in namespace" + ) + graph = collection.dask + dask_info["dask_graph_len"] = len(graph) + dask_info["dask_graph_size_bytes"] = sum( + sys.getsizeof(graph[k]) for k in graph + ) + + extra_info["dask"] = dask_info + + # --- build entry & write JSON --------------------------------------- + entry = make_benchmark_entry( + name=name, + fullname=fullname, + data=data, + extra_info=extra_info, + ) + json_path = write_benchmark_json(run_dir, [entry]) + + # --- display -------------------------------------------------------- + stats = entry["stats"] + print( + f"{stats['rounds']} rounds " + f"mean: {_fmt_time(stats['mean'])} ± {_fmt_time(stats['stddev'])} " + f"(min: {_fmt_time(stats['min'])}, max: {_fmt_time(stats['max'])})" + ) + if "peak_memory_bytes" in extra_info: + print(f"peak memory: {_fmt_memory(extra_info['peak_memory_bytes'])}") + if "dask" in extra_info: + d = extra_info["dask"] + print(f"dask tasks: {d.get('n_tasks', '?')}", end="") + if "peak_memory_bytes" in d: + print(f" peak memory: {_fmt_memory(d['peak_memory_bytes'])}", end="") + if "dask_graph_len" in d: + print(f" graph nodes: {d['dask_graph_len']}", end="") + print() + print(f"perf report: {d['performance_report']}") + if "cprofile_path" in extra_info: + print(f"cProfile: {extra_info['cprofile_path']}") + print(f"log: {json_path}") diff --git a/tests/lbench/conftest.py b/tests/lbench/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/lbench/test_packaging.py b/tests/lbench/test_packaging.py new file mode 100644 index 0000000..42d6a3b --- /dev/null +++ b/tests/lbench/test_packaging.py @@ -0,0 +1,6 @@ +import lbench + + +def test_version(): + """Check to see that we can get the package version""" + assert lbench.__version__ is not None From 8978db2c4f85315133ade1d7b0ac54d8f5665184 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 17:27:26 -0400 Subject: [PATCH 07/13] remove linting and clear notebooks --- .copier-answers.yml | 4 +-- .pre-commit-config.yaml | 15 ------------ Untitled.ipynb | 19 ++------------- benchmarks/basic_notebook.ipynb | 38 +++++++---------------------- pyproject.toml | 43 --------------------------------- 5 files changed, 12 insertions(+), 107 deletions(-) diff --git a/.copier-answers.yml b/.copier-answers.yml index 212da10..7fd2afc 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -5,9 +5,7 @@ author_email: seanmcgu@andrew.cmu.edu author_name: LINCC Frameworks create_example_module: false custom_install: custom -enforce_style: -- ruff_lint -- ruff_format +enforce_style: [] failure_notification: [] 
include_benchmarks: false include_docs: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 74b691a..d856e2e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,21 +45,6 @@ repos: hooks: - id: check-github-workflows args: ["--verbose"] - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.2.1 - hooks: - - id: ruff - name: Lint code using ruff; sort and organize imports - types_or: [ python, pyi ] - args: ["--fix"] - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.2.1 - hooks: - - id: ruff-format - name: Format code using ruff - types_or: [ python, pyi, jupyter ] - repo: https://github.com/lincc-frameworks/pre-commit-hooks rev: v0.2.2 hooks: diff --git a/Untitled.ipynb b/Untitled.ipynb index 12b5193..e001ffd 100644 --- a/Untitled.ipynb +++ b/Untitled.ipynb @@ -2,25 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "63a26342-fa6f-496a-9341-cadc74641be9", "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'IPythonKernel' object has no attribute 'do_one_iteration'", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlbench\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdashboard\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mapp\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m run_dashboard\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/code/lsdb-benchmarking/src/lbench/dashboard/app.py:25\u001b[39m\n\u001b[32m 21\u001b[39m ROOT_DIR = get_lbench_root_dir()\n\u001b[32m 23\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mdash\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m jupyter_dash\n\u001b[32m---> \u001b[39m\u001b[32m25\u001b[39m \u001b[43mjupyter_dash\u001b[49m\u001b[43m.\u001b[49m\u001b[43minfer_jupyter_proxy_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 28\u001b[39m \u001b[38;5;66;03m# --- Load and cache runs ---\u001b[39;00m\n\u001b[32m 29\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_run_json\u001b[39m(run_dir):\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/lsdb-benchmarking/lib/python3.12/site-packages/dash/_jupyter.py:266\u001b[39m, in \u001b[36mJupyterDash.infer_jupyter_proxy_config\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 264\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[32m 265\u001b[39m \u001b[38;5;66;03m# Assume classic notebook or JupyterLab\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m266\u001b[39m \u001b[43m_request_jupyter_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/lsdb-benchmarking/lib/python3.12/site-packages/dash/_jupyter.py:205\u001b[39m, in \u001b[36m_request_jupyter_config\u001b[39m\u001b[34m(timeout)\u001b[39m\n\u001b[32m 202\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m _jupyter_comm_response_received():\n\u001b[32m 203\u001b[39m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m205\u001b[39m 
\u001b[38;5;28;01mif\u001b[39;00m asyncio.iscoroutinefunction(\u001b[43mkernel\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdo_one_iteration\u001b[49m):\n\u001b[32m 206\u001b[39m loop = asyncio.get_event_loop()\n\u001b[32m 207\u001b[39m nest_asyncio.apply(loop)\n", - "\u001b[31mAttributeError\u001b[39m: 'IPythonKernel' object has no attribute 'do_one_iteration'" - ] - } - ], + "outputs": [], "source": [ "from lbench.dashboard.app import run_dashboard" ] diff --git a/benchmarks/basic_notebook.ipynb b/benchmarks/basic_notebook.ipynb index 17bbbb0..f740ff0 100644 --- a/benchmarks/basic_notebook.ipynb +++ b/benchmarks/basic_notebook.ipynb @@ -2,14 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "initial_id", "metadata": { "ExecuteTime": { "end_time": "2026-04-03T18:10:01.175489Z", "start_time": "2026-04-03T18:10:01.133802Z" }, - "collapsed": true, "jupyter": { "outputs_hidden": true } @@ -21,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "5b500d8e0351656c", "metadata": { "ExecuteTime": { @@ -43,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "6d8fef147c0a7522", "metadata": { "ExecuteTime": { @@ -58,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "e8f6f2af9dacfce5", "metadata": { "ExecuteTime": { @@ -75,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "ecfe03da485592c3", "metadata": { "ExecuteTime": { @@ -83,18 +82,7 @@ "start_time": "2026-04-03T18:13:33.448441Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 rounds mean: 1.013 s ± 0.000 ns (min: 1.013 s, max: 1.013 s)\n", - "dask tasks: 1 peak memory: 61.73 MiB graph nodes: 1\n", - "perf report: /Users/smcmu/code/lsdb-benchmarking/benchmarks/lbench_results/20260403-141217/dask_performance_report_6fd91c6c-8017-4b54-b270-a944983ab961.html\n", - "log: /Users/smcmu/code/lsdb-benchmarking/benchmarks/lbench_results/20260403-141217/pytest-benchmark.json\n" - ] - } - ], + "outputs": [], "source": [ "%%lbench --dask --rounds 1 --collection sleep_del\n", "sleep_del.compute()" @@ -102,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "f2da3bf761e96462", "metadata": {}, "outputs": [], @@ -112,18 +100,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "b9deaf00-0f3e-49f5-9d0c-cd5db1092ff1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dash app running on http://127.0.0.1:8050/\n" - ] - } - ], + "outputs": [], "source": [ "run_dashboard(jupyter_mode=\"external\")" ] diff --git a/pyproject.toml b/pyproject.toml index 41b46d3..8fc628e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ dev = [ "jupyter", # Clears output from Jupyter notebooks "pre-commit", # Used to run checks before finalizing a git commit "pytest-cov", # Used to report total code coverage - "ruff", # Used for static linting of files ] [build-system] @@ -66,48 +65,6 @@ testpaths = [ ] addopts = "--doctest-modules --doctest-glob=*.rst" -[tool.ruff] -line-length = 110 -[tool.ruff.lint] -select = [ - # pycodestyle - "E", - "W", - # Pyflakes - "F", - # pep8-naming - "N", - # pyupgrade - "UP", - # flake8-bugbear - "B", - # flake8-simplify - "SIM", - # isort - "I", - # docstrings - "D101", - "D102", - "D103", - "D106", - "D206", - "D207", - "D208", - "D300", - "D417", - "D419", - # 
Numpy v2.0 compatibility - "NPY201", -] -ignore = [ - "UP006", # Allow non standard library generics in type hints - "UP007", # Allow Union in type hints - "SIM114", # Allow if with same arms - "B028", # Allow default warning level - "SIM117", # Allow nested with - "UP015", # Allow redundant open parameters - "UP028", # Allow yield in for loop -] [tool.coverage.run] From 54e0df143bfbd5ead3e9c84d0f75130416fa81fa Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 17:37:19 -0400 Subject: [PATCH 08/13] add dask dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8fc628e..e4083be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "pytest", "pytest-benchmark", "typer", + "dask", "dash", "dash-bootstrap-components", "pandas", From 129c68a6c9c3f000dd84f5621213f95e0e3dbd0f Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 10 Apr 2026 17:41:16 -0400 Subject: [PATCH 09/13] add dask dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e4083be..d8a8356 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "pytest", "pytest-benchmark", "typer", - "dask", + "dask[complete]", "dash", "dash-bootstrap-components", "pandas", From 77ca99f2ab255d389a3572c3ed3c7480eabb6340 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Thu, 16 Apr 2026 15:06:38 -0400 Subject: [PATCH 10/13] update copier settings --- .copier-answers.yml | 5 +++-- .pre-commit-config.yaml | 12 ++++++++++++ pyproject.toml | 5 ++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.copier-answers.yml b/.copier-answers.yml index 7fd2afc..d006815 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -1,11 +1,12 @@ # Changes here will be overwritten by Copier _commit: v2.2.0 _src_path: gh:lincc-frameworks/python-project-template -author_email: seanmcgu@andrew.cmu.edu +author_email: lincc-frameworks-team@lists.lsst.org author_name: LINCC Frameworks create_example_module: false custom_install: custom -enforce_style: [] +enforce_style: +- black failure_notification: [] include_benchmarks: false include_docs: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d856e2e..9f43941 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,6 +45,18 @@ repos: hooks: - id: check-github-workflows args: ["--verbose"] + # Analyze the code style and report code that doesn't adhere. 
+ - repo: https://github.com/psf/black + rev: 23.7.0 + hooks: + - id: black-jupyter + name: Format code using black + types_or: [python, pyi, jupyter] + # It is recommended to specify the latest version of Python + # supported by your project here, or alternatively use + # pre-commit's default_language_version, see + # https://pre-commit.com/#top_level-default_language_version + language_version: python3.11 - repo: https://github.com/lincc-frameworks/pre-commit-hooks rev: v0.2.2 hooks: diff --git a/pyproject.toml b/pyproject.toml index d8a8356..f8fae32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ license = "MIT" license-files = ["LICENSE"] readme = "README.md" authors = [ - { name = "LINCC Frameworks", email = "seanmcgu@andrew.cmu.edu" } + { name = "LINCC Frameworks", email = "lincc-frameworks-team@lists.lsst.org" } ] description = "Benchmarking tools for LSDB" classifiers = [ @@ -34,6 +34,7 @@ dependencies = [ # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes) [project.optional-dependencies] dev = [ + "black", # Used for static linting of files "jupyter", # Clears output from Jupyter notebooks "pre-commit", # Used to run checks before finalizing a git commit "pytest-cov", # Used to report total code coverage @@ -66,6 +67,8 @@ testpaths = [ ] addopts = "--doctest-modules --doctest-glob=*.rst" +[tool.black] +line-length = 110 [tool.coverage.run] From 4dfb81127f1a506ca012def4ea4d22b8f55f24db Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Thu, 16 Apr 2026 15:07:11 -0400 Subject: [PATCH 11/13] black --- benchmarks/conftest.py | 7 +++ benchmarks/test_column_mean.py | 23 +++++---- benchmarks/test_crossmatch.py | 6 +-- benchmarks/test_io.py | 46 +++++++++++------- benchmarks/test_local_io.py | 20 ++------ src/lbench/cli/env.py | 1 + src/lbench/cli/lbench.py | 1 + src/lbench/dashboard/app.py | 10 ++-- src/lbench/dashboard/layout.py | 20 +++++--- src/lbench/dashboard/layouts/sidebar.py | 48 ++++++++++++++----- src/lbench/dashboard/layouts/tables.py | 5 +- src/lbench/dashboard/layouts/trends.py | 16 +++++-- .../dashboard/metrics/benchmark_collection.py | 19 ++++---- .../dashboard/metrics/groups/dask_group.py | 45 ++++++++--------- .../metrics/groups/execution_group.py | 3 +- .../metrics/groups/profiling_group.py | 16 ++++--- .../dashboard/metrics/groups/stats_group.py | 12 ++--- src/lbench/dashboard/metrics/registry.py | 2 +- src/lbench/dashboard/utils.py | 1 + src/lbench/notebook/magic.py | 44 +++++++++-------- src/lbench/pytest/plugin.py | 2 +- src/lbench/runner.py | 33 +++++++------ 22 files changed, 214 insertions(+), 166 deletions(-) diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index fbaf3c2..3706631 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -6,6 +6,7 @@ import pyarrow as pa import lsdb + @fixture def catalog_local_dir() -> UPath: root = os.environ.get("CATALOG_LOCAL_DIR", "/epyc/data3/hats/catalogs") @@ -16,6 +17,7 @@ def catalog_local_dir() -> UPath: def gaia_local_collection_path(catalog_local_dir: UPath) -> UPath: return catalog_local_dir / "gaia_dr3" + @fixture def gaia_local_catalog_path(gaia_local_collection_path) -> UPath: return gaia_local_collection_path / "gaia" @@ -30,14 +32,17 @@ def gaia_local_metadata_path(gaia_local_catalog_path) -> UPath: def gaia_s3_collection_path() -> UPath: return UPath("s3://stpubdata/gaia/gaia_dr3/public/hats") + @fixture(scope="session") def gaia_s3_catalog_path(gaia_s3_collection_path) -> UPath: return gaia_s3_collection_path / "gaia" + 
@fixture(scope="session") def gaia_s3_metadata_path(gaia_s3_catalog_path) -> UPath: return gaia_s3_catalog_path / "dataset" / "_metadata" + @fixture(scope="session") def gaia_s3_dataset(gaia_s3_metadata_path) -> pyarrow.dataset.Dataset: return pyarrow.dataset.parquet_dataset( @@ -46,6 +51,7 @@ def gaia_s3_dataset(gaia_s3_metadata_path) -> pyarrow.dataset.Dataset: filesystem=pa.fs.S3FileSystem(), ) + @fixture def gaia_local_dataset(gaia_local_metadata_path) -> pyarrow.dataset.Dataset: return pyarrow.dataset.parquet_dataset( @@ -86,4 +92,5 @@ def get_lsdb_catalog(io_method, **kwargs): else: raise ValueError(f"Unsupported IO method: {io_method}") return lsdb.open_catalog(path.as_uri(), **kwargs) + return Helpers() diff --git a/benchmarks/test_column_mean.py b/benchmarks/test_column_mean.py index ad770e5..58dd6bf 100644 --- a/benchmarks/test_column_mean.py +++ b/benchmarks/test_column_mean.py @@ -4,6 +4,7 @@ import pyarrow as pa import nested_pandas as npd + def test_pyarrow_mean(gaia_collection_path, lbench): gaia_root = gaia_collection_path / "gaia" parquet_root = f"{gaia_root}/dataset" @@ -11,12 +12,12 @@ def test_pyarrow_mean(gaia_collection_path, lbench): def dataset_mean(dataset, field: str, *, use_threads: bool = True): total_sum = None # Arrow Scalar - total_count = 0 # Python int + total_count = 0 # Python int for batch in dataset.to_batches(columns=[field], use_threads=use_threads): col = batch.column(0) - b_sum = pc.sum(col) # Scalar (or null if all-null) - b_count = pc.count(col, mode="only_valid") # Int64 Scalar + b_sum = pc.sum(col) # Scalar (or null if all-null) + b_count = pc.count(col, mode="only_valid") # Int64 Scalar if not pc.is_null(b_sum).as_py() and b_count.as_py() > 0: total_sum = b_sum if total_sum is None else pc.add(total_sum, b_sum) total_count += b_count.as_py() @@ -29,15 +30,18 @@ def dataset_mean(dataset, field: str, *, use_threads: bool = True): lbench(dataset_mean, pyarrow_ds, "phot_g_mean_mag") + def test_lsdb_mean(gaia_collection_path, lbench_dask): - def catalog_mean(df, target_column=''): - result = npd.NestedFrame({ - "sum": [df[target_column].sum()], - "count": [len(df)], - }) + def catalog_mean(df, target_column=""): + result = npd.NestedFrame( + { + "sum": [df[target_column].sum()], + "count": [len(df)], + } + ) return result - lsdb_gaia = lsdb.open_catalog(gaia_collection_path, columns=['phot_g_mean_mag']) + lsdb_gaia = lsdb.open_catalog(gaia_collection_path, columns=["phot_g_mean_mag"]) unrealized = lsdb_gaia.map_partitions( catalog_mean, target_column="phot_g_mean_mag", @@ -45,4 +49,5 @@ def catalog_mean(df, target_column=''): def compute_mean(): result = unrealized.compute() + lbench_dask(compute_mean) diff --git a/benchmarks/test_crossmatch.py b/benchmarks/test_crossmatch.py index c64c1e4..f656422 100644 --- a/benchmarks/test_crossmatch.py +++ b/benchmarks/test_crossmatch.py @@ -27,9 +27,7 @@ def test_crossmatch(lbench, catalog_local_dir): # Size (memory) of Gaia margin pixel: 87.9 MiB gaia_margin_path = catalog_local_dir / "gaia_dr3" / "gaia_300arcs" - gaia_margin_part = npd.read_parquet( - pixel_catalog_file(gaia_margin_path, gaia_pixel) - ) + gaia_margin_part = npd.read_parquet(pixel_catalog_file(gaia_margin_path, gaia_pixel)) gaia_margin = lsdb.read_hats(gaia_margin_path) algorithm = KdTreeCrossmatch() @@ -65,4 +63,4 @@ def crossmatch(): meta_df=meta_df, ) - lbench(crossmatch) \ No newline at end of file + lbench(crossmatch) diff --git a/benchmarks/test_io.py b/benchmarks/test_io.py index 05d6919..c586fbf 100644 --- a/benchmarks/test_io.py +++ 
b/benchmarks/test_io.py @@ -5,49 +5,62 @@ COLUMN_CONFIGS = [ ["source_id", "ra", "dec"], - ["source_id", "ra", "dec", "ra_error", "dec_error", "parallax", "pm", "designation", "phot_g_mean_mag", "phot_bp_mean_mag"], + [ + "source_id", + "ra", + "dec", + "ra_error", + "dec_error", + "parallax", + "pm", + "designation", + "phot_g_mean_mag", + "phot_bp_mean_mag", + ], ] -@pytest.mark.parametrize( - "columns", - COLUMN_CONFIGS, - ids=["3col", "10col"] -) -@pytest.mark.parametrize( - "io_method", - ["s3", "local"], - ids=["s3", "local"] -) +@pytest.mark.parametrize("columns", COLUMN_CONFIGS, ids=["3col", "10col"]) +@pytest.mark.parametrize("io_method", ["s3", "local"], ids=["s3", "local"]) class TestLsdbIO: def test_pyarrow_single_partition(self, columns, io_method, lbench, helpers): dataset = helpers.get_pyarrow_dataset(io_method) frag = list(dataset.get_fragments())[0] + def load_partition(): df = frag.to_table(columns=columns).to_pandas() + lbench(load_partition) def test_lsdb_single_partition(self, columns, io_method, lbench_dask, helpers): catalog = helpers.get_lsdb_catalog(io_method, columns=columns) partition = catalog.partitions[0] + def load_partition(): df = partition.compute() + lbench_dask(load_partition) def test_pyarrow_multi_partition(self, columns, io_method, lbench, helpers): dataset = helpers.get_pyarrow_dataset(io_method) frag = list(dataset.get_fragments())[:10] paths = [f.path for f in frag] - ds = pyarrow.dataset.dataset(paths, format="parquet", schema=dataset.schema, filesystem=dataset.filesystem) + ds = pyarrow.dataset.dataset( + paths, format="parquet", schema=dataset.schema, filesystem=dataset.filesystem + ) + def load_partition(): df = ds.to_table(columns=columns).to_pandas() + lbench(load_partition) def test_lsdb_multi_partition(self, columns, io_method, lbench_dask, helpers): catalog = helpers.get_lsdb_catalog(io_method, columns=columns) partition = catalog.partitions[0:10] + def load_partition(): df = partition.compute() + lbench_dask(load_partition) def test_pyarrow_filtered_query(self, columns, io_method, lbench, helpers): @@ -67,7 +80,9 @@ def load_partition(): lbench(load_partition) def test_lsdb_filtered_query(self, columns, io_method, lbench_dask, helpers): - catalog = helpers.get_lsdb_catalog(io_method, columns=columns, filters=[("ra", ">", 45.0), ("ra", "<", 46.0)]) + catalog = helpers.get_lsdb_catalog( + io_method, columns=columns, filters=[("ra", ">", 45.0), ("ra", "<", 46.0)] + ) partition = catalog.partitions[0:10] def load_partition(): @@ -76,13 +91,10 @@ def load_partition(): lbench_dask(load_partition) def test_lsdb_cone_search(self, columns, io_method, lbench_dask, helpers): - catalog = helpers.get_lsdb_catalog( - io_method, columns=columns - ) + catalog = helpers.get_lsdb_catalog(io_method, columns=columns) partition = catalog.cone_search(ra=45.5, dec=0.0, radius_arcsec=1800.0) def load_partition(): df = partition.compute() lbench_dask(load_partition) - diff --git a/benchmarks/test_local_io.py b/benchmarks/test_local_io.py index 7e43261..09423bc 100644 --- a/benchmarks/test_local_io.py +++ b/benchmarks/test_local_io.py @@ -5,9 +5,7 @@ import pytest -def test_local_catalog_partition_read( - gaia_collection_path, lbench_dask -): +def test_local_catalog_partition_read(gaia_collection_path, lbench_dask): gaia = lsdb.read_hats(gaia_collection_path) cat = gaia.partitions[0] @@ -20,9 +18,7 @@ def load_partition(): def test_local_catalog_npd_read(gaia_collection_path, lbench): gaia = lsdb.read_hats(gaia_collection_path) partition_0_pixel = 
gaia.partitions[0].get_healpix_pixels()[0] - partition_0_path = hats.io.paths.pixel_catalog_file( - gaia.hc_structure.catalog_base_dir, partition_0_pixel - ) + partition_0_path = hats.io.paths.pixel_catalog_file(gaia.hc_structure.catalog_base_dir, partition_0_pixel) def load_partition_npd(): npd.read_parquet(partition_0_path) @@ -33,9 +29,7 @@ def load_partition_npd(): def test_local_catalog_pd_read(gaia_collection_path, lbench): gaia = lsdb.read_hats(gaia_collection_path) partition_0_pixel = gaia.partitions[0].get_healpix_pixels()[0] - partition_0_path = hats.io.paths.pixel_catalog_file( - gaia.hc_structure.catalog_base_dir, partition_0_pixel - ) + partition_0_path = hats.io.paths.pixel_catalog_file(gaia.hc_structure.catalog_base_dir, partition_0_pixel) def load_partition_pd(): pd.read_parquet(partition_0_path) @@ -44,9 +38,7 @@ def load_partition_pd(): @pytest.mark.benchmark(min_rounds=1) -def test_local_catalog_multi_partition_read( - gaia_collection_path, lbench_dask -): +def test_local_catalog_multi_partition_read(gaia_collection_path, lbench_dask): gaia = lsdb.read_hats(gaia_collection_path) n_partitions = 10 cat = gaia.partitions[:n_partitions] @@ -65,9 +57,7 @@ def test_local_catalog_multi_partition_npd_read(gaia_collection_path, lbench): for i in range(n_partitions): partition = gaia.partitions[i] partition_pixel = partition.get_healpix_pixels()[0] - partition_path = hats.io.paths.pixel_catalog_file( - gaia.hc_structure.catalog_base_dir, partition_pixel - ) + partition_path = hats.io.paths.pixel_catalog_file(gaia.hc_structure.catalog_base_dir, partition_pixel) partition_paths.append(partition_path) def load_partitions_npd(): diff --git a/src/lbench/cli/env.py b/src/lbench/cli/env.py index 697c52a..f227e14 100644 --- a/src/lbench/cli/env.py +++ b/src/lbench/cli/env.py @@ -3,6 +3,7 @@ ROOT_DIR_ENV_VAR = "LBENCH_ROOT" + def get_lbench_root_dir() -> Path: """ Resolve the lbench root directory. 
diff --git a/src/lbench/cli/lbench.py b/src/lbench/cli/lbench.py index 4da7901..1cb877d 100644 --- a/src/lbench/cli/lbench.py +++ b/src/lbench/cli/lbench.py @@ -4,6 +4,7 @@ app = typer.Typer(help="lbench CLI — run benchmarks and dashboards") + @app.command() def dash(port: int = 8050): """Run the lbench dashboard.""" diff --git a/src/lbench/dashboard/app.py b/src/lbench/dashboard/app.py index e745c22..376494f 100644 --- a/src/lbench/dashboard/app.py +++ b/src/lbench/dashboard/app.py @@ -18,7 +18,7 @@ __name__, external_stylesheets=[ dbc.themes.FLATLY, - "https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css" + "https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css", ], ) @@ -56,11 +56,7 @@ def serve_flamegraph(run_name, filename): data = read(str(prof_file)) html_content = render(data, prof_file.name) - html_content = re.sub( - r'src="static/(.*?)"', r'src="/tuna_web/static/\1"', html_content - ) - html_content = re.sub( - r'href="static/(.*?)"', r'href="/tuna_web/static/\1"', html_content - ) + html_content = re.sub(r'src="static/(.*?)"', r'src="/tuna_web/static/\1"', html_content) + html_content = re.sub(r'href="static/(.*?)"', r'href="/tuna_web/static/\1"', html_content) return Response(html_content, mimetype="text/html") diff --git a/src/lbench/dashboard/layout.py b/src/lbench/dashboard/layout.py index 65993cd..4738163 100644 --- a/src/lbench/dashboard/layout.py +++ b/src/lbench/dashboard/layout.py @@ -6,6 +6,7 @@ from lbench.dashboard.layouts.tables import tables_panel from lbench.dashboard.layouts.trends import trends_panel + def _navbar(): return dbc.NavbarSimple( brand="lbench Dashboard", @@ -17,8 +18,10 @@ def _navbar(): fluid=True, ) + def _container(): - return dbc.Container([ + return dbc.Container( + [ dcc.Location(id="url", refresh=False), dcc.Store(id="date-filter-store", data={}), dcc.Store(id="run-data-store", data={}), @@ -39,13 +42,18 @@ def _container(): ], fluid=True, style={ - "flex": "1", "overflow": "hidden", - "paddingLeft": "1em", "paddingRight": "1em", - "paddingTop": "0", "paddingBottom": "0", - "display": "flex", "flexDirection": "column", - } + "flex": "1", + "overflow": "hidden", + "paddingLeft": "1em", + "paddingRight": "1em", + "paddingTop": "0", + "paddingBottom": "0", + "display": "flex", + "flexDirection": "column", + }, ) + layout = html.Div( [_navbar(), _container()], style={"height": "100vh", "overflow": "hidden", "display": "flex", "flexDirection": "column"}, diff --git a/src/lbench/dashboard/layouts/sidebar.py b/src/lbench/dashboard/layouts/sidebar.py index 63ee1e6..5c1d6ac 100644 --- a/src/lbench/dashboard/layouts/sidebar.py +++ b/src/lbench/dashboard/layouts/sidebar.py @@ -106,17 +106,28 @@ def sidebar_panel(): ), html.Div( [ - dbc.Button("Apply", id="apply-filter-btn", color="primary", size="sm", - className="me-1"), + dbc.Button( + "Apply", id="apply-filter-btn", color="primary", size="sm", className="me-1" + ), dbc.Button("Clear", id="clear-filter-btn", color="secondary", size="sm"), - dbc.Button("Plot series", id="plot-range-btn", color="success", size="sm", - style={"marginLeft": "auto"}), + dbc.Button( + "Plot series", + id="plot-range-btn", + color="success", + size="sm", + style={"marginLeft": "auto"}, + ), ], style={"marginTop": "10px", "display": "flex"}, ), ], - style={"borderTop": "1px solid #ccc", "padding": "1em 0", "flexShrink": "0", - "position": "relative", "zIndex": 10}, + style={ + "borderTop": "1px solid #ccc", + "padding": "1em 0", + "flexShrink": "0", + "position": "relative", + 
"zIndex": 10, + }, ), html.Div( id="sidebar-container", @@ -137,6 +148,7 @@ def sidebar_panel(): # --- Date filter --- + @callback( Output("date-filter-store", "data"), Input("apply-filter-btn", "n_clicks"), @@ -168,6 +180,7 @@ def sync_date_picker(date_filter, _run_data): # --- Benchmark tables + sidebar --- + @callback( Output("benchmark-tables-container", "children"), Output("sidebar-container", "children"), @@ -179,11 +192,17 @@ def update_benchmarks_and_sidebar(n_clicks_list, run_data, date_filter): triggered = dash.ctx.triggered_id def placeholder(msg): - return html.Div(msg, style={ - "height": "100%", "display": "flex", - "alignItems": "center", "justifyContent": "center", - "color": "#888", "fontSize": "1.1rem", - }) + return html.Div( + msg, + style={ + "height": "100%", + "display": "flex", + "alignItems": "center", + "justifyContent": "center", + "color": "#888", + "fontSize": "1.1rem", + }, + ) if not run_data or not isinstance(run_data, dict): return placeholder("No run data found"), create_sidebar({}) @@ -197,14 +216,16 @@ def placeholder(msg): idx = triggered.get("index") if idx is not None: run_name = list(filtered_run_data.keys())[idx] - return benchmarks_to_tables(run_name, run_data[run_name]), create_sidebar(filtered_run_data, - active_run=run_name) + return benchmarks_to_tables(run_name, run_data[run_name]), create_sidebar( + filtered_run_data, active_run=run_name + ) return placeholder("Select a run from the sidebar or plot series"), create_sidebar(filtered_run_data) # --- Panel switching --- + @callback( Output("right-panel-view", "data"), Input("plot-range-btn", "n_clicks"), @@ -225,6 +246,7 @@ def show_tables(_): # --- Rename --- + @callback( Output("rename-modal", "is_open"), Output("rename-input", "value"), diff --git a/src/lbench/dashboard/layouts/tables.py b/src/lbench/dashboard/layouts/tables.py index a35993d..34517da 100644 --- a/src/lbench/dashboard/layouts/tables.py +++ b/src/lbench/dashboard/layouts/tables.py @@ -15,6 +15,7 @@ def tables_panel(): def _fmt_run_datetime(dt_str: str) -> str: from datetime import datetime + try: dt = datetime.fromisoformat(dt_str) local_dt = dt.astimezone().replace(tzinfo=None) @@ -30,7 +31,9 @@ def benchmark_to_table(bm, run_name, run_datetime=None): html.Span( run_datetime, style={"fontSize": "0.8em", "color": "#888", "marginLeft": "1em", "fontWeight": "normal"}, - ) if run_datetime else None, + ) + if run_datetime + else None, ], style={"display": "flex", "justifyContent": "space-between", "alignItems": "baseline"}, ) diff --git a/src/lbench/dashboard/layouts/trends.py b/src/lbench/dashboard/layouts/trends.py index f9dd58f..333c0d7 100644 --- a/src/lbench/dashboard/layouts/trends.py +++ b/src/lbench/dashboard/layouts/trends.py @@ -94,7 +94,9 @@ def _apply_date_filter(df: pd.DataFrame, date_filter: dict) -> pd.DataFrame: end_raw = date_filter.get("end_date") if not start_raw and not end_raw: return df - timestamps = df["timestamp"].dt.tz_localize(None) if df["timestamp"].dt.tz is not None else df["timestamp"] + timestamps = ( + df["timestamp"].dt.tz_localize(None) if df["timestamp"].dt.tz is not None else df["timestamp"] + ) mask = pd.Series(True, index=df.index) if start_raw: mask &= timestamps >= pd.to_datetime(start_raw) @@ -138,7 +140,9 @@ def toggle_run_selector(chart_type): Input("bar-run-selector", "value"), Input("run-data-store", "data"), ) -def update_trend_plot(selected_benchmarks, selected_metric_names, date_filter, chart_type, selected_runs, run_data): +def update_trend_plot( + selected_benchmarks, 
selected_metric_names, date_filter, chart_type, selected_runs, run_data +): if not selected_benchmarks or not selected_metric_names: return {"layout": {"title": "Select one or more benchmarks and metrics to view trends"}} @@ -195,7 +199,9 @@ def update_trend_plot(selected_benchmarks, selected_metric_names, date_filter, c if chart_type == "bar": traces = _make_bar_traces(series, scale, selected_runs, metric, multi_metric, yaxis_ref) else: - traces = _make_line_traces(series, scale, metric, date_filter, multi_metric, yaxis_ref, collection) + traces = _make_line_traces( + series, scale, metric, date_filter, multi_metric, yaxis_ref, collection + ) for trace in traces: fig.add_trace(trace) @@ -247,7 +253,9 @@ def _make_line_traces(series, scale, metric, date_filter, multi_metric, yaxis_re if not error_df.empty: merged = df.merge(error_df, on=["run_id", "timestamp"], suffixes=("", "_error")) if "value_error" in merged.columns: - trace_kwargs["error_y"] = dict(type="data", array=merged["value_error"] / scale, visible=True) + trace_kwargs["error_y"] = dict( + type="data", array=merged["value_error"] / scale, visible=True + ) trace_kwargs["x"] = merged["timestamp"] trace_kwargs["y"] = merged["value"] / scale diff --git a/src/lbench/dashboard/metrics/benchmark_collection.py b/src/lbench/dashboard/metrics/benchmark_collection.py index 1846add..38c3130 100644 --- a/src/lbench/dashboard/metrics/benchmark_collection.py +++ b/src/lbench/dashboard/metrics/benchmark_collection.py @@ -64,12 +64,7 @@ def __init__(self, run_data: dict, registry: MetricRegistry): if not bm_name: continue - run = BenchmarkRun( - name=bm_name, - run_id=run_id, - timestamp=timestamp, - raw_data=bm_data - ) + run = BenchmarkRun(name=bm_name, run_id=run_id, timestamp=timestamp, raw_data=bm_data) self.runs.append(run) @@ -110,11 +105,13 @@ def get_metric_series(self, benchmark: str, metric: Metric) -> pd.DataFrame: for run in runs: value = run.get_metric_value(metric) if value is not None: # Only include runs where metric is available - data.append({ - "run_id": run.run_id, - "timestamp": run.timestamp, - "value": value, - }) + data.append( + { + "run_id": run.run_id, + "timestamp": run.timestamp, + "value": value, + } + ) return pd.DataFrame(data) diff --git a/src/lbench/dashboard/metrics/groups/dask_group.py b/src/lbench/dashboard/metrics/groups/dask_group.py index 02f8879..5ed0398 100644 --- a/src/lbench/dashboard/metrics/groups/dask_group.py +++ b/src/lbench/dashboard/metrics/groups/dask_group.py @@ -43,10 +43,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]: if dask_stats: try: startstops = dask_stats.get("startstops", []) - times = [ - sum([k["stop"] - k["start"] for k in s]) - for s in startstops - ] + times = [sum([k["stop"] - k["start"] for k in s]) for s in startstops] return sum(times) if times else None except (TypeError, ValueError, KeyError): pass @@ -131,29 +128,27 @@ def render_card(self, benchmark_data: dict, run_name: str) -> Optional[any]: if keys: from lbench.dashboard.utils import format_duration - times = [ - sum([k["stop"] - k["start"] for k in s]) - for s in dask_stats.get("startstops", []) - ] + times = [sum([k["stop"] - k["start"] for k in s]) for s in dask_stats.get("startstops", [])] total_time_by_key = {} for k, t in zip(keys, times): total_time_by_key[k] = total_time_by_key.get(k, 0) + t - sorted_key_times = sorted( - total_time_by_key.items(), key=lambda x: x[1], reverse=True - ) + sorted_key_times = sorted(total_time_by_key.items(), key=lambda x: x[1], reverse=True) formatted_times = 
[format_duration(t) for _, t in sorted_key_times] - task_table = pd.DataFrame({ - "task_key": [k for k, _ in sorted_key_times], - "total time": [f"{v} {u}" for v, u in formatted_times], - }) + task_table = pd.DataFrame( + { + "task_key": [k for k, _ in sorted_key_times], + "total time": [f"{v} {u}" for v, u in formatted_times], + } + ) components.append(html.H5("Dask Task Times", className="card-title mt-3")) components.append( - dbc.Table.from_dataframe(task_table, striped=True, bordered=True, hover=True)) + dbc.Table.from_dataframe(task_table, striped=True, bordered=True, hover=True) + ) return dbc.CardBody(components) if components else None @@ -170,13 +165,15 @@ def get_action_buttons(self, benchmark_data: dict, run_name: str): report_path = dask_stats.get("performance_report") if report_path: report_name = Path(report_path).name - buttons.append(html.A( - "Open Dask Performance Report", - href=f"/file/{run_name}/{report_name}", - target="_blank", - className="btn btn-outline-primary mt-2", - role="button", - )) + buttons.append( + html.A( + "Open Dask Performance Report", + href=f"/file/{run_name}/{report_name}", + target="_blank", + className="btn btn-outline-primary mt-2", + role="button", + ) + ) return buttons @@ -184,5 +181,5 @@ def get_action_buttons(self, benchmark_data: dict, run_name: str): dask_group = DaskGroup( "dask", "Dask Metrics", - [DaskTaskCount(), DaskTotalTime(), DaskPeakMemory(), DaskGraphLength(), DaskGraphSize()] + [DaskTaskCount(), DaskTotalTime(), DaskPeakMemory(), DaskGraphLength(), DaskGraphSize()], ) diff --git a/src/lbench/dashboard/metrics/groups/execution_group.py b/src/lbench/dashboard/metrics/groups/execution_group.py index d85780c..70438c4 100644 --- a/src/lbench/dashboard/metrics/groups/execution_group.py +++ b/src/lbench/dashboard/metrics/groups/execution_group.py @@ -38,6 +38,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]: except (TypeError, ValueError): return None + execution_group = MetricGroup( "execution", "Execution Info", @@ -45,5 +46,5 @@ def extract(self, benchmark_data: dict) -> Optional[float]: CountMetric("rounds", "Rounds"), CountMetric("iterations", "Iterations"), PeakMemory(), - ] + ], ) diff --git a/src/lbench/dashboard/metrics/groups/profiling_group.py b/src/lbench/dashboard/metrics/groups/profiling_group.py index d4f1c03..d14f3fa 100644 --- a/src/lbench/dashboard/metrics/groups/profiling_group.py +++ b/src/lbench/dashboard/metrics/groups/profiling_group.py @@ -32,13 +32,15 @@ def get_action_buttons(self, benchmark_data: dict, run_name: str) -> List[Any]: if "cprofile_path" in extra_info: profile_path = extra_info["cprofile_path"] profile_name = Path(profile_path).name - buttons.append(html.A( - "Open Flamegraph", - href=f"/flamegraph/{run_name}/{profile_name}", - target="_blank", - className="btn btn-outline-secondary mt-2", - role="button", - )) + buttons.append( + html.A( + "Open Flamegraph", + href=f"/flamegraph/{run_name}/{profile_name}", + target="_blank", + className="btn btn-outline-secondary mt-2", + role="button", + ) + ) return buttons diff --git a/src/lbench/dashboard/metrics/groups/stats_group.py b/src/lbench/dashboard/metrics/groups/stats_group.py index 0358d8d..012fa26 100644 --- a/src/lbench/dashboard/metrics/groups/stats_group.py +++ b/src/lbench/dashboard/metrics/groups/stats_group.py @@ -7,8 +7,9 @@ class StatsMetric(DurationMetric): """Base class for metrics from the 'stats' section with time formatting.""" - def __init__(self, name: str, display_name: str, stats_key: str = None, - 
error_bar_metric: "StatsMetric" = None): + def __init__( + self, name: str, display_name: str, stats_key: str = None, error_bar_metric: "StatsMetric" = None + ): super().__init__(name, display_name) self.stats_key = stats_key or name self._error_bar_metric = error_bar_metric @@ -23,10 +24,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]: def get_error_bar_config(self) -> Optional[Dict[str, Any]]: """Return error bar configuration if this metric has one.""" if self._error_bar_metric: - return { - "metric": self._error_bar_metric, - "type": "symmetric" - } + return {"metric": self._error_bar_metric, "type": "symmetric"} return None @@ -44,5 +42,5 @@ def get_error_bar_config(self) -> Optional[Dict[str, Any]]: stats_group = MetricGroup( "stats", "Performance Statistics", - [min_metric, max_metric, mean_metric, median_metric, stddev_metric, iqr_metric, q1_metric, q3_metric] + [min_metric, max_metric, mean_metric, median_metric, stddev_metric, iqr_metric, q1_metric, q3_metric], ) diff --git a/src/lbench/dashboard/metrics/registry.py b/src/lbench/dashboard/metrics/registry.py index f1c60ee..cec001c 100644 --- a/src/lbench/dashboard/metrics/registry.py +++ b/src/lbench/dashboard/metrics/registry.py @@ -3,6 +3,7 @@ from lbench.dashboard.metrics.metric import Metric from lbench.dashboard.metrics.metric_group import MetricGroup + class MetricRegistry: """Registry for managing available metrics and groups. @@ -77,4 +78,3 @@ def get_available_groups(self, benchmark_data: dict) -> List[MetricGroup]: List of available groups """ return [g for g in self._groups.values() if g.is_available(benchmark_data)] - diff --git a/src/lbench/dashboard/utils.py b/src/lbench/dashboard/utils.py index fd8df8b..bb15396 100644 --- a/src/lbench/dashboard/utils.py +++ b/src/lbench/dashboard/utils.py @@ -20,6 +20,7 @@ def format_memory(bytes_value, digits=2): return f"{num_bytes:.{digits}f}", units[-1] + def format_duration(seconds, digits=3): """ Format a duration in seconds using the most appropriate unit. 
diff --git a/src/lbench/notebook/magic.py b/src/lbench/notebook/magic.py index e191d6f..d89ae0d 100644 --- a/src/lbench/notebook/magic.py +++ b/src/lbench/notebook/magic.py @@ -75,6 +75,7 @@ def reset_session(): # -- display helpers --------------------------------------------------------- + def _fmt_time(seconds: float) -> str: if seconds >= 1: return f"{seconds:.3f} s" @@ -86,7 +87,7 @@ def _fmt_time(seconds: float) -> str: def _fmt_memory(nbytes: int) -> str: - for unit, scale in [("GiB", 2 ** 30), ("MiB", 2 ** 20), ("KiB", 2 ** 10)]: + for unit, scale in [("GiB", 2**30), ("MiB", 2**20), ("KiB", 2**10)]: if nbytes >= scale: return f"{nbytes / scale:.2f} {unit}" return f"{nbytes} B" @@ -94,26 +95,31 @@ def _fmt_memory(nbytes: int) -> str: # -- magic class ------------------------------------------------------------- + @magics_class class LbenchMagics(Magics): """Provides the %%lbench cell magic.""" @cell_magic @magic_arguments() - @argument("--rounds", "-r", type=int, default=5, - help="Number of timed rounds (default: 5)") - @argument("--warmup", "-w", action="store_true", - help="Run one un-timed warmup round before measuring") - @argument("--memory", "-m", action="store_true", - help="Track peak memory usage with memray") - @argument("--profile", "-p", action="store_true", - help="Capture a cProfile .prof file") - @argument("--dask", "-d", action="store_true", - help="Collect Dask metrics (task stream, memory sampler, performance report)") - @argument("--collection", type=str, default=None, metavar="VAR", - help="Name of a Dask collection variable; also records graph size and length") - @argument("--name", "-n", type=str, default=None, - help="Name for this benchmark entry") + @argument("--rounds", "-r", type=int, default=5, help="Number of timed rounds (default: 5)") + @argument("--warmup", "-w", action="store_true", help="Run one un-timed warmup round before measuring") + @argument("--memory", "-m", action="store_true", help="Track peak memory usage with memray") + @argument("--profile", "-p", action="store_true", help="Capture a cProfile .prof file") + @argument( + "--dask", + "-d", + action="store_true", + help="Collect Dask metrics (task stream, memory sampler, performance report)", + ) + @argument( + "--collection", + type=str, + default=None, + metavar="VAR", + help="Name of a Dask collection variable; also records graph size and length", + ) + @argument("--name", "-n", type=str, default=None, help="Name for this benchmark entry") def lbench(self, line: str, cell: str): """Benchmark a cell's code and save results to a lbench-compatible JSON log.""" args = parse_argstring(self.lbench, line) @@ -149,14 +155,10 @@ def run_cell(): if args.collection: collection = ns.get(args.collection) if collection is None: - raise NameError( - f"--collection: variable {args.collection!r} not found in namespace" - ) + raise NameError(f"--collection: variable {args.collection!r} not found in namespace") graph = collection.dask dask_info["dask_graph_len"] = len(graph) - dask_info["dask_graph_size_bytes"] = sum( - sys.getsizeof(graph[k]) for k in graph - ) + dask_info["dask_graph_size_bytes"] = sum(sys.getsizeof(graph[k]) for k in graph) extra_info["dask"] = dask_info diff --git a/src/lbench/pytest/plugin.py b/src/lbench/pytest/plugin.py index 72f6e38..fd8c7c4 100644 --- a/src/lbench/pytest/plugin.py +++ b/src/lbench/pytest/plugin.py @@ -39,7 +39,7 @@ def pytest_configure(config: pytest.Config): # configure pytest-benchmark config.option.benchmark_only = True - config.option.benchmark_json = 
(run_dir / "pytest-benchmark.json").open("wb") # kinda hacky + config.option.benchmark_json = (run_dir / "pytest-benchmark.json").open("wb") # kinda hacky terminal = config.pluginmanager.get_plugin("terminalreporter") if terminal: diff --git a/src/lbench/runner.py b/src/lbench/runner.py index 2b9b3db..abf796a 100644 --- a/src/lbench/runner.py +++ b/src/lbench/runner.py @@ -38,6 +38,7 @@ def run_dask_benchmark( if client is None: from distributed import get_client + client = get_client() report_path = run_dir / f"dask_performance_report_{uuid.uuid4()}.html" @@ -105,9 +106,7 @@ def compute_stats(data: list) -> dict: q1, q3 = min_val, max_val iqr = q3 - q1 - iqr_outliers = sum( - 1 for x in data if x < q1 - 1.5 * iqr or x > q3 + 1.5 * iqr - ) + iqr_outliers = sum(1 for x in data if x < q1 - 1.5 * iqr or x > q3 + 1.5 * iqr) stddev_outliers = sum(1 for x in data if abs(x - mean) > stddev) return { @@ -139,9 +138,7 @@ def make_benchmark_entry( params: Optional[dict] = None, ) -> dict: """Build a benchmark entry dict in pytest-benchmark JSON format.""" - param_str = ( - "-".join(str(v) for v in params.values()) if params else None - ) + param_str = "-".join(str(v) for v in params.values()) if params else None return { "group": group, "name": name, @@ -185,18 +182,20 @@ def get_machine_info() -> dict: def get_commit_info() -> dict: try: - git_id = subprocess.check_output( - ["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL - ).decode().strip() - git_time = subprocess.check_output( - ["git", "log", "-1", "--format=%cI"], stderr=subprocess.DEVNULL - ).decode().strip() - git_branch = subprocess.check_output( - ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL - ).decode().strip() - dirty = ( - subprocess.call(["git", "diff", "--quiet"], stderr=subprocess.DEVNULL) != 0 + git_id = ( + subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip() + ) + git_time = ( + subprocess.check_output(["git", "log", "-1", "--format=%cI"], stderr=subprocess.DEVNULL) + .decode() + .strip() + ) + git_branch = ( + subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL) + .decode() + .strip() ) + dirty = subprocess.call(["git", "diff", "--quiet"], stderr=subprocess.DEVNULL) != 0 return { "id": git_id, "time": git_time, From b53be553d3c89cb91287507538a5076e6c7b2b6b Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Thu, 16 Apr 2026 15:08:31 -0400 Subject: [PATCH 12/13] remove old files --- Untitled.ipynb | 53 ------------------------------------------------ requirements.txt | 0 2 files changed, 53 deletions(-) delete mode 100644 Untitled.ipynb delete mode 100644 requirements.txt diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index e001ffd..0000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,53 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "63a26342-fa6f-496a-9341-cadc74641be9", - "metadata": {}, - "outputs": [], - "source": [ - "from lbench.dashboard.app import run_dashboard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68e2d178-0807-4e96-980b-ea257eceffae", - "metadata": {}, - "outputs": [], - "source": [ - "run_dashboard()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a106998-bc24-4099-8538-8b7739fa6de9", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - 
"language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e69de29..0000000 From 4e2bbd43d4d03c1cef81c087a90ded05622ff666 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Thu, 16 Apr 2026 15:12:49 -0400 Subject: [PATCH 13/13] add badges to readme --- README.md | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index be6734b..8d67a31 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,13 @@ run, and analyze benchmarks for Python projects. It provides automatic result lo flamegraphs, Dask performance reporting, memory tracking, a Jupyter notebook magic, and a dashboard for visualizing and comparing benchmark results over time. +[![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) + +[![PyPI](https://img.shields.io/pypi/v/lbench?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/{{project_name}}/) +[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/lincc-frameworks/lsdb-benchmarking/smoke-test.yml)](https://github.com/lincc-frameworks/lsdb-benchmarking/actions/workflows/smoke-test.yml) +[![Codecov](https://codecov.io/gh/lincc-frameworks/lsdb-benchmarking/branch/main/graph/badge.svg)](https://codecov.io/gh/{{project_organization}}/{{project_name}}) +[![Read The Docs](https://img.shields.io/readthedocs/lbench)](https://lbench.readthedocs.io/) + ## Installation ```bash @@ -62,6 +69,7 @@ pytest --lbench benchmarks/ ``` This creates a timestamped result directory, runs all benchmarks, and saves: + - `pytest-benchmark.json` — timing stats and extra metrics - `cprofile_*.prof` — cProfile data for each benchmark - `dask_performance_report_*.html` — Dask performance reports (Dask benchmarks only) @@ -88,7 +96,8 @@ so notebook results appear alongside pytest results in the dashboard. 
Load the extension once per notebook:

 ```python
 %load_ext lbench.notebook
 ```

 Then use the cell magic on any cell:
@@ -101,30 +110,35 @@ my_expensive_function()
 With options:

 ```python
 %%lbench --rounds 10 --warmup --memory --profile --name my_benchmark
 my_expensive_function()
 ```

 Available options:

-| Option | Short | Description |
-|---|---|---|
-| `--rounds N` | `-r` | Number of timed rounds (default: 5) |
-| `--warmup` | `-w` | Run one un-timed warmup round first |
-| `--memory` | `-m` | Track peak memory with memray |
-| `--profile` | `-p` | Capture a cProfile `.prof` file |
-| `--dask` | `-d` | Collect Dask metrics (task stream, memory, performance report) |
-| `--collection VAR` | | Also record graph size/length from a Dask collection variable |
-| `--name NAME` | `-n` | Name for this benchmark entry |
+| Option             | Short | Description                                                     |
+|--------------------|-------|-----------------------------------------------------------------|
+| `--rounds N`       | `-r`  | Number of timed rounds (default: 5)                             |
+| `--warmup`         | `-w`  | Run one un-timed warmup round first                             |
+| `--memory`         | `-m`  | Track peak memory with memray                                   |
+| `--profile`        | `-p`  | Capture a cProfile `.prof` file                                 |
+| `--dask`           | `-d`  | Collect Dask metrics (task stream, memory, performance report)  |
+| `--collection VAR` |       | Also record graph size/length from a Dask collection variable   |
+| `--name NAME`      | `-n`  | Name for this benchmark entry                                   |

 ### Dask benchmarks in notebooks

 ```python
 %%lbench --dask --rounds 3
 my_collection.compute()

 # With graph stats from a named variable:
 %%lbench --dask --collection src_catalog --name catalog_scan
 src_catalog.compute()
 ```

@@ -142,6 +156,7 @@ Or from a notebook:

 ```python
 from lbench.dashboard.app import run_dashboard
+
 run_dashboard(port=8050)
 ```

@@ -150,15 +165,18 @@ Calling `run_dashboard()` again will restart the server on the new settings.
 ### Dashboard Features

 **Run browser (sidebar)**
+
 - Lists all runs in chronological order
 - Filter runs by date range with the date picker
 - Rename runs with the pencil icon

 **Benchmark tables**
+
 - Per-benchmark cards showing timing stats, memory usage, and Dask metrics
 - Links to open flamegraphs (cProfile) and Dask performance reports directly in the browser

 **Trend plots**
+
 - Click "Plot series" to switch to the trend view
 - Select one or more benchmarks and a metric to plot performance over time
 - Error bars show standard deviation where available

@@ -169,6 +187,7 @@
 ```python
 import pytest
+

 @pytest.mark.parametrize("size", [1000, 10000, 100000])
 def test_dataframe_operation(size, lbench):
     import pandas as pd