diff --git a/.copier-answers.yml b/.copier-answers.yml
new file mode 100644
index 0000000..d006815
--- /dev/null
+++ b/.copier-answers.yml
@@ -0,0 +1,25 @@
+# Changes here will be overwritten by Copier
+_commit: v2.2.0
+_src_path: gh:lincc-frameworks/python-project-template
+author_email: lincc-frameworks-team@lists.lsst.org
+author_name: LINCC Frameworks
+create_example_module: false
+custom_install: custom
+enforce_style:
+- black
+failure_notification: []
+include_benchmarks: false
+include_docs: true
+include_notebooks: true
+mypy_type_checking: none
+package_name: lbench
+project_description: Benchmarking tools for LSDB
+project_license: MIT
+project_name: lbench
+project_organization: lincc-frameworks
+python_versions:
+- '3.10'
+- '3.11'
+- '3.12'
+- '3.13'
+test_lowest_version: none
diff --git a/.git_archival.txt b/.git_archival.txt
new file mode 100644
index 0000000..b1a286b
--- /dev/null
+++ b/.git_archival.txt
@@ -0,0 +1,4 @@
+node: $Format:%H$
+node-date: $Format:%cI$
+describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$
+ref-names: $Format:%D$
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..343a755
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,24 @@
+# For explanation of this file and uses see
+# https://git-scm.com/docs/gitattributes
+# https://developer.lsst.io/git/git-lfs.html#using-git-lfs-enabled-repositories
+# https://lincc-ppt.readthedocs.io/en/latest/practices/git-lfs.html
+#
+# Used by https://github.com/lsst/afwdata.git
+# *.boost filter=lfs diff=lfs merge=lfs -text
+# *.dat filter=lfs diff=lfs merge=lfs -text
+# *.fits filter=lfs diff=lfs merge=lfs -text
+# *.gz filter=lfs diff=lfs merge=lfs -text
+#
+# apache parquet files
+# *.parq filter=lfs diff=lfs merge=lfs -text
+#
+# sqlite files
+# *.sqlite3 filter=lfs diff=lfs merge=lfs -text
+#
+# gzip files
+# *.gz filter=lfs diff=lfs merge=lfs -text
+#
+# png image files
+# *.png filter=lfs diff=lfs merge=lfs -text
+
+.git_archival.txt export-subst
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/0-general_issue.md b/.github/ISSUE_TEMPLATE/0-general_issue.md
new file mode 100644
index 0000000..84bb0d7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/0-general_issue.md
@@ -0,0 +1,8 @@
+---
+name: General issue
+about: Quickly create a general issue
+title: ''
+labels: ''
+assignees: ''
+
+---
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/1-bug_report.md b/.github/ISSUE_TEMPLATE/1-bug_report.md
new file mode 100644
index 0000000..eaa7049
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/1-bug_report.md
@@ -0,0 +1,28 @@
+---
+name: Bug report
+about: Tell us about a problem to fix
+title: 'Short description'
+labels: 'bug'
+assignees: ''
+
+---
+**Bug report**
+
+
+**Environment Information**
+
+
+
+**Traceback**
+
+FILL IN YOUR STACK TRACE HERE
+
+
+
+**Before submitting**
+Please check the following:
+
+- [ ] I have described the situation in which the bug arose, including what code was executed, and any applicable data others will need to reproduce the problem.
+- [ ] I have included information about my environment, including the version of this package (e.g. `lbench.__version__`)
+- [ ] I have included available evidence of the unexpected behavior (including error messages, screenshots, and/or plots) as well as a description of what I expected instead.
+- [ ] If I have a solution in mind, I have provided an explanation and/or pseudocode and/or task list.
diff --git a/.github/ISSUE_TEMPLATE/2-feature_request.md b/.github/ISSUE_TEMPLATE/2-feature_request.md
new file mode 100644
index 0000000..908ff72
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/2-feature_request.md
@@ -0,0 +1,18 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: 'Short description'
+labels: 'enhancement'
+assignees: ''
+
+---
+
+**Feature request**
+
+
+**Before submitting**
+Please check the following:
+
+- [ ] I have described the purpose of the suggested change, specifying what I need the enhancement to accomplish, i.e. what problem it solves.
+- [ ] I have included any relevant links, screenshots, environment information, and data relevant to implementing the requested feature, as well as pseudocode for how I want to access the new functionality.
+- [ ] If I have ideas for how the new feature could be implemented, I have provided explanations and/or pseudocode and/or task lists for the steps.
diff --git a/.github/ISSUE_TEMPLATE/README.md b/.github/ISSUE_TEMPLATE/README.md
new file mode 100644
index 0000000..46dc08e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/README.md
@@ -0,0 +1,9 @@
+# Configurations
+
+Templates for various different issue types are defined in this directory
+and a pull request template is defined as ``../pull_request_template.md``. Adding,
+removing, and modifying these templates to suit the needs of your project is encouraged.
+
+For more information about these templates, look here: https://lincc-ppt.readthedocs.io/en/latest/practices/issue_pr_templating.html
+
+Or if you still have questions contact us: https://lincc-ppt.readthedocs.io/en/latest/source/contact.html
\ No newline at end of file
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..3b5ca19
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,10 @@
+version: 2
+updates:
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "monthly"
+ - package-ecosystem: "pip"
+ directory: "/"
+ schedule:
+ interval: "monthly"
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..5f341fd
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,13 @@
+## Change Description
+
+
+## Solution Description
+
+
+## Code Quality
+- [ ] I have read the Contribution Guide and agree to the Code of Conduct
+- [ ] My code follows the code style of this project
+- [ ] My code builds (or compiles) cleanly without any errors or warnings
+- [ ] My code contains relevant comments and necessary documentation
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 0000000..e34a71f
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,9 @@
+# Workflows
+
+The .yml files in this directory are used to define the various continuous
+integration scripts that will be run on your behalf e.g. nightly as a smoke check,
+or when you create a new PR.
+
+For more information about CI and workflows, look here: https://lincc-ppt.readthedocs.io/en/latest/practices/ci.html
+
+Or if you still have questions contact us: https://lincc-ppt.readthedocs.io/en/latest/source/contact.html
\ No newline at end of file
diff --git a/.github/workflows/build-documentation.yml b/.github/workflows/build-documentation.yml
new file mode 100644
index 0000000..e848269
--- /dev/null
+++ b/.github/workflows/build-documentation.yml
@@ -0,0 +1,41 @@
+
+# This workflow will install Python dependencies, build the package and then build the documentation.
+
+name: Build documentation
+
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build:
+
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v6
+ - name: Set up Python 3.11
+ uses: actions/setup-python@v6
+ with:
+ python-version: '3.11'
+ - name: Install uv
+ uses: astral-sh/setup-uv@v7
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ uv pip install --system -e .
+ if [ -f docs/requirements.txt ]; then uv pip install --system -r docs/requirements.txt; fi
+ if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi
+ - name: Install notebook requirements
+ run: |
+ sudo apt-get install pandoc
+ - name: Build docs
+ run: |
+ sphinx-build -T -E -b html -d docs/build/doctrees ./docs docs/build/html
diff --git a/.github/workflows/pre-commit-ci.yml b/.github/workflows/pre-commit-ci.yml
new file mode 100644
index 0000000..392feed
--- /dev/null
+++ b/.github/workflows/pre-commit-ci.yml
@@ -0,0 +1,37 @@
+
+# This workflow runs pre-commit hooks on pushes and pull requests to main
+# to enforce coding style. To ensure correct configuration, please refer to:
+# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_precommit.html
+name: Run pre-commit hooks
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+ pre-commit-ci:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v6
+ with:
+ fetch-depth: 0
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: '3.11'
+ - name: Install uv
+ uses: astral-sh/setup-uv@v7
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ uv pip install --system .[dev]
+ if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi
+ - uses: pre-commit/action@v3.0.1
+ with:
+ extra_args: --all-files --verbose
+ env:
+ SKIP: "check-lincc-frameworks-template-version,no-commit-to-branch,check-added-large-files,validate-pyproject,sphinx-build,pytest-check"
+ - uses: pre-commit-ci/lite-action@v1.1.0
+ if: failure() && github.event_name == 'pull_request' && github.event.pull_request.draft == false
\ No newline at end of file
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
new file mode 100644
index 0000000..8bfbcbc
--- /dev/null
+++ b/.github/workflows/publish-to-pypi.yml
@@ -0,0 +1,38 @@
+
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://github.com/pypa/gh-action-pypi-publish#trusted-publishing
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+ release:
+ types: [published]
+
+permissions:
+ contents: read
+
+jobs:
+ deploy:
+
+ runs-on: ubuntu-latest
+ permissions:
+ id-token: write
+ steps:
+ - uses: actions/checkout@v6
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build
+ - name: Build package
+ run: python -m build
+ - name: Publish package
+ uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml
new file mode 100644
index 0000000..763208a
--- /dev/null
+++ b/.github/workflows/smoke-test.yml
@@ -0,0 +1,43 @@
+# This workflow will run daily at 06:45.
+# It will install Python dependencies and run tests with a variety of Python versions.
+# See documentation for help debugging smoke test issues:
+# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_testing.html#version-culprit
+
+name: Unit test smoke test
+
+on:
+
+ # Runs this workflow automatically
+ schedule:
+ - cron: 45 6 * * *
+
+ # Allows you to run this workflow manually from the Actions tab
+ workflow_dispatch:
+
+jobs:
+ build:
+
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.10', '3.11', '3.12', '3.13']
+
+ steps:
+ - uses: actions/checkout@v6
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install uv
+ uses: astral-sh/setup-uv@v7
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ uv pip install --system -e .[dev]
+ if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi
+ - name: List dependencies
+ run: |
+ pip list
+ - name: Run unit tests with pytest
+ run: |
+ python -m pytest
\ No newline at end of file
diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml
new file mode 100644
index 0000000..bb1fd39
--- /dev/null
+++ b/.github/workflows/testing-and-coverage.yml
@@ -0,0 +1,40 @@
+
+# This workflow will install Python dependencies, run tests and report code coverage with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Unit test and code coverage
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+ build:
+
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.10', '3.11', '3.12', '3.13']
+
+ steps:
+ - uses: actions/checkout@v6
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install uv
+ uses: astral-sh/setup-uv@v7
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ uv pip install --system -e .[dev]
+ if [ -f requirements.txt ]; then uv pip install --system -r requirements.txt; fi
+ - name: Run unit tests with pytest
+ run: |
+ python -m pytest --cov=lbench --cov-report=xml
+ - name: Upload coverage report to codecov
+ uses: codecov/codecov-action@v5
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 796ded3..36c8240 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,154 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+_version.py
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+_readthedocs/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# vscode
+.vscode/
+
+# dask
+dask-worker-space/
+
+# tmp directory
+tmp/
+
+# Mac OS
+.DS_Store
+
+# Airspeed Velocity performance results
+_results/
+_html/
+
+# Project initialization script
+.initialize_new_project.sh
+
*.cpython*.pyc
*cprofile*.prof
*pytest-benchmark.json
@@ -7,4 +157,5 @@ __pycache__/
.idea/
.ipynb_checkpoints/
dask_performance_report*.html
+
lbench_results/
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..9f43941
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,82 @@
+
+repos:
+ # Compare the local template version to the latest remote template version
+ # This hook should always pass. It will print a message if the local version
+ # is out of date.
+ - repo: https://github.com/lincc-frameworks/pre-commit-hooks
+ rev: v0.2.2
+ hooks:
+ - id: check-lincc-frameworks-template-version
+ name: Check template version
+ description: Compare current template version against latest
+ verbose: true
+ # Clear output from jupyter notebooks so that only the input cells are committed.
+ - repo: local
+ hooks:
+ - id: jupyter-nb-clear-output
+ name: Clear output from Jupyter notebooks
+ description: Clear output from Jupyter notebooks.
+ files: \.ipynb$
+ exclude: ^docs/pre_executed
+ stages: [pre-commit]
+ language: system
+ entry: jupyter nbconvert --clear-output
+ # Prevents committing directly branches named 'main' and 'master'.
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.4.0
+ hooks:
+ - id: no-commit-to-branch
+ name: Prevent main branch commits
+ description: Prevent the user from committing directly to the primary branch.
+ - id: check-added-large-files
+ name: Check for large files
+ description: Prevent the user from committing very large files.
+ args: ['--maxkb=500']
+ # Verify that pyproject.toml is well formed
+ - repo: https://github.com/abravalheri/validate-pyproject
+ rev: v0.24.1
+ hooks:
+ - id: validate-pyproject
+ name: Validate pyproject.toml
+ description: Verify that pyproject.toml adheres to the established schema.
+ # Verify that GitHub workflows are well formed
+ - repo: https://github.com/python-jsonschema/check-jsonschema
+ rev: 0.28.0
+ hooks:
+ - id: check-github-workflows
+ args: ["--verbose"]
+ # Analyze the code style and report code that doesn't adhere.
+ - repo: https://github.com/psf/black
+ rev: 23.7.0
+ hooks:
+ - id: black-jupyter
+ name: Format code using black
+ types_or: [python, pyi, jupyter]
+ # It is recommended to specify the latest version of Python
+ # supported by your project here, or alternatively use
+ # pre-commit's default_language_version, see
+ # https://pre-commit.com/#top_level-default_language_version
+ language_version: python3.11
+ - repo: https://github.com/lincc-frameworks/pre-commit-hooks
+ rev: v0.2.2
+ hooks:
+ - id: pre-executed-nb-never-execute
+ name: Check pre-executed notebooks
+ files: ^docs/pre_executed/.*\.ipynb$
+ verbose: true
+ args:
+ ["docs/pre_executed/"]
+ # Run unit tests, verify that they pass. Note that coverage is run against
+ # the ./src directory here because that is what will be committed. In the
+ # github workflow script, the coverage is run against the installed package
+ # and uploaded to Codecov by calling pytest like so:
+ # `python -m pytest --cov= --cov-report=xml`
+ - repo: local
+ hooks:
+ - id: pytest-check
+ name: Run unit tests
+ description: Run unit tests with pytest.
+ entry: bash -c "if python -m pytest --co -qq; then python -m pytest --cov=./src --cov-report=html; fi"
+ language: system
+ pass_filenames: false
+ always_run: true
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000..3d200a7
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,24 @@
+
+# .readthedocs.yml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.11"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+ configuration: docs/conf.py
+
+# Optionally declare the Python requirements required to build your docs
+python:
+ install:
+ - requirements: docs/requirements.txt
+ - requirements: requirements.txt
+ - method: pip
+ path: .
diff --git a/.setup_dev.sh b/.setup_dev.sh
new file mode 100755
index 0000000..5286e41
--- /dev/null
+++ b/.setup_dev.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+# Bash Unofficial strict mode (http://redsymbol.net/articles/unofficial-bash-strict-mode/)
+# and (https://disconnected.systems/blog/another-bash-strict-mode/)
+set -o nounset # Any uninitialized variable is an error
+set -o errexit # Exit the script on the failure of any command to execute without error
+set -o pipefail # Fail command pipelines on the failure of any individual step
+IFS=$'\n\t' #set internal field separator to avoid iteration errors
+# Trap all exits and output something helpful
+trap 's=$?; echo "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR
+
+# This script should be run by new developers to install this package in
+# editable mode and configure their local environment
+
+echo "Checking virtual environment"
+if [ "${VIRTUAL_ENV:-missing}" = "missing" ] && [ "${CONDA_PREFIX:-missing}" = "missing" ]; then
+    echo 'No virtual environment detected: neither $VIRTUAL_ENV nor $CONDA_PREFIX is set.'
+ echo
+ echo "=== This script is going to install the project in the system python environment ==="
+ echo "Proceed? [y/N]"
+    read -r RESPONSE
+    if [ "${RESPONSE}" != "y" ]; then
+ echo "See https://lincc-ppt.readthedocs.io/ for details."
+ echo "Exiting."
+ exit 1
+ fi
+
+fi
+
+echo "Checking pip version"
+MINIMUM_PIP_VERSION=22
+pipversion=( $(python -m pip --version | awk '{print $2}' | sed 's/\./\n\t/g') )
+if let "${pipversion[0]}<${MINIMUM_PIP_VERSION}"; then
+ echo "Insufficient version of pip found. Requires at least version ${MINIMUM_PIP_VERSION}."
+ echo "See https://lincc-ppt.readthedocs.io/ for details."
+ exit 1
+fi
+
+echo "Installing package and runtime dependencies in local environment"
+python -m pip install -e . > /dev/null
+
+echo "Installing developer dependencies in local environment"
+python -m pip install -e .'[dev]' > /dev/null
+if [ -f docs/requirements.txt ]; then python -m pip install -r docs/requirements.txt > /dev/null; fi
+
+echo "Installing pre-commit"
+pre-commit install > /dev/null
+
+#######################################################
+# Include any additional configurations below this line
+#######################################################
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f96e87e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 LINCC Frameworks
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index be6734b..8d67a31 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,13 @@ run, and analyze benchmarks for Python projects. It provides automatic result lo
flamegraphs, Dask performance reporting, memory tracking, a Jupyter notebook magic, and a dashboard for
visualizing and comparing benchmark results over time.
+[Template: LINCC PPT](https://lincc-ppt.readthedocs.io/en/latest/)
+
+[PyPI](https://pypi.org/project/lbench/)
+[Smoke test](https://github.com/lincc-frameworks/lsdb-benchmarking/actions/workflows/smoke-test.yml)
+[Codecov](https://codecov.io/gh/lincc-frameworks/lsdb-benchmarking)
+[Read the Docs](https://lbench.readthedocs.io/)
+
## Installation
```bash
@@ -62,6 +69,7 @@ pytest --lbench benchmarks/
```
This creates a timestamped result directory, runs all benchmarks, and saves:
+
- `pytest-benchmark.json` — timing stats and extra metrics
- `cprofile_*.prof` — cProfile data for each benchmark
- `dask_performance_report_*.html` — Dask performance reports (Dask benchmarks only)
@@ -88,7 +96,8 @@ so notebook results appear alongside pytest results in the dashboard.
Load the extension once per notebook:
```python
-%load_ext lbench.notebook
+%load_ext lbench.notebook
```
Then use the cell magic on any cell:
@@ -101,30 +110,35 @@ my_expensive_function()
With options:
```python
-%%lbench --rounds 10 --warmup --memory --profile --name my_benchmark
+%%lbench --rounds 10 --warmup --memory --profile --name my_benchmark
my_expensive_function()
```
Available options:
-| Option | Short | Description |
-|---|---|---|
-| `--rounds N` | `-r` | Number of timed rounds (default: 5) |
-| `--warmup` | `-w` | Run one un-timed warmup round first |
-| `--memory` | `-m` | Track peak memory with memray |
-| `--profile` | `-p` | Capture a cProfile `.prof` file |
-| `--dask` | `-d` | Collect Dask metrics (task stream, memory, performance report) |
-| `--collection VAR` | | Also record graph size/length from a Dask collection variable |
-| `--name NAME` | `-n` | Name for this benchmark entry |
+| Option | Short | Description |
+|--------------------|-------|----------------------------------------------------------------|
+| `--rounds N` | `-r` | Number of timed rounds (default: 5) |
+| `--warmup` | `-w` | Run one un-timed warmup round first |
+| `--memory` | `-m` | Track peak memory with memray |
+| `--profile` | `-p` | Capture a cProfile `.prof` file |
+| `--dask` | `-d` | Collect Dask metrics (task stream, memory, performance report) |
+| `--collection VAR` | | Also record graph size/length from a Dask collection variable |
+| `--name NAME` | `-n` | Name for this benchmark entry |
### Dask benchmarks in notebooks
```python
-%%lbench --dask --rounds 3
+%%lbench --dask --rounds 3
my_collection.compute()
# With graph stats from a named variable:
-%%lbench --dask --collection src_catalog --name catalog_scan
+%%lbench --dask --collection src_catalog --name catalog_scan
src_catalog.compute()
```
@@ -142,6 +156,7 @@ Or from a notebook:
```python
from lbench.dashboard.app import run_dashboard
+
run_dashboard(port=8050)
```
@@ -150,15 +165,18 @@ Calling `run_dashboard()` again will restart the server on the new settings.
### Dashboard Features
**Run browser (sidebar)**
+
- Lists all runs in chronological order
- Filter runs by date range with the date picker
- Rename runs with the pencil icon
**Benchmark tables**
+
- Per-benchmark cards showing timing stats, memory usage, and Dask metrics
- Links to open flamegraphs (cProfile) and Dask performance reports directly in the browser
**Trend plots**
+
- Click "Plot series" to switch to the trend view
- Select one or more benchmarks and a metric to plot performance over time
- Error bars show standard deviation where available
@@ -169,6 +187,7 @@ Calling `run_dashboard()` again will restart the server on the new settings.
```python
import pytest
+
@pytest.mark.parametrize("size", [1000, 10000, 100000])
def test_dataframe_operation(size, lbench):
import pandas as pd
diff --git a/benchmarks/basic_notebook.ipynb b/benchmarks/basic_notebook.ipynb
index 17bbbb0..f740ff0 100644
--- a/benchmarks/basic_notebook.ipynb
+++ b/benchmarks/basic_notebook.ipynb
@@ -2,14 +2,13 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2026-04-03T18:10:01.175489Z",
"start_time": "2026-04-03T18:10:01.133802Z"
},
- "collapsed": true,
"jupyter": {
"outputs_hidden": true
}
@@ -21,7 +20,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "5b500d8e0351656c",
"metadata": {
"ExecuteTime": {
@@ -43,7 +42,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "6d8fef147c0a7522",
"metadata": {
"ExecuteTime": {
@@ -58,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "e8f6f2af9dacfce5",
"metadata": {
"ExecuteTime": {
@@ -75,7 +74,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "ecfe03da485592c3",
"metadata": {
"ExecuteTime": {
@@ -83,18 +82,7 @@
"start_time": "2026-04-03T18:13:33.448441Z"
}
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1 rounds mean: 1.013 s ± 0.000 ns (min: 1.013 s, max: 1.013 s)\n",
- "dask tasks: 1 peak memory: 61.73 MiB graph nodes: 1\n",
- "perf report: /Users/smcmu/code/lsdb-benchmarking/benchmarks/lbench_results/20260403-141217/dask_performance_report_6fd91c6c-8017-4b54-b270-a944983ab961.html\n",
- "log: /Users/smcmu/code/lsdb-benchmarking/benchmarks/lbench_results/20260403-141217/pytest-benchmark.json\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%lbench --dask --rounds 1 --collection sleep_del\n",
"sleep_del.compute()"
@@ -102,7 +90,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "f2da3bf761e96462",
"metadata": {},
"outputs": [],
@@ -112,18 +100,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "b9deaf00-0f3e-49f5-9d0c-cd5db1092ff1",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Dash app running on http://127.0.0.1:8050/\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"run_dashboard(jupyter_mode=\"external\")"
]
diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py
index fbaf3c2..3706631 100644
--- a/benchmarks/conftest.py
+++ b/benchmarks/conftest.py
@@ -6,6 +6,7 @@
import pyarrow as pa
import lsdb
+
@fixture
def catalog_local_dir() -> UPath:
root = os.environ.get("CATALOG_LOCAL_DIR", "/epyc/data3/hats/catalogs")
@@ -16,6 +17,7 @@ def catalog_local_dir() -> UPath:
def gaia_local_collection_path(catalog_local_dir: UPath) -> UPath:
return catalog_local_dir / "gaia_dr3"
+
@fixture
def gaia_local_catalog_path(gaia_local_collection_path) -> UPath:
return gaia_local_collection_path / "gaia"
@@ -30,14 +32,17 @@ def gaia_local_metadata_path(gaia_local_catalog_path) -> UPath:
def gaia_s3_collection_path() -> UPath:
return UPath("s3://stpubdata/gaia/gaia_dr3/public/hats")
+
@fixture(scope="session")
def gaia_s3_catalog_path(gaia_s3_collection_path) -> UPath:
return gaia_s3_collection_path / "gaia"
+
@fixture(scope="session")
def gaia_s3_metadata_path(gaia_s3_catalog_path) -> UPath:
return gaia_s3_catalog_path / "dataset" / "_metadata"
+
@fixture(scope="session")
def gaia_s3_dataset(gaia_s3_metadata_path) -> pyarrow.dataset.Dataset:
return pyarrow.dataset.parquet_dataset(
@@ -46,6 +51,7 @@ def gaia_s3_dataset(gaia_s3_metadata_path) -> pyarrow.dataset.Dataset:
filesystem=pa.fs.S3FileSystem(),
)
+
@fixture
def gaia_local_dataset(gaia_local_metadata_path) -> pyarrow.dataset.Dataset:
return pyarrow.dataset.parquet_dataset(
@@ -86,4 +92,5 @@ def get_lsdb_catalog(io_method, **kwargs):
else:
raise ValueError(f"Unsupported IO method: {io_method}")
return lsdb.open_catalog(path.as_uri(), **kwargs)
+
return Helpers()
diff --git a/benchmarks/test_column_mean.py b/benchmarks/test_column_mean.py
index ad770e5..58dd6bf 100644
--- a/benchmarks/test_column_mean.py
+++ b/benchmarks/test_column_mean.py
@@ -4,6 +4,7 @@
import pyarrow as pa
import nested_pandas as npd
+
def test_pyarrow_mean(gaia_collection_path, lbench):
gaia_root = gaia_collection_path / "gaia"
parquet_root = f"{gaia_root}/dataset"
@@ -11,12 +12,12 @@ def test_pyarrow_mean(gaia_collection_path, lbench):
def dataset_mean(dataset, field: str, *, use_threads: bool = True):
total_sum = None # Arrow Scalar
- total_count = 0 # Python int
+ total_count = 0 # Python int
for batch in dataset.to_batches(columns=[field], use_threads=use_threads):
col = batch.column(0)
- b_sum = pc.sum(col) # Scalar (or null if all-null)
- b_count = pc.count(col, mode="only_valid") # Int64 Scalar
+ b_sum = pc.sum(col) # Scalar (or null if all-null)
+ b_count = pc.count(col, mode="only_valid") # Int64 Scalar
if not pc.is_null(b_sum).as_py() and b_count.as_py() > 0:
total_sum = b_sum if total_sum is None else pc.add(total_sum, b_sum)
total_count += b_count.as_py()
@@ -29,15 +30,18 @@ def dataset_mean(dataset, field: str, *, use_threads: bool = True):
lbench(dataset_mean, pyarrow_ds, "phot_g_mean_mag")
+
def test_lsdb_mean(gaia_collection_path, lbench_dask):
- def catalog_mean(df, target_column=''):
- result = npd.NestedFrame({
- "sum": [df[target_column].sum()],
- "count": [len(df)],
- })
+ def catalog_mean(df, target_column=""):
+ result = npd.NestedFrame(
+ {
+ "sum": [df[target_column].sum()],
+ "count": [len(df)],
+ }
+ )
return result
- lsdb_gaia = lsdb.open_catalog(gaia_collection_path, columns=['phot_g_mean_mag'])
+ lsdb_gaia = lsdb.open_catalog(gaia_collection_path, columns=["phot_g_mean_mag"])
unrealized = lsdb_gaia.map_partitions(
catalog_mean,
target_column="phot_g_mean_mag",
@@ -45,4 +49,5 @@ def catalog_mean(df, target_column=''):
def compute_mean():
result = unrealized.compute()
+
lbench_dask(compute_mean)
diff --git a/benchmarks/test_crossmatch.py b/benchmarks/test_crossmatch.py
index c64c1e4..f656422 100644
--- a/benchmarks/test_crossmatch.py
+++ b/benchmarks/test_crossmatch.py
@@ -27,9 +27,7 @@ def test_crossmatch(lbench, catalog_local_dir):
# Size (memory) of Gaia margin pixel: 87.9 MiB
gaia_margin_path = catalog_local_dir / "gaia_dr3" / "gaia_300arcs"
- gaia_margin_part = npd.read_parquet(
- pixel_catalog_file(gaia_margin_path, gaia_pixel)
- )
+ gaia_margin_part = npd.read_parquet(pixel_catalog_file(gaia_margin_path, gaia_pixel))
gaia_margin = lsdb.read_hats(gaia_margin_path)
algorithm = KdTreeCrossmatch()
@@ -65,4 +63,4 @@ def crossmatch():
meta_df=meta_df,
)
- lbench(crossmatch)
\ No newline at end of file
+ lbench(crossmatch)
diff --git a/benchmarks/test_io.py b/benchmarks/test_io.py
index 05d6919..c586fbf 100644
--- a/benchmarks/test_io.py
+++ b/benchmarks/test_io.py
@@ -5,49 +5,62 @@
COLUMN_CONFIGS = [
["source_id", "ra", "dec"],
- ["source_id", "ra", "dec", "ra_error", "dec_error", "parallax", "pm", "designation", "phot_g_mean_mag", "phot_bp_mean_mag"],
+ [
+ "source_id",
+ "ra",
+ "dec",
+ "ra_error",
+ "dec_error",
+ "parallax",
+ "pm",
+ "designation",
+ "phot_g_mean_mag",
+ "phot_bp_mean_mag",
+ ],
]
-@pytest.mark.parametrize(
- "columns",
- COLUMN_CONFIGS,
- ids=["3col", "10col"]
-)
-@pytest.mark.parametrize(
- "io_method",
- ["s3", "local"],
- ids=["s3", "local"]
-)
+@pytest.mark.parametrize("columns", COLUMN_CONFIGS, ids=["3col", "10col"])
+@pytest.mark.parametrize("io_method", ["s3", "local"], ids=["s3", "local"])
class TestLsdbIO:
def test_pyarrow_single_partition(self, columns, io_method, lbench, helpers):
dataset = helpers.get_pyarrow_dataset(io_method)
frag = list(dataset.get_fragments())[0]
+
def load_partition():
df = frag.to_table(columns=columns).to_pandas()
+
lbench(load_partition)
def test_lsdb_single_partition(self, columns, io_method, lbench_dask, helpers):
catalog = helpers.get_lsdb_catalog(io_method, columns=columns)
partition = catalog.partitions[0]
+
def load_partition():
df = partition.compute()
+
lbench_dask(load_partition)
def test_pyarrow_multi_partition(self, columns, io_method, lbench, helpers):
dataset = helpers.get_pyarrow_dataset(io_method)
frag = list(dataset.get_fragments())[:10]
paths = [f.path for f in frag]
- ds = pyarrow.dataset.dataset(paths, format="parquet", schema=dataset.schema, filesystem=dataset.filesystem)
+ ds = pyarrow.dataset.dataset(
+ paths, format="parquet", schema=dataset.schema, filesystem=dataset.filesystem
+ )
+
def load_partition():
df = ds.to_table(columns=columns).to_pandas()
+
lbench(load_partition)
def test_lsdb_multi_partition(self, columns, io_method, lbench_dask, helpers):
catalog = helpers.get_lsdb_catalog(io_method, columns=columns)
partition = catalog.partitions[0:10]
+
def load_partition():
df = partition.compute()
+
lbench_dask(load_partition)
def test_pyarrow_filtered_query(self, columns, io_method, lbench, helpers):
@@ -67,7 +80,9 @@ def load_partition():
lbench(load_partition)
def test_lsdb_filtered_query(self, columns, io_method, lbench_dask, helpers):
- catalog = helpers.get_lsdb_catalog(io_method, columns=columns, filters=[("ra", ">", 45.0), ("ra", "<", 46.0)])
+ catalog = helpers.get_lsdb_catalog(
+ io_method, columns=columns, filters=[("ra", ">", 45.0), ("ra", "<", 46.0)]
+ )
partition = catalog.partitions[0:10]
def load_partition():
@@ -76,13 +91,10 @@ def load_partition():
lbench_dask(load_partition)
def test_lsdb_cone_search(self, columns, io_method, lbench_dask, helpers):
- catalog = helpers.get_lsdb_catalog(
- io_method, columns=columns
- )
+ catalog = helpers.get_lsdb_catalog(io_method, columns=columns)
partition = catalog.cone_search(ra=45.5, dec=0.0, radius_arcsec=1800.0)
def load_partition():
df = partition.compute()
lbench_dask(load_partition)
-
diff --git a/benchmarks/test_local_io.py b/benchmarks/test_local_io.py
index 7e43261..09423bc 100644
--- a/benchmarks/test_local_io.py
+++ b/benchmarks/test_local_io.py
@@ -5,9 +5,7 @@
import pytest
-def test_local_catalog_partition_read(
- gaia_collection_path, lbench_dask
-):
+def test_local_catalog_partition_read(gaia_collection_path, lbench_dask):
gaia = lsdb.read_hats(gaia_collection_path)
cat = gaia.partitions[0]
@@ -20,9 +18,7 @@ def load_partition():
def test_local_catalog_npd_read(gaia_collection_path, lbench):
gaia = lsdb.read_hats(gaia_collection_path)
partition_0_pixel = gaia.partitions[0].get_healpix_pixels()[0]
- partition_0_path = hats.io.paths.pixel_catalog_file(
- gaia.hc_structure.catalog_base_dir, partition_0_pixel
- )
+ partition_0_path = hats.io.paths.pixel_catalog_file(gaia.hc_structure.catalog_base_dir, partition_0_pixel)
def load_partition_npd():
npd.read_parquet(partition_0_path)
@@ -33,9 +29,7 @@ def load_partition_npd():
def test_local_catalog_pd_read(gaia_collection_path, lbench):
gaia = lsdb.read_hats(gaia_collection_path)
partition_0_pixel = gaia.partitions[0].get_healpix_pixels()[0]
- partition_0_path = hats.io.paths.pixel_catalog_file(
- gaia.hc_structure.catalog_base_dir, partition_0_pixel
- )
+ partition_0_path = hats.io.paths.pixel_catalog_file(gaia.hc_structure.catalog_base_dir, partition_0_pixel)
def load_partition_pd():
pd.read_parquet(partition_0_path)
@@ -44,9 +38,7 @@ def load_partition_pd():
@pytest.mark.benchmark(min_rounds=1)
-def test_local_catalog_multi_partition_read(
- gaia_collection_path, lbench_dask
-):
+def test_local_catalog_multi_partition_read(gaia_collection_path, lbench_dask):
gaia = lsdb.read_hats(gaia_collection_path)
n_partitions = 10
cat = gaia.partitions[:n_partitions]
@@ -65,9 +57,7 @@ def test_local_catalog_multi_partition_npd_read(gaia_collection_path, lbench):
for i in range(n_partitions):
partition = gaia.partitions[i]
partition_pixel = partition.get_healpix_pixels()[0]
- partition_path = hats.io.paths.pixel_catalog_file(
- gaia.hc_structure.catalog_base_dir, partition_pixel
- )
+ partition_path = hats.io.paths.pixel_catalog_file(gaia.hc_structure.catalog_base_dir, partition_pixel)
partition_paths.append(partition_path)
def load_partitions_npd():
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..a5622f1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,31 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?= -T -E -d _build/doctrees -D language=en
+EXCLUDENB ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints"
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = ../_readthedocs/
+
+.PHONY: help clean Makefile no-nb no-notebooks
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Build all Sphinx docs locally, except the notebooks
+no-nb no-notebooks:
+ @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(EXCLUDENB) $(O)
+
+# Cleans up files generated by the build process
+clean:
+ rm -r "_build/doctrees"
+ rm -r "$(BUILDDIR)"
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..8adaec8
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,58 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+
+import os
+import sys
+from importlib.metadata import version
+
+# Define path to the code to be documented **relative to where conf.py (this file) is kept**
+sys.path.insert(0, os.path.abspath("../src/"))
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "lbench"
+copyright = "2025, LINCC Frameworks"
+author = "LINCC Frameworks"
+release = version("lbench")
+# for example take major/minor
+version = ".".join(release.split(".")[:2])
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = ["sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.viewcode"]
+
+extensions.append("autoapi.extension")
+extensions.append("nbsphinx")
+
+# -- sphinx-copybutton configuration ----------------------------------------
+extensions.append("sphinx_copybutton")
+## sets up the expected prompt text from console blocks, and excludes it from
+## the text that goes into the clipboard.
+copybutton_exclude = ".linenos, .gp"
+copybutton_prompt_text = ">> "
+
+## lets us suppress the copy button on select code blocks.
+copybutton_selector = "div:not(.no-copybutton) > div.highlight > pre"
+
+templates_path = []
+exclude_patterns = ["_build", "**.ipynb_checkpoints"]
+
+# This assumes that sphinx-build is called from the root directory
+master_doc = "index"
+# Remove 'view source code' from top of page (for html, not python)
+html_show_sourcelink = False
+# Remove namespaces from class/method signatures
+add_module_names = False
+
+autoapi_type = "python"
+autoapi_dirs = ["../src"]
+autoapi_ignore = ["*/__main__.py", "*/_version.py"]
+autoapi_add_toc_tree_entry = False
+autoapi_member_order = "bysource"
+
+html_theme = "sphinx_rtd_theme"
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..390012a
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,53 @@
+
+.. lbench documentation main file.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to lbench's documentation!
+========================================================================================
+
+Benchmarking tools for LSDB
+
+Dev Guide - Getting Started
+---------------------------
+
+Before installing any dependencies or writing code, it's a great idea to create a
+virtual environment. LINCC-Frameworks engineers primarily use `conda` to manage virtual
+environments. If you have conda installed locally, you can run the following to
+create and activate a new environment.
+
+.. code-block:: console
+
+   >> conda create -n <env_name> python=3.11
+   >> conda activate <env_name>
+
+
+Once you have created a new environment, you can install this project for local
+development using the following commands:
+
+.. code-block:: console
+
+ >> pip install -e .'[dev]'
+ >> pre-commit install
+ >> conda install pandoc
+
+
+Notes:
+
+1) The single quotes around ``'[dev]'`` may not be required for your operating system.
+2) ``pre-commit install`` will initialize pre-commit for this local repository, so
+ that a set of tests will be run prior to completing a local commit. For more
+ information, see the Python Project Template documentation on
+   `pre-commit <https://lincc-ppt.readthedocs.io/en/latest/practices/precommit.html>`_.
+3) Installing ``pandoc`` allows you to verify that automatic rendering of Jupyter notebooks
+ into documentation for ReadTheDocs works as expected. For more information, see
+ the Python Project Template documentation on
+   `Sphinx and Python Notebooks <https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html>`_.
+
+
+.. toctree::
+ :hidden:
+
+   Home page <self>
+   API Reference <autoapi/index>
+   Notebooks <notebooks>
diff --git a/docs/notebooks.rst b/docs/notebooks.rst
new file mode 100644
index 0000000..7f7e544
--- /dev/null
+++ b/docs/notebooks.rst
@@ -0,0 +1,6 @@
+Notebooks
+========================================================================================
+
+.. toctree::
+
+   Introducing Jupyter Notebooks <notebooks/intro_notebook>
diff --git a/docs/notebooks/README.md b/docs/notebooks/README.md
new file mode 100644
index 0000000..2b4fb45
--- /dev/null
+++ b/docs/notebooks/README.md
@@ -0,0 +1,25 @@
+# Jupyter notebooks to run on-demand.
+
+Jupyter notebooks in this directory will be run each time you render your documentation.
+
+This means they should be able to be run with the resources in the repo, and in various environments:
+
+- any other developer's machine
+- github CI runners
+- ReadTheDocs doc generation
+
+This is great for notebooks that can run in a few minutes, on smaller datasets.
+
+If you would like to include these notebooks in automatically generated documentation
+simply add the notebook name to the ``../notebooks.rst`` file, and include a markdown
+cell at the beginning of your notebook with ``# Title`` that will be used as the text
+in the table of contents in the documentation.
+
+Be aware that you may also need to update the ``../requirements.txt`` file if
+your notebooks have dependencies that are not specified in ``../pyproject.toml``.
+
+For notebooks that require large datasets, access to third-party APIs, or significant CPU or GPU resources, put them in `./pre_executed` instead.
+
+For more information look here: https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html#python-notebooks
+
+Or if you still have questions contact us: https://lincc-ppt.readthedocs.io/en/latest/source/contact.html
\ No newline at end of file
diff --git a/docs/notebooks/intro_notebook.ipynb b/docs/notebooks/intro_notebook.ipynb
new file mode 100644
index 0000000..0589b29
--- /dev/null
+++ b/docs/notebooks/intro_notebook.ipynb
@@ -0,0 +1,84 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "textblock1",
+ "metadata": {
+ "cell_marker": "\"\"\""
+ },
+ "source": [
+ "# Introducing Jupyter Notebooks in Sphinx\n",
+ "\n",
+ "This notebook showcases very basic functionality of rendering your jupyter notebooks as tutorials inside your sphinx documentation.\n",
+ "\n",
+ "As part of the LINCC Frameworks python project template, your notebooks will be executed AND rendered at document build time.\n",
+ "\n",
+ "You can read more about Sphinx, ReadTheDocs, and building notebooks in [LINCC's documentation](https://lincc-ppt.readthedocs.io/en/latest/practices/sphinx.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "codeblock1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def sierpinsky(order):\n",
+ " \"\"\"Define a method that will create a Sierpinsky triangle of given order,\n",
+ " and will print it out.\"\"\"\n",
+ " triangles = [\"*\"]\n",
+ " for i in range(order):\n",
+ " spaces = \" \" * (2**i)\n",
+ " triangles = [spaces + triangle + spaces for triangle in triangles] + [\n",
+ " triangle + \" \" + triangle for triangle in triangles\n",
+ " ]\n",
+ " print(f\"Printing order {order} triangle\")\n",
+ " print(\"\\n\".join(triangles))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "textblock2",
+ "metadata": {
+ "cell_marker": "\"\"\"",
+ "lines_to_next_cell": 1
+ },
+ "source": [
+ "Then, call our method a few times. This will happen on the fly during notebook rendering."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "codeblock2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for order in range(3):\n",
+ " sierpinsky(order)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "codeblock3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sierpinsky(4)"
+ ]
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "cell_markers": "\"\"\""
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/pre_executed/README.md b/docs/pre_executed/README.md
new file mode 100644
index 0000000..fb3cc7c
--- /dev/null
+++ b/docs/pre_executed/README.md
@@ -0,0 +1,16 @@
+# Pre-executed Jupyter notebooks
+
+Jupyter notebooks in this directory will NOT be run in the docs workflows, and will be rendered with
+the provided output cells as-is.
+
+This is useful for notebooks that require large datasets, access to third-party APIs, or significant CPU or GPU resources.
+
+Where possible, instead write smaller notebooks that can be run as part of a github worker, and within the ReadTheDocs rendering process.
+
+To ensure that the notebooks are not run by the notebook conversion process, you can add the following metadata block to the notebook:
+
+```
+ "nbsphinx": {
+ "execute": "never"
+ },
+```
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..423ef3f
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,10 @@
+
+ipykernel
+ipython
+jupytext
+nbconvert
+nbsphinx
+sphinx
+sphinx-autoapi
+sphinx-copybutton
+sphinx-rtd-theme>=3.0
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 4fa5e9f..f8fae32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,26 @@
[project]
name = "lbench"
-version = "0.0.1"
+license = "MIT"
+license-files = ["LICENSE"]
+readme = "README.md"
+authors = [
+ { name = "LINCC Frameworks", email = "lincc-frameworks-team@lists.lsst.org" }
+]
+description = "Benchmarking tools for LSDB"
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+]
+dynamic = ["version"]
+requires-python = ">=3.10"
dependencies = [
"pytest",
"pytest-benchmark",
"typer",
+ "dask[complete]",
"dash",
"dash-bootstrap-components",
"pandas",
@@ -12,8 +28,23 @@ dependencies = [
"memray"
]
+[project.urls]
+"Source Code" = "https://github.com/lincc-frameworks/lbench"
+
+# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
+[project.optional-dependencies]
+dev = [
+ "black", # Used for static linting of files
+ "jupyter", # Clears output from Jupyter notebooks
+ "pre-commit", # Used to run checks before finalizing a git commit
+ "pytest-cov", # Used to report total code coverage
+]
+
[build-system]
-requires = ["setuptools"]
+requires = [
+ "setuptools>=62", # Used to build and package the Python project
+ "setuptools_scm>=6.2", # Gets release version from git. Makes it available programmatically
+]
build-backend = "setuptools.build_meta"
[project.scripts]
@@ -22,7 +53,23 @@ lbench = "lbench.cli.lbench:app"
[project.entry-points.pytest11]
lbench = "lbench.pytest"
+[tool.setuptools_scm]
+write_to = "src/lbench/_version.py"
+
[tool.pytest.ini_options]
markers = [
"lbench_memory: mark test to capture memory usage"
-]
\ No newline at end of file
+]
+testpaths = [
+ "tests",
+ "src",
+ "docs",
+]
+addopts = "--doctest-modules --doctest-glob=*.rst"
+
+[tool.black]
+line-length = 110
+
+
+[tool.coverage.run]
+omit = ["src/lbench/_version.py"]
diff --git a/src/lbench/__init__.py b/src/lbench/__init__.py
index 176dace..ecafe60 100644
--- a/src/lbench/__init__.py
+++ b/src/lbench/__init__.py
@@ -1,3 +1,4 @@
from .dashboard.app import run_dashboard
+from ._version import __version__
-__all__ = ["run_dashboard"]
+__all__ = ["run_dashboard", "__version__"]
diff --git a/src/lbench/cli/env.py b/src/lbench/cli/env.py
index 697c52a..f227e14 100644
--- a/src/lbench/cli/env.py
+++ b/src/lbench/cli/env.py
@@ -3,6 +3,7 @@
ROOT_DIR_ENV_VAR = "LBENCH_ROOT"
+
def get_lbench_root_dir() -> Path:
"""
Resolve the lbench root directory.
diff --git a/src/lbench/cli/lbench.py b/src/lbench/cli/lbench.py
index 4da7901..1cb877d 100644
--- a/src/lbench/cli/lbench.py
+++ b/src/lbench/cli/lbench.py
@@ -4,6 +4,7 @@
app = typer.Typer(help="lbench CLI — run benchmarks and dashboards")
+
@app.command()
def dash(port: int = 8050):
"""Run the lbench dashboard."""
diff --git a/src/lbench/dashboard/app.py b/src/lbench/dashboard/app.py
index e745c22..376494f 100644
--- a/src/lbench/dashboard/app.py
+++ b/src/lbench/dashboard/app.py
@@ -18,7 +18,7 @@
__name__,
external_stylesheets=[
dbc.themes.FLATLY,
- "https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css"
+ "https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css",
],
)
@@ -56,11 +56,7 @@ def serve_flamegraph(run_name, filename):
data = read(str(prof_file))
html_content = render(data, prof_file.name)
- html_content = re.sub(
- r'src="static/(.*?)"', r'src="/tuna_web/static/\1"', html_content
- )
- html_content = re.sub(
- r'href="static/(.*?)"', r'href="/tuna_web/static/\1"', html_content
- )
+ html_content = re.sub(r'src="static/(.*?)"', r'src="/tuna_web/static/\1"', html_content)
+ html_content = re.sub(r'href="static/(.*?)"', r'href="/tuna_web/static/\1"', html_content)
return Response(html_content, mimetype="text/html")
diff --git a/src/lbench/dashboard/context.py b/src/lbench/dashboard/context.py
index 434fc0b..f041519 100644
--- a/src/lbench/dashboard/context.py
+++ b/src/lbench/dashboard/context.py
@@ -7,12 +7,13 @@
from lbench.dashboard.metrics.benchmark_collection import BenchmarkCollection
from lbench.dashboard.metrics.groups import stats_group, execution_group, dask_group, profiling_group
-"""Registry for available metrics"""
+# Registry for available metrics — constant, built once at startup
registry = MetricRegistry()
for group in [stats_group, execution_group, dask_group, profiling_group]:
registry.register_group(group)
-"""Load information about runs"""
+# Root directory where benchmark runs are stored — constant
+ROOT_DIR = get_lbench_root_dir()
def load_run_json(run_dir):
@@ -39,58 +40,34 @@ def load_all_runs(root_dir):
return dict(sorted(runs.items(), key=lambda kv: kv[1].get("datetime", ""), reverse=True))
+def get_collection(run_data: dict) -> BenchmarkCollection:
+ """Build a BenchmarkCollection from raw run data (e.g. from run-data-store)."""
+ return BenchmarkCollection(run_data or {}, registry)
+
+
def rename_run(old_name, new_name):
"""Rename a benchmark run folder.
- Args:
- old_name: Current folder name
- new_name: New folder name
-
Returns:
- tuple: (success: bool, message: str, new_run_data: dict, new_collection: BenchmarkCollection)
+ tuple: (success: bool, message: str, new_run_data: dict)
"""
- global RUN_DATA, BENCHMARK_COLLECTION
-
- # Validate names
if not old_name or not new_name:
- return False, "Names cannot be empty", RUN_DATA, BENCHMARK_COLLECTION
+ return False, "Names cannot be empty", None
if old_name == new_name:
- return False, "New name is the same as old name", RUN_DATA, BENCHMARK_COLLECTION
+ return False, "New name is the same as old name", None
old_path = ROOT_DIR / old_name
new_path = ROOT_DIR / new_name
- # Check if old path exists
if not old_path.exists():
- return False, f"Run '{old_name}' not found", RUN_DATA, BENCHMARK_COLLECTION
+ return False, f"Run '{old_name}' not found", None
- # Check if new path already exists
if new_path.exists():
- return False, f"Run '{new_name}' already exists", RUN_DATA, BENCHMARK_COLLECTION
+ return False, f"Run '{new_name}' already exists", None
try:
- # Rename the folder
shutil.move(str(old_path), str(new_path))
-
- # Reload all run data
- new_run_data = load_all_runs(ROOT_DIR)
- new_collection = BenchmarkCollection(new_run_data, registry)
-
- # Update globals
- RUN_DATA = new_run_data
- BENCHMARK_COLLECTION = new_collection
-
- return True, f"Successfully renamed '{old_name}' to '{new_name}'", new_run_data, new_collection
+ return True, f"Successfully renamed '{old_name}' to '{new_name}'", load_all_runs(ROOT_DIR)
except Exception as e:
- return False, f"Error renaming run: {str(e)}", RUN_DATA, BENCHMARK_COLLECTION
-
-
-# Root directory where benchmark runs are stored
-ROOT_DIR = get_lbench_root_dir()
-
-# Global run data (needs to be defined before importing pages)
-RUN_DATA = load_all_runs(ROOT_DIR)
-
-# Initialize metrics collection
-BENCHMARK_COLLECTION = BenchmarkCollection(RUN_DATA, registry)
+ return False, f"Error renaming run: {str(e)}", None
diff --git a/src/lbench/dashboard/layout.py b/src/lbench/dashboard/layout.py
index d3a7d55..4738163 100644
--- a/src/lbench/dashboard/layout.py
+++ b/src/lbench/dashboard/layout.py
@@ -1,11 +1,12 @@
from dash import html, dcc, Input, Output, callback
import dash_bootstrap_components as dbc
-from lbench.dashboard.context import RUN_DATA
+from lbench.dashboard.context import load_all_runs, ROOT_DIR
from lbench.dashboard.layouts.sidebar import sidebar_panel, rename_modal
from lbench.dashboard.layouts.tables import tables_panel
from lbench.dashboard.layouts.trends import trends_panel
+
def _navbar():
return dbc.NavbarSimple(
brand="lbench Dashboard",
@@ -17,10 +18,13 @@ def _navbar():
fluid=True,
)
+
def _container():
- return dbc.Container([
+ return dbc.Container(
+ [
+ dcc.Location(id="url", refresh=False),
dcc.Store(id="date-filter-store", data={}),
- dcc.Store(id="run-data-store", data=RUN_DATA),
+ dcc.Store(id="run-data-store", data={}),
dcc.Store(id="rename-old-name", data=""),
dcc.Store(id="right-panel-view", data="tables"),
rename_modal(),
@@ -38,18 +42,33 @@ def _container():
],
fluid=True,
style={
- "flex": "1", "overflow": "hidden",
- "paddingLeft": "1em", "paddingRight": "1em",
- "paddingTop": "0", "paddingBottom": "0",
- "display": "flex", "flexDirection": "column",
- }
+ "flex": "1",
+ "overflow": "hidden",
+ "paddingLeft": "1em",
+ "paddingRight": "1em",
+ "paddingTop": "0",
+ "paddingBottom": "0",
+ "display": "flex",
+ "flexDirection": "column",
+ },
)
+
layout = html.Div(
[_navbar(), _container()],
style={"height": "100vh", "overflow": "hidden", "display": "flex", "flexDirection": "column"},
)
+
+@callback(
+ Output("run-data-store", "data", allow_duplicate=True),
+ Input("url", "pathname"),
+ prevent_initial_call="initial_duplicate",
+)
+def reload_on_page_load(_pathname):
+ return load_all_runs(ROOT_DIR)
+
+
@callback(
Output("tables-view", "style"),
Output("trends-view", "style"),
diff --git a/src/lbench/dashboard/layouts/sidebar.py b/src/lbench/dashboard/layouts/sidebar.py
index 00a95cb..5c1d6ac 100644
--- a/src/lbench/dashboard/layouts/sidebar.py
+++ b/src/lbench/dashboard/layouts/sidebar.py
@@ -3,7 +3,7 @@
from dash import html, Input, Output, State, dcc, callback, no_update
import dash_bootstrap_components as dbc
-from lbench.dashboard.context import RUN_DATA, rename_run
+from lbench.dashboard.context import rename_run
from lbench.dashboard.layouts.tables import benchmarks_to_tables
@@ -106,21 +106,32 @@ def sidebar_panel():
),
html.Div(
[
- dbc.Button("Apply", id="apply-filter-btn", color="primary", size="sm",
- className="me-1"),
+ dbc.Button(
+ "Apply", id="apply-filter-btn", color="primary", size="sm", className="me-1"
+ ),
dbc.Button("Clear", id="clear-filter-btn", color="secondary", size="sm"),
- dbc.Button("Plot series", id="plot-range-btn", color="success", size="sm",
- style={"marginLeft": "auto"}),
+ dbc.Button(
+ "Plot series",
+ id="plot-range-btn",
+ color="success",
+ size="sm",
+ style={"marginLeft": "auto"},
+ ),
],
style={"marginTop": "10px", "display": "flex"},
),
],
- style={"borderTop": "1px solid #ccc", "padding": "1em 0", "flexShrink": "0",
- "position": "relative", "zIndex": 10},
+ style={
+ "borderTop": "1px solid #ccc",
+ "padding": "1em 0",
+ "flexShrink": "0",
+ "position": "relative",
+ "zIndex": 10,
+ },
),
html.Div(
id="sidebar-container",
- children=create_sidebar(RUN_DATA),
+ children=create_sidebar({}),
style={"overflowY": "auto", "flex": "1", "minHeight": "0"},
),
],
@@ -137,6 +148,7 @@ def sidebar_panel():
# --- Date filter ---
+
@callback(
Output("date-filter-store", "data"),
Input("apply-filter-btn", "n_clicks"),
@@ -168,6 +180,7 @@ def sync_date_picker(date_filter, _run_data):
# --- Benchmark tables + sidebar ---
+
@callback(
Output("benchmark-tables-container", "children"),
Output("sidebar-container", "children"),
@@ -179,11 +192,17 @@ def update_benchmarks_and_sidebar(n_clicks_list, run_data, date_filter):
triggered = dash.ctx.triggered_id
def placeholder(msg):
- return html.Div(msg, style={
- "height": "100%", "display": "flex",
- "alignItems": "center", "justifyContent": "center",
- "color": "#888", "fontSize": "1.1rem",
- })
+ return html.Div(
+ msg,
+ style={
+ "height": "100%",
+ "display": "flex",
+ "alignItems": "center",
+ "justifyContent": "center",
+ "color": "#888",
+ "fontSize": "1.1rem",
+ },
+ )
if not run_data or not isinstance(run_data, dict):
return placeholder("No run data found"), create_sidebar({})
@@ -197,14 +216,16 @@ def placeholder(msg):
idx = triggered.get("index")
if idx is not None:
run_name = list(filtered_run_data.keys())[idx]
- return benchmarks_to_tables(run_name, run_data[run_name]), create_sidebar(filtered_run_data,
- active_run=run_name)
+ return benchmarks_to_tables(run_name, run_data[run_name]), create_sidebar(
+ filtered_run_data, active_run=run_name
+ )
return placeholder("Select a run from the sidebar or plot series"), create_sidebar(filtered_run_data)
# --- Panel switching ---
+
@callback(
Output("right-panel-view", "data"),
Input("plot-range-btn", "n_clicks"),
@@ -225,6 +246,7 @@ def show_tables(_):
# --- Rename ---
+
@callback(
Output("rename-modal", "is_open"),
Output("rename-input", "value"),
@@ -256,7 +278,7 @@ def handle_rename(edit_clicks, cancel_clicks, confirm_clicks, old_name, new_name
return False, "", "", "", no_update
if triggered_id == "rename-confirm-btn" and confirm_clicks:
- success, message, new_run_data, _ = rename_run(old_name, new_name)
+ success, message, new_run_data = rename_run(old_name, new_name)
if success:
return False, "", "", "", new_run_data
return no_update, no_update, no_update, message, no_update
diff --git a/src/lbench/dashboard/layouts/tables.py b/src/lbench/dashboard/layouts/tables.py
index a35993d..34517da 100644
--- a/src/lbench/dashboard/layouts/tables.py
+++ b/src/lbench/dashboard/layouts/tables.py
@@ -15,6 +15,7 @@ def tables_panel():
def _fmt_run_datetime(dt_str: str) -> str:
from datetime import datetime
+
try:
dt = datetime.fromisoformat(dt_str)
local_dt = dt.astimezone().replace(tzinfo=None)
@@ -30,7 +31,9 @@ def benchmark_to_table(bm, run_name, run_datetime=None):
html.Span(
run_datetime,
style={"fontSize": "0.8em", "color": "#888", "marginLeft": "1em", "fontWeight": "normal"},
- ) if run_datetime else None,
+ )
+ if run_datetime
+ else None,
],
style={"display": "flex", "justifyContent": "space-between", "alignItems": "baseline"},
)
diff --git a/src/lbench/dashboard/layouts/trends.py b/src/lbench/dashboard/layouts/trends.py
index 152b226..333c0d7 100644
--- a/src/lbench/dashboard/layouts/trends.py
+++ b/src/lbench/dashboard/layouts/trends.py
@@ -3,7 +3,7 @@
from dash import dcc, html, Input, Output, callback
import dash_bootstrap_components as dbc
-from lbench.dashboard.context import registry, BENCHMARK_COLLECTION
+from lbench.dashboard.context import registry, get_collection
def trends_panel():
@@ -17,7 +17,7 @@ def trends_panel():
html.Label("Select benchmarks:", className="fw-bold"),
dcc.Dropdown(
id="benchmark-selector",
- options=[{"label": b, "value": b} for b in BENCHMARK_COLLECTION.get_benchmark_names()],
+ options=[],
placeholder="Select one or more benchmarks",
multi=True,
),
@@ -26,20 +26,57 @@ def trends_panel():
),
dbc.Col(
[
- html.Label("Select metric:", className="fw-bold"),
+ html.Label("Select metrics:", className="fw-bold"),
dcc.Dropdown(
id="metric-selector",
- options=[{"label": m.display_name, "value": m.name} for m in BENCHMARK_COLLECTION.get_common_metrics()],
- value="mean",
- placeholder="Select a metric",
+ options=[],
+ value=["mean"],
+ placeholder="Select one or more metrics",
+ multi=True,
),
],
- width=6,
+ width=4,
+ ),
+ dbc.Col(
+ [
+ html.Label("Chart type:", className="fw-bold"),
+ dbc.RadioItems(
+ id="chart-type-selector",
+ options=[
+ {"label": "Line", "value": "line"},
+ {"label": "Bar", "value": "bar"},
+ ],
+ value="line",
+ inline=True,
+ ),
+ ],
+ width=2,
+ className="d-flex flex-column justify-content-start",
),
],
className="mb-3",
style={"flexShrink": "0"},
),
+ dbc.Row(
+ [
+ dbc.Col(
+ [
+ html.Label("Select runs:", className="fw-bold"),
+ dcc.Dropdown(
+ id="bar-run-selector",
+ options=[],
+ value=[],
+ multi=True,
+ placeholder="Select runs to include",
+ ),
+ ],
+ width=12,
+ ),
+ ],
+ id="bar-run-selector-row",
+ className="mb-3",
+ style={"display": "none", "flexShrink": "0"},
+ ),
dcc.Graph(
id="trend-plot",
figure={"layout": {"title": "Select a benchmark and metric to view trends"}},
@@ -57,7 +94,9 @@ def _apply_date_filter(df: pd.DataFrame, date_filter: dict) -> pd.DataFrame:
end_raw = date_filter.get("end_date")
if not start_raw and not end_raw:
return df
- timestamps = df["timestamp"].dt.tz_localize(None) if df["timestamp"].dt.tz is not None else df["timestamp"]
+ timestamps = (
+ df["timestamp"].dt.tz_localize(None) if df["timestamp"].dt.tz is not None else df["timestamp"]
+ )
mask = pd.Series(True, index=df.index)
if start_raw:
mask &= timestamps >= pd.to_datetime(start_raw)
@@ -66,66 +105,188 @@ def _apply_date_filter(df: pd.DataFrame, date_filter: dict) -> pd.DataFrame:
return df[mask]
+@callback(
+ Output("benchmark-selector", "options"),
+ Output("metric-selector", "options"),
+ Output("bar-run-selector", "options"),
+ Output("bar-run-selector", "value"),
+ Input("run-data-store", "data"),
+)
+def refresh_trend_options(run_data):
+ collection = get_collection(run_data)
+ benchmark_options = [{"label": b, "value": b} for b in collection.get_benchmark_names()]
+ metric_options = [{"label": m.display_name, "value": m.name} for m in collection.get_common_metrics()]
+ run_ids = list((run_data or {}).keys())
+ run_options = [{"label": r, "value": r} for r in run_ids]
+ return benchmark_options, metric_options, run_options, run_ids
+
+
+@callback(
+ Output("bar-run-selector-row", "style"),
+ Input("chart-type-selector", "value"),
+)
+def toggle_run_selector(chart_type):
+ if chart_type == "bar":
+ return {"flexShrink": "0"}
+ return {"display": "none", "flexShrink": "0"}
+
+
@callback(
Output("trend-plot", "figure"),
Input("benchmark-selector", "value"),
Input("metric-selector", "value"),
Input("date-filter-store", "data"),
+ Input("chart-type-selector", "value"),
+ Input("bar-run-selector", "value"),
+ Input("run-data-store", "data"),
)
-def update_trend_plot(selected_benchmarks, selected_metric_name, date_filter):
- if not selected_benchmarks or not selected_metric_name:
- return {"layout": {"title": "Select one or more benchmarks and a metric to view trends"}}
+def update_trend_plot(
+ selected_benchmarks, selected_metric_names, date_filter, chart_type, selected_runs, run_data
+):
+ if not selected_benchmarks or not selected_metric_names:
+ return {"layout": {"title": "Select one or more benchmarks and metrics to view trends"}}
+
+ if isinstance(selected_metric_names, str):
+ selected_metric_names = [selected_metric_names]
+
+ collection = get_collection(run_data)
+
+ # Build (metric, series, scale, unit) tuples, skipping metrics with no data
+ metrics_data = []
+ for metric_name in selected_metric_names:
+ metric = registry.get(metric_name)
+ if not metric:
+ continue
+ series = {
+ b: _apply_date_filter(collection.get_metric_series(b, metric), date_filter)
+ for b in selected_benchmarks
+ }
+ series = {b: df for b, df in series.items() if not df.empty}
+ if not series:
+ continue
+ all_values = pd.concat([df["value"] for df in series.values()])
+ scale, unit = metric.get_plot_scale_and_unit(all_values)
+ metrics_data.append((metric, series, scale, unit))
- metric = registry.get(selected_metric_name)
- if not metric:
- return {"layout": {"title": f"Metric '{selected_metric_name}' not found"}}
+ if not metrics_data:
+ return {"layout": {"title": "No data available for the selected benchmarks and metrics"}}
+ # Assign y-axes: group metrics by unit, up to 2 axes (left/right)
+ unit_to_axis: dict[str, int] = {}
+ metric_axis: dict[str, int] = {}
+ for metric, series, scale, unit in metrics_data:
+ if unit not in unit_to_axis:
+ unit_to_axis[unit] = min(len(unit_to_axis) + 1, 2)
+ metric_axis[metric.name] = unit_to_axis[unit]
+
+ axis_labels: dict[int, list[str]] = {}
+ for metric, _, _, unit in metrics_data:
+ ax = metric_axis[metric.name]
+ label = metric.display_name + (f" ({unit})" if unit else "")
+ if ax not in axis_labels:
+ axis_labels[ax] = []
+ if label not in axis_labels[ax]:
+ axis_labels[ax].append(label)
+
+ multi_metric = len(metrics_data) > 1
fig = go.Figure()
- series = {
- b: _apply_date_filter(BENCHMARK_COLLECTION.get_metric_series(b, metric), date_filter)
- for b in selected_benchmarks
+ for metric, series, scale, unit in metrics_data:
+ # Bar charts don't support multiple y-axes with grouped bars — always use y1
+ ax = metric_axis[metric.name] if chart_type == "line" else 1
+ yaxis_ref = "y" if ax == 1 else "y2"
+
+ if chart_type == "bar":
+ traces = _make_bar_traces(series, scale, selected_runs, metric, multi_metric, yaxis_ref)
+ else:
+ traces = _make_line_traces(
+ series, scale, metric, date_filter, multi_metric, yaxis_ref, collection
+ )
+
+ for trace in traces:
+ fig.add_trace(trace)
+
+ metric_names_str = " / ".join(m.display_name for m, *_ in metrics_data)
+ title_prefix = "Comparison" if chart_type == "bar" else "Trends"
+
+ layout_kwargs = {
+ "title": f"{title_prefix}: {metric_names_str}",
+ "xaxis_title": "Run",
+ "yaxis": {"title": " / ".join(axis_labels.get(1, []))},
+ "legend": {"orientation": "h", "yanchor": "top", "y": -0.1, "xanchor": "center", "x": 0.5},
}
- series = {b: df for b, df in series.items() if not df.empty}
- if not series:
- return {"layout": {"title": "No data available for the selected benchmarks and metric"}}
+ if chart_type == "line" and 2 in axis_labels:
+ layout_kwargs["yaxis2"] = {
+ "title": " / ".join(axis_labels[2]),
+ "overlaying": "y",
+ "side": "right",
+ }
+
+ if chart_type == "line":
+ layout_kwargs["hovermode"] = "x unified"
+ if chart_type == "bar":
+ layout_kwargs["barmode"] = "group"
+
+ fig.update_layout(**layout_kwargs)
+ return fig
- all_values = pd.concat([df["value"] for df in series.values()])
- scale, plot_unit = metric.get_plot_scale_and_unit(all_values)
+def _make_line_traces(series, scale, metric, date_filter, multi_metric, yaxis_ref, collection):
+ traces = []
for benchmark, df in series.items():
+ name = f"{benchmark} ({metric.display_name})" if multi_metric else benchmark
trace_kwargs = {
"x": df["timestamp"],
"y": df["value"] / scale,
"mode": "lines+markers",
- "name": benchmark,
+ "name": name,
+ "yaxis": yaxis_ref,
}
error_bar_config = metric.get_error_bar_config()
if error_bar_config:
error_bar_metric = error_bar_config["metric"]
error_df = _apply_date_filter(
- BENCHMARK_COLLECTION.get_metric_series(benchmark, error_bar_metric), date_filter
+ collection.get_metric_series(benchmark, error_bar_metric), date_filter
)
if not error_df.empty:
merged = df.merge(error_df, on=["run_id", "timestamp"], suffixes=("", "_error"))
if "value_error" in merged.columns:
- trace_kwargs["error_y"] = dict(type="data", array=merged["value_error"] / scale, visible=True)
+ trace_kwargs["error_y"] = dict(
+ type="data", array=merged["value_error"] / scale, visible=True
+ )
trace_kwargs["x"] = merged["timestamp"]
trace_kwargs["y"] = merged["value"] / scale
- fig.add_trace(go.Scatter(**trace_kwargs))
+ traces.append(go.Scatter(**trace_kwargs))
+ return traces
- y_axis_label = metric.display_name
- if plot_unit:
- y_axis_label += f" ({plot_unit})"
- fig.update_layout(
- xaxis_title="Run",
- yaxis_title=y_axis_label,
- hovermode="x unified",
- title=f"Trends: {metric.display_name}",
- legend={"orientation": "h", "yanchor": "top", "y": -0.1, "xanchor": "center", "x": 0.5},
- )
- return fig
+def _make_bar_traces(series, scale, selected_runs, metric, multi_metric, yaxis_ref):
+ run_ids_in_data = set()
+ for df in series.values():
+ run_ids_in_data.update(df["run_id"].tolist())
+
+ if selected_runs:
+ run_ids = [r for r in selected_runs if r in run_ids_in_data]
+ else:
+ run_ids = sorted(run_ids_in_data)
+
+ if not run_ids:
+ return []
+
+ traces = []
+ for benchmark, df in series.items():
+ name = f"{benchmark} ({metric.display_name})" if multi_metric else benchmark
+ df_filtered = df[df["run_id"].isin(run_ids)]
+ df_filtered = df_filtered.set_index("run_id").reindex(run_ids).reset_index()
+ traces.append(
+ go.Bar(
+ x=df_filtered["run_id"],
+ y=df_filtered["value"] / scale,
+ name=name,
+ yaxis=yaxis_ref,
+ )
+ )
+ return traces
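The trends view now assigns metrics to at most two y-axes grouped by unit; in Plotly that comes down to tagging traces with yaxis="y2" and declaring a secondary axis that overlays the first. A standalone sketch with toy data rather than the dashboard's real series:

import plotly.graph_objects as go

runs = ["run-1", "run-2", "run-3"]
fig = go.Figure()
fig.add_trace(go.Scatter(x=runs, y=[0.9, 1.1, 1.0], name="mean (s)", yaxis="y"))
fig.add_trace(go.Scatter(x=runs, y=[120, 130, 125], name="peak memory (MiB)", yaxis="y2"))
fig.update_layout(
    yaxis={"title": "mean (s)"},
    # The second unit gets its own axis on the right-hand side.
    yaxis2={"title": "peak memory (MiB)", "overlaying": "y", "side": "right"},
)
fig.show()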
diff --git a/src/lbench/dashboard/metrics/benchmark_collection.py b/src/lbench/dashboard/metrics/benchmark_collection.py
index 1846add..38c3130 100644
--- a/src/lbench/dashboard/metrics/benchmark_collection.py
+++ b/src/lbench/dashboard/metrics/benchmark_collection.py
@@ -64,12 +64,7 @@ def __init__(self, run_data: dict, registry: MetricRegistry):
if not bm_name:
continue
- run = BenchmarkRun(
- name=bm_name,
- run_id=run_id,
- timestamp=timestamp,
- raw_data=bm_data
- )
+ run = BenchmarkRun(name=bm_name, run_id=run_id, timestamp=timestamp, raw_data=bm_data)
self.runs.append(run)
@@ -110,11 +105,13 @@ def get_metric_series(self, benchmark: str, metric: Metric) -> pd.DataFrame:
for run in runs:
value = run.get_metric_value(metric)
if value is not None: # Only include runs where metric is available
- data.append({
- "run_id": run.run_id,
- "timestamp": run.timestamp,
- "value": value,
- })
+ data.append(
+ {
+ "run_id": run.run_id,
+ "timestamp": run.timestamp,
+ "value": value,
+ }
+ )
return pd.DataFrame(data)
diff --git a/src/lbench/dashboard/metrics/groups/dask_group.py b/src/lbench/dashboard/metrics/groups/dask_group.py
index 9eae655..5ed0398 100644
--- a/src/lbench/dashboard/metrics/groups/dask_group.py
+++ b/src/lbench/dashboard/metrics/groups/dask_group.py
@@ -1,6 +1,7 @@
from typing import Optional
from lbench.dashboard.metrics import Metric, DurationMetric, MemoryMetric
+from lbench.dashboard.metrics.groups.execution_group import CountMetric
from lbench.dashboard.metrics.metric_group import MetricGroup
@@ -15,7 +16,7 @@ def get_dask_stats(self, benchmark_data: dict) -> Optional[dict]:
return None
-class DaskTaskCount(DaskMetric):
+class DaskTaskCount(DaskMetric, CountMetric):
"""Number of Dask tasks."""
def __init__(self):
@@ -30,11 +31,6 @@ def extract(self, benchmark_data: dict) -> Optional[float]:
pass
return None
- def format_value(self, value: Optional[float]) -> str:
- if value is None:
- return "-"
- return str(int(value))
-
class DaskTotalTime(DaskMetric, DurationMetric):
"""Total Dask execution time."""
@@ -47,10 +43,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]:
if dask_stats:
try:
startstops = dask_stats.get("startstops", [])
- times = [
- sum([k["stop"] - k["start"] for k in s])
- for s in startstops
- ]
+ times = [sum([k["stop"] - k["start"] for k in s]) for s in startstops]
return sum(times) if times else None
except (TypeError, ValueError, KeyError):
pass
@@ -73,7 +66,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]:
return None
-class DaskGraphLength(DaskMetric):
+class DaskGraphLength(DaskMetric, CountMetric):
"""Size of dask graph"""
def __init__(self):
@@ -135,29 +128,27 @@ def render_card(self, benchmark_data: dict, run_name: str) -> Optional[any]:
if keys:
from lbench.dashboard.utils import format_duration
- times = [
- sum([k["stop"] - k["start"] for k in s])
- for s in dask_stats.get("startstops", [])
- ]
+ times = [sum([k["stop"] - k["start"] for k in s]) for s in dask_stats.get("startstops", [])]
total_time_by_key = {}
for k, t in zip(keys, times):
total_time_by_key[k] = total_time_by_key.get(k, 0) + t
- sorted_key_times = sorted(
- total_time_by_key.items(), key=lambda x: x[1], reverse=True
- )
+ sorted_key_times = sorted(total_time_by_key.items(), key=lambda x: x[1], reverse=True)
formatted_times = [format_duration(t) for _, t in sorted_key_times]
- task_table = pd.DataFrame({
- "task_key": [k for k, _ in sorted_key_times],
- "total time": [f"{v} {u}" for v, u in formatted_times],
- })
+ task_table = pd.DataFrame(
+ {
+ "task_key": [k for k, _ in sorted_key_times],
+ "total time": [f"{v} {u}" for v, u in formatted_times],
+ }
+ )
components.append(html.H5("Dask Task Times", className="card-title mt-3"))
components.append(
- dbc.Table.from_dataframe(task_table, striped=True, bordered=True, hover=True))
+ dbc.Table.from_dataframe(task_table, striped=True, bordered=True, hover=True)
+ )
return dbc.CardBody(components) if components else None
@@ -174,13 +165,15 @@ def get_action_buttons(self, benchmark_data: dict, run_name: str):
report_path = dask_stats.get("performance_report")
if report_path:
report_name = Path(report_path).name
- buttons.append(html.A(
- "Open Dask Performance Report",
- href=f"/file/{run_name}/{report_name}",
- target="_blank",
- className="btn btn-outline-primary mt-2",
- role="button",
- ))
+ buttons.append(
+ html.A(
+ "Open Dask Performance Report",
+ href=f"/file/{run_name}/{report_name}",
+ target="_blank",
+ className="btn btn-outline-primary mt-2",
+ role="button",
+ )
+ )
return buttons
@@ -188,5 +181,5 @@ def get_action_buttons(self, benchmark_data: dict, run_name: str):
dask_group = DaskGroup(
"dask",
"Dask Metrics",
- [DaskTaskCount(), DaskTotalTime(), DaskPeakMemory(), DaskGraphLength(), DaskGraphSize()]
+ [DaskTaskCount(), DaskTotalTime(), DaskPeakMemory(), DaskGraphLength(), DaskGraphSize()],
)
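DaskTaskCount and DaskGraphLength now inherit integer formatting from CountMetric, which is why the explicit format_value override could be dropped. A toy illustration of the mixin resolution, using illustrative class names rather than the real lbench metrics:

class CountFormatting:
    # Stands in for CountMetric's integer formatting.
    def format_value(self, value):
        return "-" if value is None else str(int(value))

class DaskSource:
    # Stands in for DaskMetric's access to the dask stats block.
    def get_stats(self, data):
        return data.get("dask")

class TaskCount(DaskSource, CountFormatting):
    pass

print(TaskCount().format_value(3.0))  # prints "3"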
diff --git a/src/lbench/dashboard/metrics/groups/execution_group.py b/src/lbench/dashboard/metrics/groups/execution_group.py
index d85780c..70438c4 100644
--- a/src/lbench/dashboard/metrics/groups/execution_group.py
+++ b/src/lbench/dashboard/metrics/groups/execution_group.py
@@ -38,6 +38,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]:
except (TypeError, ValueError):
return None
+
execution_group = MetricGroup(
"execution",
"Execution Info",
@@ -45,5 +46,5 @@ def extract(self, benchmark_data: dict) -> Optional[float]:
CountMetric("rounds", "Rounds"),
CountMetric("iterations", "Iterations"),
PeakMemory(),
- ]
+ ],
)
diff --git a/src/lbench/dashboard/metrics/groups/profiling_group.py b/src/lbench/dashboard/metrics/groups/profiling_group.py
index d4f1c03..d14f3fa 100644
--- a/src/lbench/dashboard/metrics/groups/profiling_group.py
+++ b/src/lbench/dashboard/metrics/groups/profiling_group.py
@@ -32,13 +32,15 @@ def get_action_buttons(self, benchmark_data: dict, run_name: str) -> List[Any]:
if "cprofile_path" in extra_info:
profile_path = extra_info["cprofile_path"]
profile_name = Path(profile_path).name
- buttons.append(html.A(
- "Open Flamegraph",
- href=f"/flamegraph/{run_name}/{profile_name}",
- target="_blank",
- className="btn btn-outline-secondary mt-2",
- role="button",
- ))
+ buttons.append(
+ html.A(
+ "Open Flamegraph",
+ href=f"/flamegraph/{run_name}/{profile_name}",
+ target="_blank",
+ className="btn btn-outline-secondary mt-2",
+ role="button",
+ )
+ )
return buttons
diff --git a/src/lbench/dashboard/metrics/groups/stats_group.py b/src/lbench/dashboard/metrics/groups/stats_group.py
index 0358d8d..012fa26 100644
--- a/src/lbench/dashboard/metrics/groups/stats_group.py
+++ b/src/lbench/dashboard/metrics/groups/stats_group.py
@@ -7,8 +7,9 @@
class StatsMetric(DurationMetric):
"""Base class for metrics from the 'stats' section with time formatting."""
- def __init__(self, name: str, display_name: str, stats_key: str = None,
- error_bar_metric: "StatsMetric" = None):
+ def __init__(
+ self, name: str, display_name: str, stats_key: str = None, error_bar_metric: "StatsMetric" = None
+ ):
super().__init__(name, display_name)
self.stats_key = stats_key or name
self._error_bar_metric = error_bar_metric
@@ -23,10 +24,7 @@ def extract(self, benchmark_data: dict) -> Optional[float]:
def get_error_bar_config(self) -> Optional[Dict[str, Any]]:
"""Return error bar configuration if this metric has one."""
if self._error_bar_metric:
- return {
- "metric": self._error_bar_metric,
- "type": "symmetric"
- }
+ return {"metric": self._error_bar_metric, "type": "symmetric"}
return None
@@ -44,5 +42,5 @@ def get_error_bar_config(self) -> Optional[Dict[str, Any]]:
stats_group = MetricGroup(
"stats",
"Performance Statistics",
- [min_metric, max_metric, mean_metric, median_metric, stddev_metric, iqr_metric, q1_metric, q3_metric]
+ [min_metric, max_metric, mean_metric, median_metric, stddev_metric, iqr_metric, q1_metric, q3_metric],
)
diff --git a/src/lbench/dashboard/metrics/registry.py b/src/lbench/dashboard/metrics/registry.py
index f1c60ee..cec001c 100644
--- a/src/lbench/dashboard/metrics/registry.py
+++ b/src/lbench/dashboard/metrics/registry.py
@@ -3,6 +3,7 @@
from lbench.dashboard.metrics.metric import Metric
from lbench.dashboard.metrics.metric_group import MetricGroup
+
class MetricRegistry:
"""Registry for managing available metrics and groups.
@@ -77,4 +78,3 @@ def get_available_groups(self, benchmark_data: dict) -> List[MetricGroup]:
List of available groups
"""
return [g for g in self._groups.values() if g.is_available(benchmark_data)]
-
diff --git a/src/lbench/dashboard/utils.py b/src/lbench/dashboard/utils.py
index fd8df8b..bb15396 100644
--- a/src/lbench/dashboard/utils.py
+++ b/src/lbench/dashboard/utils.py
@@ -20,6 +20,7 @@ def format_memory(bytes_value, digits=2):
return f"{num_bytes:.{digits}f}", units[-1]
+
def format_duration(seconds, digits=3):
"""
Format a duration in seconds using the most appropriate unit.
diff --git a/src/lbench/notebook/__init__.py b/src/lbench/notebook/__init__.py
new file mode 100644
index 0000000..f357dbe
--- /dev/null
+++ b/src/lbench/notebook/__init__.py
@@ -0,0 +1,7 @@
+"""Jupyter integration for lbench. Load with: %load_ext lbench.notebook"""
+
+from lbench.notebook.magic import LbenchMagics
+
+
+def load_ipython_extension(ip):
+ ip.register_magics(LbenchMagics)
diff --git a/src/lbench/notebook/magic.py b/src/lbench/notebook/magic.py
new file mode 100644
index 0000000..d89ae0d
--- /dev/null
+++ b/src/lbench/notebook/magic.py
@@ -0,0 +1,194 @@
+"""
+Jupyter cell magic for lbench benchmarks.
+
+Usage
+-----
+Load the extension once per notebook::
+
+ %load_ext lbench.notebook
+
+Then use the cell magic (similar to %%timeit)::
+
+ %%lbench
+ my_expensive_function()
+
+Options::
+
+ %%lbench --rounds 10 --warmup --memory --profile --dask --name my_bench
+ my_dask_function()
+
+ # Also capture Dask graph stats from a collection variable:
+ %%lbench --dask --collection my_df
+ my_df.compute()
+
+Options
+-------
+--rounds / -r Number of timed rounds (default: 5)
+--warmup / -w Run one un-timed warmup round first
+--memory / -m Track peak memory with memray
+--profile / -p Capture a cProfile .prof file
+--dask / -d Collect Dask metrics (task stream, memory, performance report)
+--collection VAR Name of a Dask collection variable; also records graph size/length
+--name / -n Name for this benchmark entry (default: auto-generated)
+"""
+
+from __future__ import annotations
+
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from IPython.core.magic import Magics, cell_magic, magics_class
+from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring
+
+from lbench.cli.env import get_lbench_root_dir
+from lbench.runner import (
+ make_benchmark_entry,
+ run_cprofile,
+ run_dask_benchmark,
+ run_memray,
+ time_function,
+ write_benchmark_json,
+)
+
+# -- session state -----------------------------------------------------------
+
+_run_dir: Optional[Path] = None
+
+
+def _get_run_dir() -> Path:
+ global _run_dir
+ if _run_dir is None:
+ root = get_lbench_root_dir()
+ run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
+ _run_dir = root / run_id
+ _run_dir.mkdir(parents=True, exist_ok=True)
+ return _run_dir
+
+
+def reset_session():
+ """Start a fresh run directory for this notebook session."""
+ global _run_dir
+ _run_dir = None
+
+
+# -- display helpers ---------------------------------------------------------
+
+
+def _fmt_time(seconds: float) -> str:
+ if seconds >= 1:
+ return f"{seconds:.3f} s"
+ if seconds >= 1e-3:
+ return f"{seconds * 1e3:.3f} ms"
+ if seconds >= 1e-6:
+ return f"{seconds * 1e6:.3f} µs"
+ return f"{seconds * 1e9:.3f} ns"
+
+
+def _fmt_memory(nbytes: int) -> str:
+ for unit, scale in [("GiB", 2**30), ("MiB", 2**20), ("KiB", 2**10)]:
+ if nbytes >= scale:
+ return f"{nbytes / scale:.2f} {unit}"
+ return f"{nbytes} B"
+
+
+# -- magic class -------------------------------------------------------------
+
+
+@magics_class
+class LbenchMagics(Magics):
+ """Provides the %%lbench cell magic."""
+
+ @cell_magic
+ @magic_arguments()
+ @argument("--rounds", "-r", type=int, default=5, help="Number of timed rounds (default: 5)")
+ @argument("--warmup", "-w", action="store_true", help="Run one un-timed warmup round before measuring")
+ @argument("--memory", "-m", action="store_true", help="Track peak memory usage with memray")
+ @argument("--profile", "-p", action="store_true", help="Capture a cProfile .prof file")
+ @argument(
+ "--dask",
+ "-d",
+ action="store_true",
+ help="Collect Dask metrics (task stream, memory sampler, performance report)",
+ )
+ @argument(
+ "--collection",
+ type=str,
+ default=None,
+ metavar="VAR",
+ help="Name of a Dask collection variable; also records graph size and length",
+ )
+ @argument("--name", "-n", type=str, default=None, help="Name for this benchmark entry")
+ def lbench(self, line: str, cell: str):
+ """Benchmark a cell's code and save results to a lbench-compatible JSON log."""
+ args = parse_argstring(self.lbench, line)
+
+ ip = self.shell
+ ns = ip.user_ns
+
+ name = args.name or f"cell_{datetime.now().strftime('%H%M%S')}"
+ fullname = f"notebook::{name}"
+ run_dir = _get_run_dir()
+
+ code = compile(cell, f"<lbench:{name}>", "exec")
+
+ def run_cell():
+ exec(code, ns) # noqa: S102 – intentional notebook execution
+
+ # --- time -----------------------------------------------------------
+ data = time_function(run_cell, rounds=args.rounds, warmup=args.warmup)
+
+ # --- optional profiling ---------------------------------------------
+ extra_info: dict = {}
+
+ if args.profile:
+ extra_info["cprofile_path"] = run_cprofile(run_cell, run_dir)
+
+ if args.memory:
+ extra_info["peak_memory_bytes"] = run_memray(run_cell, run_dir)
+
+ # --- optional dask metrics ------------------------------------------
+ if args.dask:
+ dask_info = run_dask_benchmark(run_cell, run_dir)
+
+ if args.collection:
+ collection = ns.get(args.collection)
+ if collection is None:
+ raise NameError(f"--collection: variable {args.collection!r} not found in namespace")
+ graph = collection.dask
+ dask_info["dask_graph_len"] = len(graph)
+ dask_info["dask_graph_size_bytes"] = sum(sys.getsizeof(graph[k]) for k in graph)
+
+ extra_info["dask"] = dask_info
+
+ # --- build entry & write JSON ---------------------------------------
+ entry = make_benchmark_entry(
+ name=name,
+ fullname=fullname,
+ data=data,
+ extra_info=extra_info,
+ )
+ json_path = write_benchmark_json(run_dir, [entry])
+
+ # --- display --------------------------------------------------------
+ stats = entry["stats"]
+ print(
+ f"{stats['rounds']} rounds "
+ f"mean: {_fmt_time(stats['mean'])} ± {_fmt_time(stats['stddev'])} "
+ f"(min: {_fmt_time(stats['min'])}, max: {_fmt_time(stats['max'])})"
+ )
+ if "peak_memory_bytes" in extra_info:
+ print(f"peak memory: {_fmt_memory(extra_info['peak_memory_bytes'])}")
+ if "dask" in extra_info:
+ d = extra_info["dask"]
+ print(f"dask tasks: {d.get('n_tasks', '?')}", end="")
+ if "peak_memory_bytes" in d:
+ print(f" peak memory: {_fmt_memory(d['peak_memory_bytes'])}", end="")
+ if "dask_graph_len" in d:
+ print(f" graph nodes: {d['dask_graph_len']}", end="")
+ print()
+ print(f"perf report: {d['performance_report']}")
+ if "cprofile_path" in extra_info:
+ print(f"cProfile: {extra_info['cprofile_path']}")
+ print(f"log: {json_path}")
diff --git a/src/lbench/pytest/fixtures.py b/src/lbench/pytest/fixtures.py
index 630da67..5cd09f9 100644
--- a/src/lbench/pytest/fixtures.py
+++ b/src/lbench/pytest/fixtures.py
@@ -1,6 +1,8 @@
from pathlib import Path
import sys
+from dask.sizeof import sizeof
+
import pytest
from pytest import fixture
from distributed import Client
@@ -52,15 +54,16 @@ def dask_benchmark_func(func, *args, **kwargs):
@fixture
def lbench_dask_collection(lbench_dask, benchmark):
- def collection_benchmark_func(collection):
+ def collection_benchmark_func(collection, measure_memory=True):
run_func = lambda: collection.compute()
graph = collection.dask
graph_len = len(graph)
- graph_size = sum(sys.getsizeof(graph[key]) for key in graph.keys())
lbench_dask(run_func)
benchmark.extra_info["dask"]["dask_graph_len"] = graph_len
- benchmark.extra_info["dask"]["dask_graph_size_bytes"] = graph_size
+ if measure_memory:
+ size = sizeof(graph)
+ benchmark.extra_info["dask"]["dask_graph_size_bytes"] = size
return collection_benchmark_func
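A sketch of a pytest benchmark exercising the updated fixture; the delayed function is illustrative only, and the test still relies on the usual lbench_dask/benchmark fixture setup:

import dask

@dask.delayed
def slow_add(a, b):
    return a + b

def test_add_benchmark(lbench_dask_collection):
    collection = slow_add(1, 2)
    # Times collection.compute() and records the graph length; the dask.sizeof
    # measurement is skipped here by passing measure_memory=False.
    lbench_dask_collection(collection, measure_memory=False)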
diff --git a/src/lbench/pytest/plugin.py b/src/lbench/pytest/plugin.py
index 72f6e38..fd8c7c4 100644
--- a/src/lbench/pytest/plugin.py
+++ b/src/lbench/pytest/plugin.py
@@ -39,7 +39,7 @@ def pytest_configure(config: pytest.Config):
# configure pytest-benchmark
config.option.benchmark_only = True
- config.option.benchmark_json = (run_dir / "pytest-benchmark.json").open("wb") # kinda hacky
+ config.option.benchmark_json = (run_dir / "pytest-benchmark.json").open("wb") # kinda hacky
terminal = config.pluginmanager.get_plugin("terminalreporter")
if terminal:
diff --git a/src/lbench/runner.py b/src/lbench/runner.py
index 2b9b3db..abf796a 100644
--- a/src/lbench/runner.py
+++ b/src/lbench/runner.py
@@ -38,6 +38,7 @@ def run_dask_benchmark(
if client is None:
from distributed import get_client
+
client = get_client()
report_path = run_dir / f"dask_performance_report_{uuid.uuid4()}.html"
@@ -105,9 +106,7 @@ def compute_stats(data: list) -> dict:
q1, q3 = min_val, max_val
iqr = q3 - q1
- iqr_outliers = sum(
- 1 for x in data if x < q1 - 1.5 * iqr or x > q3 + 1.5 * iqr
- )
+ iqr_outliers = sum(1 for x in data if x < q1 - 1.5 * iqr or x > q3 + 1.5 * iqr)
stddev_outliers = sum(1 for x in data if abs(x - mean) > stddev)
return {
@@ -139,9 +138,7 @@ def make_benchmark_entry(
params: Optional[dict] = None,
) -> dict:
"""Build a benchmark entry dict in pytest-benchmark JSON format."""
- param_str = (
- "-".join(str(v) for v in params.values()) if params else None
- )
+ param_str = "-".join(str(v) for v in params.values()) if params else None
return {
"group": group,
"name": name,
@@ -185,18 +182,20 @@ def get_machine_info() -> dict:
def get_commit_info() -> dict:
try:
- git_id = subprocess.check_output(
- ["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL
- ).decode().strip()
- git_time = subprocess.check_output(
- ["git", "log", "-1", "--format=%cI"], stderr=subprocess.DEVNULL
- ).decode().strip()
- git_branch = subprocess.check_output(
- ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL
- ).decode().strip()
- dirty = (
- subprocess.call(["git", "diff", "--quiet"], stderr=subprocess.DEVNULL) != 0
+ git_id = (
+ subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip()
+ )
+ git_time = (
+ subprocess.check_output(["git", "log", "-1", "--format=%cI"], stderr=subprocess.DEVNULL)
+ .decode()
+ .strip()
+ )
+ git_branch = (
+ subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL)
+ .decode()
+ .strip()
)
+ dirty = subprocess.call(["git", "diff", "--quiet"], stderr=subprocess.DEVNULL) != 0
return {
"id": git_id,
"time": git_time,
diff --git a/tests/lbench/conftest.py b/tests/lbench/conftest.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/lbench/test_packaging.py b/tests/lbench/test_packaging.py
new file mode 100644
index 0000000..42d6a3b
--- /dev/null
+++ b/tests/lbench/test_packaging.py
@@ -0,0 +1,6 @@
+import lbench
+
+
+def test_version():
+ """Check to see that we can get the package version"""
+ assert lbench.__version__ is not None