diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 84d0e17..9a1bf0b 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -20,9 +20,10 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v5 - - run: pip install . - - run: pip install pylint - - run: pylint --unsafe-load-any-extension=y --disable=fixme $(git ls-files '*.py') + - run: | + pip install . + pip install pylint + pylint --unsafe-load-any-extension=y --disable=fixme $(git ls-files '*.py') precommit: runs-on: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ddf124..c49176f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,10 +13,11 @@ repos: - id: end-of-file-fixer - id: debug-statements -# - repo: https://github.com/christopher-hacker/enforce-notebook-run-order -# rev: 2.1.1 -# hooks: -# - id: enforce-notebook-run-order + - repo: https://github.com/christopher-hacker/enforce-notebook-run-order + rev: 2.1.1 + hooks: + - id: enforce-notebook-run-order + exclude: tests/examples/bad.ipynb - repo: local hooks: @@ -29,9 +30,9 @@ repos: types: [jupyter] exclude: tests/examples/bad.ipynb - - id: check-badges - name: check badges - entry: check_badges + - id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: python -m hooks.check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 29786dc..00b401f 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -6,10 +6,10 @@ stages: [pre-commit] types: [jupyter] -- id: check-badges - name: check badges - description: check badges in Jupyter Notebook - entry: check_badges +- id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure + description: check notebook has open-atmos structure language: python stages: [pre-commit] types: [jupyter] diff --git a/README.md b/README.md index 5a00378..ebf7b00 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,74 @@ # devops_tests pytest test routines asserting for GitHub issue-linked TO-DO labelling in the code, README link consistency and some Jupyter notebook sanity checks + +## Pre-commit hooks: +### `check_notebook_open_atmos_structure` +Check Jupyter notebooks structure default for `open-atmos` projects. +Required: +- three badges in the first cell: + +[![preview notebook](https://img.shields.io/static/v1?label=render%20on&logo=github&color=87ce3e&message=GitHub)](https://github.com/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb) +[![launch on mybinder.org](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/open-atmos/devops_tests.git/main?urlpath=lab/tree/tests/examples/good.ipynb) +[![launch on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb) + +- markdown type of the second cell +- Colab-header in the third cell +``` +import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('devops_tests-examples', 'devops_tests') +``` + +## 0xx -> Execution Problems + +These errors relate to notebook execution state and structural integrity. + +- **NB000** – Missing execution count +- **NB001** – Empty execution +- **NB002** – Cell contains execution error +- **NB003** – Insufficient number of cells (minimum required not met) +- **NB004** – Invalid or missing required content in first cell + +--- + +## 1xx -> Badge Problems + +These errors relate to required badges in the first notebook cell. + +- **NB100** - Incorrect GitHub preview badge +- **NB101** – Incorrect MyBinder badge +- **NB102** – Incorrect Colab badge + +--- + +## 2xx -> Markdown Problems + +These errors relate to required markdown structure in the second notebook cell. + +- **NB200** – Required markdown cell missing +- **NB201** – Invalid markdown structure +- **NB202** – Required markdown section missing + +--- + +## 3xx -> Colab Header Problems + +These errors relate to the required Colab setup header. + +- **NB300** – Header missing +- **NB301** – Header version mismatch +- **NB302** – Invalid header content +- **NB303** – Header in incorrect position + +--- + +## 4xx -> `open-atmos-jupyter-utils` Problems + +These errors relate to the usage of `show_anim()` and `show_plot()` +functions from `open-atmos-jupyter-utils` package required when matplotlib is used. + +- **NB400** - show_plot not used +- **NB401** - show_anim not used diff --git a/hooks/check_badges.py b/hooks/check_badges.py deleted file mode 100755 index 7cc398c..0000000 --- a/hooks/check_badges.py +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=missing-function-docstring -""" -Checks/repairs notebook badge headers. - -This version optionally uses Git to discover the repository root (to build -repo-relative notebook URLs) and uses Python's logging module instead of print() -for structured messages. - -Behavior: -- By default it will attempt to detect the git repo root (if GitPython is - installed and the file is in a git working tree) and fall back to Path.cwd(). -- Use --no-git to force using Path.cwd(). -- Use --repo-root PATH to explicitly set repository root. -- Use --verbose to enable debug logging. - -Usage: - check_badges --repo-name=devops_tests [--repo-owner=open-atmos] [--fix-header] - [--no-git] [--repo-root PATH] [--verbose] FILES... -""" - -from __future__ import annotations - -import argparse -import logging -from collections.abc import Sequence -from pathlib import Path -from typing import Iterable, List, Tuple, Optional - -import nbformat -from nbformat import NotebookNode - -from .utils import NotebookTestError - -REPO_OWNER_DEFAULT = "open-atmos" - - -logger = logging.getLogger(__name__) - - -def relative_path(absolute_path, repo_root): - """returns a path relative to the repo base (converting backslashes to slashes on Windows)""" - absolute_path = Path(absolute_path).resolve() - repo_root = Path(repo_root).resolve() - - try: - relpath = absolute_path.relative_to(repo_root) - except ValueError as exc: - raise ValueError( - f"{absolute_path} is not inside repo root {repo_root}" - ) from exc - - return relpath.as_posix() - - -def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: - svg_badge_url = ( - "https://img.shields.io/static/v1?" - + "label=render%20on&logo=github&color=87ce3e&message=GitHub" - ) - link = f"https://github.com/{repo_owner}/{repo_name}/blob/main/{relpath}" - return f"[![preview notebook]({svg_badge_url})]({link})" - - -def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: - svg_badge_url = "https://mybinder.org/badge_logo.svg" - link = ( - f"https://mybinder.org/v2/gh/{repo_owner}/{repo_name}.git/main?urlpath=lab/tree/" - + f"{relpath}" - ) - return f"[![launch on mybinder.org]({svg_badge_url})]({link})" - - -def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: - svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" - link = ( - f"https://colab.research.google.com/github/{repo_owner}/{repo_name}/blob/main/" - + f"{relpath}" - ) - return f"[![launch on Colab]({svg_badge_url})]({link})" - - -def find_repo_root(start_path: Path, prefer_git: bool = True) -> Path: - """ - Find repository root for the given start_path. - - If prefer_git is True, attempt to use GitPython to locate the repository root - (searching parent directories). If that fails, fall back to cwd(). - """ - if prefer_git: - try: - # Import locally so the module doesn't hard-depend on GitPython at import time - from git import Repo # pylint: disable=import-outside-toplevel - - try: - repo = Repo(start_path, search_parent_directories=True) - if repo.working_tree_dir: - root = Path(repo.working_tree_dir) - logger.debug("Discovered git repository root: %s", root) - return root - except Exception as exc: # pylint: disable=broad-exception-caught - logger.debug("Git repo detection failed for %s: %s", start_path, exc) - except ImportError as exc: - logger.debug("GitPython not available or import failed: %s", exc) - - cwd = Path.cwd() - logger.debug("Using current working directory as repo root: %s", cwd) - return cwd - - -def expected_badges_for( - notebook_path: Path, - repo_name: str, - repo_owner: str, - repo_root: Optional[Path] = None, -) -> List[str]: - """ - Return the canonical badge lines expected for notebook_path. - If repo_root is provided, attempt to build a relative path from it; otherwise - find repository root automatically (using find_repo_root). - """ - if repo_root is None: - repo_root = find_repo_root(notebook_path) - - if repo_root is None: - raise ValueError("Could not determine repo root") - - relpath = relative_path(notebook_path, repo_root) - return [ - preview_badge_markdown(relpath, repo_name, repo_owner), - mybinder_badge_markdown(relpath, repo_name, repo_owner), - colab_badge_markdown(relpath, repo_name, repo_owner), - ] - - -def read_notebook(path: Path) -> NotebookNode: - with path.open(encoding="utf8") as fp: - return nbformat.read(fp, nbformat.NO_CONVERT) - - -def write_notebook(path: Path, nb: NotebookNode) -> None: - with path.open("w", encoding="utf8") as fp: - nbformat.write(nb, fp) - - -def first_cell_lines(nb: NotebookNode) -> List[str]: - """Return list of stripped lines from the first cell if it's markdown, else []""" - if not nb.cells: - return [] - first = nb.cells[0] - if first.cell_type != "markdown": - return [] - return [ln.strip() for ln in str(first.source).splitlines() if ln.strip() != ""] - - -def badges_match( - actual_lines: Iterable[str], expected_lines: Iterable[str] -) -> Tuple[bool, str]: - """ - Check whether the expected badge lines are present in actual_lines. - Tolerant: ignores order, strips whitespace. - Returns (matches, message). Message empty on match else explains which badges missing. - """ - actual_set = {ln.strip() for ln in actual_lines} - expected_list = list(expected_lines) - missing = [exp for exp in expected_list if exp.strip() not in actual_set] - if not missing: - return True, "" - return False, f"Missing badges: {missing}" - - -def test_notebook_has_at_least_three_cells(notebook_filename: str) -> None: - """checks if all notebooks have at least three cells""" - nb = read_notebook(Path(notebook_filename)) - if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 3 cells") - - -def test_first_cell_contains_three_badges( - notebook_filename: str, - repo_name: str, - repo_owner: str = REPO_OWNER_DEFAULT, - repo_root: Optional[Path] = None, -) -> None: - """ - checks if the notebook's first cell contains the three badges. - Raises ValueError on failure. - - The optional repo_root can be provided to control how the notebook path is - converted into the remote URL. If None, the module will attempt to detect - a git repo root and fall back to cwd(). - """ - nb = read_notebook(Path(notebook_filename)) - lines = first_cell_lines(nb) - expected = expected_badges_for( - Path(notebook_filename), repo_name, repo_owner, repo_root - ) - ok, msg = badges_match(lines, expected) - if not ok: - raise ValueError(msg) - - -def test_second_cell_is_a_markdown_cell(notebook_filename: str) -> None: - """checks if all notebooks have their second cell as markdown""" - nb = read_notebook(Path(notebook_filename)) - if len(nb.cells) < 2: - raise ValueError("Notebook has no second cell") - if nb.cells[1].cell_type != "markdown": - raise ValueError("Second cell is not a markdown cell") - - -def main(argv: Sequence[str] | None = None) -> int: - parser = argparse.ArgumentParser() - parser.add_argument("--repo-name", required=True) - parser.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) - parser.add_argument( - "--fix-header", - action="store_true", - help="If set, attempt to fix notebooks missing the header.", - ) - parser.add_argument( - "--no-git", - action="store_true", - help="Do not attempt to detect git repo root; use cwd()", - ) - parser.add_argument( - "--repo-root", help="Explicit repository root to use when building URLs" - ) - parser.add_argument("--verbose", action="store_true", help="Enable debug logging") - parser.add_argument("filenames", nargs="*", help="Filenames to check.") - args = parser.parse_args(argv) - - # configure logging - level = logging.DEBUG if args.verbose else logging.INFO - logging.basicConfig(level=level, format="%(levelname)s: %(message)s") - - prefer_git = not args.no_git - repo_root_path: Optional[Path] = Path(args.repo_root) if args.repo_root else None - retval = 0 - for filename in args.filenames: - path = Path(filename) - try: - effective_repo_root = repo_root_path or ( - find_repo_root(path, prefer_git) if prefer_git else Path.cwd() - ) - - test_notebook_has_at_least_three_cells(filename) - test_first_cell_contains_three_badges( - filename, args.repo_name, args.repo_owner, effective_repo_root - ) - test_second_cell_is_a_markdown_cell(filename) - - logger.info("%s: OK", filename) - retval = retval or 0 - except NotebookTestError as exc: - logger.error("%s: %s", filename, exc) - retval = 1 - return retval - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py new file mode 100755 index 0000000..6bbc8bc --- /dev/null +++ b/hooks/check_notebook_open_atmos_structure.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 +""" +Checks notebook structure required in open-atmos projects. +Requirements: +- first cell contains three correct badges +- second cell is of type markdown +- third cell is Colab magick cell +""" + +from __future__ import annotations + +import argparse +import logging +from collections.abc import Sequence, Iterable +from pathlib import Path +from typing import Optional, List, Tuple + +import nbformat +from nbformat import NotebookNode + +from .utils import cell_error, open_and_test_notebooks +from .open_atmos_colab_header import check_colab_header + +REPO_OWNER_DEFAULT = "open-atmos" + +logger = logging.getLogger(__name__) + + +def resolve_repo_root( + start_path: Path, + *, + explicit_root: Path | None, + prefer_git: bool, +) -> Path: + """Resolve the repository root for the given path.""" + if explicit_root is not None: + return explicit_root + if prefer_git: + try: + # Import locally so the module doesn't hard-depend on GitPython at import time + from git import Repo # pylint: disable=import-outside-toplevel + + try: + repo = Repo(start_path, search_parent_directories=True) + if repo.working_tree_dir: + root = Path(repo.working_tree_dir) + logger.debug("Discovered git repository root: %s", root) + return root + except Exception as exc: # pylint: disable=broad-exception-caught + repo = None + logger.debug("Git repo detection failed for %s: %s", start_path, exc) + except ImportError as exc: + logger.debug("GitPython not available or import failed: %s", exc) + + cwd = Path.cwd() + logger.debug("Using current working directory as repo root: %s", cwd) + return cwd + + +def relative_path(absolute_path, repo_root): + """Return the path relative to the repository root.""" + absolute_path = Path(absolute_path).resolve() + repo_root = Path(repo_root).resolve() + + try: + relpath = absolute_path.relative_to(repo_root) + except ValueError as exc: + raise ValueError( + f"{absolute_path} is not inside repo root {repo_root}" + ) from exc + + return relpath.as_posix() + + +def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the GitHub-preview badge.""" + url = ( + "https://img.shields.io/static/v1?" + + "label=render%20on&logo=github&color=87ce3e&message=GitHub" + ) + link = f"https://github.com/{repo_owner}/{repo_name}/blob/main/{relpath}" + return f"[![preview notebook]({url})]({link})" + + +def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the Binder badge.""" + url = "https://mybinder.org/badge_logo.svg" + link = ( + f"https://mybinder.org/v2/gh/{repo_owner}/{repo_name}.git/main?urlpath=lab/tree/" + + f"{relpath}" + ) + return f"[![launch on mybinder.org]({url})]({link})" + + +def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the Colab badge.""" + url = "https://colab.research.google.com/assets/colab-badge.svg" + link = ( + f"https://colab.research.google.com/github/{repo_owner}/{repo_name}/blob/main/" + + f"{relpath}" + ) + return f"[![launch on Colab]({url})]({link})" + + +def expected_badges_for( + notebook_path: Path, + repo_name: str, + repo_owner: str, + repo_root: Optional[Path], +) -> List[str]: + """ + Return the canonical badge lines expected for notebook_path. + If repo_root is provided, attempt to build a relative path from it; otherwise + find repository root automatically (using find_repo_root). + """ + relpath = relative_path(notebook_path, repo_root) + args = (relpath, repo_name, repo_owner) + return [ + preview_badge_markdown(*args), + mybinder_badge_markdown(*args), + colab_badge_markdown(*args), + ] + + +def read_notebook(path: Path) -> NotebookNode: + with path.open(encoding="utf8") as fp: + return nbformat.read(fp, nbformat.NO_CONVERT) + + +def write_notebook(path: Path, nb: NotebookNode) -> None: + with path.open("w", encoding="utf8") as fp: + nbformat.write(nb, fp) + + +def first_cell_lines(nb: NotebookNode) -> List[str]: + if not nb.cells or nb.cells[0].cell_type != "markdown": + return [] + return [ln.strip() for ln in str(nb.cells[0].source).splitlines() if ln.strip()] + + +def badges_match( + actual_lines: Iterable[str], expected_lines: Iterable[str] +) -> Tuple[bool, str]: + actual_set = {ln.strip() for ln in actual_lines} + missing = [exp for exp in expected_lines if exp.strip() not in actual_set] + if not missing: + return True, "" + return False, f"Missing badges: {missing}" + + +def test_notebook_has_at_least_three_cells(notebook, filename) -> Iterable: + """checks if all notebooks have at least three cells""" + if len(notebook.cells) < 3: + yield cell_error( + filename, + 0, + code="NB003", + message="Insufficient number of cells (minimum required is 3).", + ) + + +def test_first_cell_contains_three_badges( + notebook_filename: str, + repo_name: str, + repo_owner: str, + repo_root: Path, +) -> Iterable: + nb = read_notebook(Path(notebook_filename)) + lines = first_cell_lines(nb) + expected = expected_badges_for( + Path(notebook_filename), repo_name, repo_owner, repo_root + ) + ok, msg = badges_match(lines, expected) + if not ok: + yield cell_error(notebook_filename, 0, code="NB004", message=msg) + + +def test_second_cell_is_a_markdown_cell(notebook, filename) -> Iterable: + if len(notebook.cells) < 2: + yield cell_error( + filename, 1, code="NB200", message="Notebook has no second cell." + ) + elif notebook.cells[1].cell_type != "markdown": + yield cell_error( + filename, + 1, + code="NB201", + message="Second cell is not a markdown cell", + ) + + +def test_colab_header( + notebook_filename: str, repo_name: str, fix_header: bool = False +) -> Iterable: + """Wrap check_colab_header to yield errors instead of raising.""" + try: + yield from check_colab_header( + Path(notebook_filename), repo_name, fix_header, "" + ) + except Exception as exc: + yield cell_error( + notebook_filename, 2, code="NB205", message=f"Colab header error: {exc}" + ) + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser() + p.add_argument("--repo-name", required=True) + p.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) + p.add_argument( + "--fix-header", + action="store_true", + help="Attempt to fix notebooks missing header", + ) + p.add_argument( + "--no-git", action="store_true", help="Do not detect git repo root, use cwd()" + ) + p.add_argument("--repo-root", help="Explicit repository root path") + p.add_argument("--verbose", action="store_true", help="Enable debug logging") + p.add_argument("filenames", nargs="*", help="Notebooks to check") + return p + + +def configure_logging(verbose: bool) -> None: + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig(level=level, format="%(levelname)s: %(message)s") + + +def main(argv: Sequence[str] | None = None) -> int: + args = build_parser().parse_args(argv) + configure_logging(args.verbose) + + explicit_repo_root = Path(args.repo_root) if args.repo_root else None + prefer_git = not args.no_git + + repo_root = None + if explicit_repo_root or prefer_git: + if args.filenames: + repo_root = resolve_repo_root( + Path(args.filenames[0]), + explicit_root=explicit_repo_root, + prefer_git=prefer_git, + ) + + def wrap_first_cell_badges(notebook, filename): + yield from test_first_cell_contains_three_badges( + filename, args.repo_name, args.repo_owner, repo_root + ) + + def wrap_colab_header(notebook, filename): + yield from test_colab_header(filename, args.repo_name, args.fix_header) + + test_functions = [ + test_notebook_has_at_least_three_cells, + wrap_first_cell_badges, + test_second_cell_is_a_markdown_cell, + wrap_colab_header, + ] + + return open_and_test_notebooks( + args.filenames, + test_functions, + ) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index c23648b..d3a26df 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -1,27 +1,38 @@ #!/usr/bin/env python3 -# pylint: disable=missing-module-docstring +""" +Checks notebook execution status for Jupyter notebooks""" + from __future__ import annotations +import argparse from collections.abc import Sequence -from .utils import open_and_test_notebooks +from .utils import open_and_test_notebooks, cell_error -def test_jetbrains_bug_py_66491(notebook): +def test_jetbrains_bug_py_66491(notebook, filename): """checks if all notebook have the execution_count key for each cell in JSON, which is required by GitHub renderer and may not be generated by some buggy PyCharm versions: https://youtrack.jetbrains.com/issue/PY-66491""" - for cell in notebook.cells: + for idx, cell in enumerate(notebook.cells): if cell.cell_type == "code" and not hasattr(cell, "execution_count"): - raise ValueError( - "Notebook cell missing execution_count attribute. " - "(May be due to PyCharm bug, see: https://youtrack.jetbrains.com/issue/PY-66491 )" + yield cell_error( + filename, + idx, + code="NB000", + message="Notebook cell missing execution_count attribute. " + "(May be due to PyCharm bug, see: https://youtrack.jetbrains.com/issue/PY-66491 )", ) def main(argv: Sequence[str] | None = None) -> int: - """test all notebooks""" + """Test all notebooks""" + + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + return open_and_test_notebooks( - argv=argv, + filenames=args.filenames, test_functions=[ test_jetbrains_bug_py_66491, ], diff --git a/hooks/notebooks_output.py b/hooks/notebooks_output.py index 20dcdec..bffdfdd 100755 --- a/hooks/notebooks_output.py +++ b/hooks/notebooks_output.py @@ -3,19 +3,25 @@ from __future__ import annotations +import argparse from collections.abc import Sequence -from .utils import open_and_test_notebooks +from .utils import open_and_test_notebooks, cell_error -def test_cell_contains_output(notebook): +def test_cell_contains_output(notebook, filename): """checks if all notebook cells have an output present""" for cell_idx, cell in enumerate(notebook.cells): if cell.cell_type == "code" and cell.source != "": if cell.execution_count is None: - raise ValueError(f"Cell {cell_idx} does not contain output") + yield cell_error( + filename, + cell_idx, + code="NB001", + message="Cell does not contain output", + ) -def test_no_errors_or_warnings_in_output(notebook): +def test_no_errors_or_warnings_in_output(notebook, filename): """checks if all example Jupyter notebooks have clear std-err output (i.e., no errors or warnings) visible; except acceptable diagnostics from the joblib package""" @@ -23,21 +29,36 @@ def test_no_errors_or_warnings_in_output(notebook): if cell.cell_type == "code": for output in cell.outputs: ot = output.get("output_type") - if ot == "error": - raise ValueError( - f"Cell [{cell_idx}] contain error or warning. \n\n" - f"Cell [{cell_idx}] output:\n{output}\n" + is_error = ot == "error" + is_stderr = ( + ot == "stream" + and output.get("name") == "stderr" + and (text := output.get("text")) + and not text.startswith("[Parallel(n_jobs=") + ) + + if is_error or is_stderr: + yield cell_error( + filename, + cell_idx, + code="NB002", + message=( + "Cell contains execution error or warning.\n\n" + f"Cell output:\n{output}\n" + ), ) - if ot == "stream" and output.get("name") == "stderr": - out_text = output.get("text") - if out_text and not out_text.startswith("[Parallel(n_jobs="): - raise ValueError(f" Cell [{cell_idx}]: {out_text}") def main(argv: Sequence[str] | None = None) -> int: - """test all notebooks""" + """Test all notebooks""" + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + return open_and_test_notebooks( - argv=argv, + filenames=args.filenames, test_functions=[ test_cell_contains_output, test_no_errors_or_warnings_in_output, diff --git a/hooks/notebooks_using_jupyter_utils.py b/hooks/notebooks_using_jupyter_utils.py index 8338443..eccf275 100755 --- a/hooks/notebooks_using_jupyter_utils.py +++ b/hooks/notebooks_using_jupyter_utils.py @@ -4,27 +4,33 @@ from __future__ import annotations +import argparse from collections.abc import Sequence -from .utils import open_and_test_notebooks +from .utils import open_and_test_notebooks, cell_error -def test_show_plot_used_instead_of_matplotlib(notebook): +def test_show_plot_used_instead_of_matplotlib(notebook, filename): """checks if plotting is done with open_atmos_jupyter_utils show_plot()""" matplot_used = False show_plot_used = False - for cell in notebook.cells: + matplot_idx = 0 + for idx, cell in enumerate(notebook.cells): if cell.cell_type == "code": if "pyplot.show(" in cell.source or "plt.show(" in cell.source: matplot_used = True + matplot_idx = idx if "show_plot(" in cell.source: show_plot_used = True if matplot_used and not show_plot_used: - raise ValueError( - "if using matplotlib, please use open_atmos_jupyter_utils.show_plot()" + yield cell_error( + filename, + matplot_idx, + code="NB400", + message="If using matplotlib, please use open_atmos_jupyter_utils.show_plot()", ) -def test_show_anim_used_instead_of_matplotlib(notebook): +def test_show_anim_used_instead_of_matplotlib(notebook, filename): """checks if animation generation is done with open_atmos_jupyter_utils show_anim()""" matplot_used = False show_anim_used = False @@ -39,14 +45,24 @@ def test_show_anim_used_instead_of_matplotlib(notebook): if "show_anim(" in cell.source: show_anim_used = True if matplot_used and not show_anim_used: - raise AssertionError("""if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""") + yield cell_error( + filename, + matplot_idx, + code="NB401", + message="If using matplotlib for animations, " + "please use open_atmos_jupyter_utils.show_anim()", + ) def main(argv: Sequence[str] | None = None) -> int: - """test all notebooks""" + """Test all notebooks""" + + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + return open_and_test_notebooks( - argv=argv, + filenames=args.filenames, test_functions=[ test_show_anim_used_instead_of_matplotlib, test_show_plot_used_instead_of_matplotlib, diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py new file mode 100644 index 0000000..d16db07 --- /dev/null +++ b/hooks/open_atmos_colab_header.py @@ -0,0 +1,133 @@ +"""Extract version from existing header and check if header is correct""" + +from __future__ import annotations + +import re +import nbformat +from pygments.styles.dracula import yellow + +from .utils import cell_error + +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P[^'\"]+)['\"]\s*,\s*" + r"['\"](?P
[^'\"]+)['\"]\s*\)" +) + + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract version info from cell source + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + text_found = _PIP_INSTALL_RE.search(cell_source) + if not text_found: + return None, None + + examples_pkg = text_found.group("examples") + main_pkg = text_found.group("main") + + if not main_pkg.startswith(repo_name) or not examples_pkg.startswith( + f"{repo_name}-examples" + ): + return None, None + print(examples_pkg, main_pkg) + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" + + +def build_header(repo_name: str, version: str) -> str: + """required header pattern in open-atmos notebooks""" + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + + +HEADER_REQUIRED_PATTERNS = [ + "google.colab", + "open-atmos-jupyter-utils", + "pip_install_on_colab", +] + + +def looks_like_header(cell_source: str) -> bool: + """check if the cell source looks like required header""" + return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) + + +def check_colab_header(notebook_path, repo_name, fix, hook_version): + """check if colab header is correct""" + nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) + header_index = None + + for idx, cell in enumerate(nb.cells): + if cell.cell_type == "code" and looks_like_header(cell.source): + header_index = idx + break + + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return + + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version != main_version: + yield cell_error( + notebook_path, + header_index, + "NB301", + f"\nVersion mismatch in header: {examples_version!r} != {main_version!r}", + ) + + final_version = resolve_version(main_version, hook_version) + correct_header = build_header(repo_name, final_version) + + modified = False + if header_cell.source != correct_header: + if not fix: + yield cell_error( + notebook_path, + header_index, + "NB302", + f"Incorrect Colab cell, expected header:\n---\n{correct_header}\n---", + ) + else: + header_cell.source = correct_header + modified = True + + if header_index != 2: + if not fix: + yield cell_error( + notebook_path, + header_index, + code="NB303", + message="Colab header in wrong position. Expected cell index: 2.", + ) + else: + nb.cells.insert(2, nb.cells.pop(header_index)) + modified = True + + if modified: + nbformat.write(nb, notebook_path) + return modified diff --git a/hooks/utils.py b/hooks/utils.py index 2008907..9c26d8d 100644 --- a/hooks/utils.py +++ b/hooks/utils.py @@ -4,13 +4,34 @@ import os import pathlib -import argparse +from pathlib import Path from git import Git + +from dataclasses import dataclass +from typing import Callable, Iterable, Any import nbformat -class NotebookTestError(BaseException): - """Raised when a notebook validation test fails.""" +@dataclass +class NotebookError: + path: str + line: int + col: int + code: str + message: str + + def format(self) -> str: + return f"{self.path}:{self.line}:{self.col}: {self.code} {self.message}" + + +def cell_error(path, cell_idx, code, message): + return NotebookError( + path=path, + line=cell_idx + 1, + col=1, + code=code, + message=message, + ) def find_files(path_to_folder_from_project_root=".", file_extension=None): @@ -41,20 +62,51 @@ def repo_path(): return path -def open_and_test_notebooks(argv, test_functions): - """Create argparser and run notebook tests""" - parser = argparse.ArgumentParser() - parser.add_argument("filenames", nargs="*", help="Filenames to check.") - args = parser.parse_args(argv) - - retval = 0 - for filename in args.filenames: - with open(filename, encoding="utf8") as notebook_file: - notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) - for func in test_functions: - try: - func(notebook) - except NotebookTestError as e: - print(f"{filename} : {e}") - retval = 1 - return retval +def open_and_test_notebooks( + filenames: list[str], + test_functions: list[Callable[[nbformat.NotebookNode, str], Iterable[Any]]], +) -> int: + """ + Run notebook tests on a list of filenames using generator-based hooks. + + Each test function should accept two arguments: + notebook: nbformat.NotebookNode + notebook_filename: str + and yield NotebookError objects. + """ + all_errors = [] + + for filename in filenames: + notebook_path = Path(filename) + try: + with notebook_path.open(encoding="utf8") as f: + notebook = nbformat.read(f, nbformat.NO_CONVERT) + except Exception as exc: + all_errors.append( + cell_error( + filename, + 0, + code="NB000", + message=f"Failed to read notebook: {exc}", + ) + ) + continue + + for test_func in test_functions: + try: + for error in test_func(notebook, filename): + all_errors.append(error) + except Exception as exc: + all_errors.append( + cell_error( + filename, + 0, + code="NBXXX", + message=f"Exception in test {test_func.__name__}: {exc}", + ) + ) + + for error in all_errors: + print(error.format()) + + return 1 if all_errors else 0 diff --git a/pyproject.toml b/pyproject.toml index 7fea13a..d25de5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,6 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" -check_badges = "hooks.check_badges:main" +check_notebook_open_atmos_structure = "hooks.check_notebook_open_atmos_structure:main" notebooks_output = "hooks.notebooks_output:main" notebooks_using_jupyter_utils = "hooks.notebooks_using_jupyter_utils:main" diff --git a/tests/examples/good.ipynb b/tests/examples/good.ipynb index 329f58c..c43ec47 100644 --- a/tests/examples/good.ipynb +++ b/tests/examples/good.ipynb @@ -1,32 +1,29 @@ { "cells": [ { - "cell_type": "markdown", - "id": "18b1047b", "metadata": {}, + "cell_type": "markdown", "source": [ "[![preview notebook](https://img.shields.io/static/v1?label=render%20on&logo=github&color=87ce3e&message=GitHub)](https://github.com/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb)\n", "[![launch on mybinder.org](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/open-atmos/devops_tests.git/main?urlpath=lab/tree/tests/examples/good.ipynb)\n", "[![launch on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb)" - ] + ], + "id": "7896d6a8e70874de" }, { - "cell_type": "markdown", - "id": "59bad8ebb84ac4ce", "metadata": {}, - "source": [ - "Notebook description" - ] + "cell_type": "markdown", + "source": "Notebook description", + "id": "dd6f026f26ee6758" }, { - "cell_type": "code", - "id": "cc05fb4d", "metadata": { "ExecuteTime": { - "end_time": "2026-02-06T19:44:32.687512Z", - "start_time": "2026-02-06T19:44:32.681345Z" + "end_time": "2026-02-07T18:01:46.587131Z", + "start_time": "2026-02-07T18:01:46.580954Z" } }, + "cell_type": "code", "source": [ "import os, sys\n", "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS\n", @@ -35,40 +32,29 @@ " from open_atmos_jupyter_utils import pip_install_on_colab\n", " pip_install_on_colab('devops_tests-examples', 'devops_tests')" ], + "id": "62a42069512ea38a", "outputs": [], "execution_count": 1 }, { - "cell_type": "code", - "id": "405d8abe602ec659", "metadata": { "ExecuteTime": { - "end_time": "2026-02-06T19:44:32.693544Z", - "start_time": "2026-02-06T19:44:32.692046Z" + "end_time": "2026-02-07T18:01:46.630852Z", + "start_time": "2026-02-07T18:01:46.628713Z" } }, - "source": [], + "cell_type": "code", + "source": "", + "id": "5115215c3656a30b", "outputs": [], "execution_count": null } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "name": "python3", "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" + "display_name": "Python 3 (ipykernel)" } }, "nbformat": 4, diff --git a/tests/test_check_badges_examples.py b/tests/test_check_notebook_open_atmos_structure.py similarity index 90% rename from tests/test_check_badges_examples.py rename to tests/test_check_notebook_open_atmos_structure.py index 494f41a..ef3c8f1 100644 --- a/tests/test_check_badges_examples.py +++ b/tests/test_check_notebook_open_atmos_structure.py @@ -1,17 +1,18 @@ # pylint: disable=missing-function-docstring """ -Unit tests for hooks/check_badges.py: good and bad notebook examples. +Unit tests for hooks/check_notebook_open_atmos_structure.py: good and bad notebook examples. These tests write small notebooks to temporary files and call the badge-check functions that expect filenames. """ +import pathlib import nbformat from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell import pytest -from hooks import check_badges as cb +from hooks import check_notebook_open_atmos_structure as cb def _write_nb_and_return_path(tmp_path, nb, name="nb.ipynb"): @@ -70,7 +71,9 @@ def test_first_cell_bad_badges_raises(tmp_path): ) path = _write_nb_and_return_path(tmp_path, nb, name="badbadges.ipynb") with pytest.raises(ValueError): - cb.test_first_cell_contains_three_badges(path, "devops_tests") + cb.test_first_cell_contains_three_badges( + path, "devops_tests", repo_root=pathlib.Path("") + ) def test_second_cell_not_markdown_raises(tmp_path): diff --git a/tests/test_run_hooks_on_examples.py b/tests/test_run_hooks_on_examples.py index 6372c74..199eca3 100644 --- a/tests/test_run_hooks_on_examples.py +++ b/tests/test_run_hooks_on_examples.py @@ -1,16 +1,11 @@ # pylint: disable=missing-function-docstring - """ -Run the hooks on good and bad example notebooks and assert behavior + logs. - -This test executes the hook modules the same way pre-commit would: - python -m hooks.check_badges --repo-name=... PATH - python -m hooks.check_notebooks PATH - -It captures stdout/stderr (which is where logging.basicConfig writes), asserts -expected exit codes, and checks stderr for expected failure substrings. +Unit test for checking correct error message +when running hooks on example files (good.ipynb, bad.ipynb). -Adjust the candidate paths in _find_example() if your examples are stored elsewhere. +Test for local pre-commit hooks: +- notebook_output, +- check_notebooks. """ from __future__ import annotations @@ -42,44 +37,6 @@ def _run_module(module: str, args: list[str]) -> subprocess.CompletedProcess: return subprocess.run(cmd, capture_output=True, text=True, check=False) -@pytest.mark.parametrize( - "name, expected_badge_exit, expected_badge_msg", - [ - ("good.ipynb", 0, ""), - ("bad.ipynb", 1, "Missing badges"), - ], -) -def test_check_badges_on_examples( - name: str, expected_badge_exit: int, expected_badge_msg: str -): - nb_path = _find_example(name) - if nb_path is None: - pytest.skip(f"No example notebook found for {name};") - - res = _run_module("hooks.check_badges", ["--repo-name=devops_tests", str(nb_path)]) - combined_msgs = (res.stderr or "") + "\n" + (res.stdout or "") - - # Check exit code first - if expected_badge_exit == 0: - assert ( - res.returncode == 0 - ), f"Expected success; stderr:\n{res.stderr}\nstdout:\n{res.stdout}" - # For success, ensure no obvious error strings are present - assert "Missing badges" not in combined_msgs - assert "ERROR" not in combined_msgs - assert "Traceback" not in combined_msgs - else: - assert ( - res.returncode != 0 - ), f"Expected failure; got exit {res.returncode}\nstdout:\n{res.stdout}" - - assert expected_badge_msg in combined_msgs, ( - f"Expected to find {expected_badge_msg!r} in output" - f"\n---STDERR---\n{res.stderr}" - f"\n---STDOUT---\n{res.stdout}" - ) - - @pytest.mark.parametrize( "name, should_fail, expected_msg_substr", [