From 382006a826cde26bd260f04d3ec1c4281e64eeec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:42:52 +0100 Subject: [PATCH 01/16] update black and create separate file for colab header --- .pre-commit-config.yaml | 2 +- hooks/check_badges.py | 173 ++++++++++++++++++++++++++++------------ hooks/colab_header.py | 119 +++++++++++++++++++++++++++ 3 files changed, 240 insertions(+), 54 deletions(-) create mode 100644 hooks/colab_header.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c29f34..ba10bf0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_stages: [pre-commit] repos: - repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.11.0 + rev: 26.1.0 hooks: - id: black diff --git a/hooks/check_badges.py b/hooks/check_badges.py index dc6f19a..0ef15f7 100755 --- a/hooks/check_badges.py +++ b/hooks/check_badges.py @@ -1,24 +1,59 @@ #!/usr/bin/env python3 # pylint: disable=missing-function-docstring """ -Checks whether notebooks contain badges.""" +Checks whether notebooks contain badges and ensures a consistent Colab header. +""" + from __future__ import annotations import argparse from collections.abc import Sequence +import re import nbformat +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P[^'\"]+)['\"]\s*,\s*" + r"['\"](?P
[^'\"]+)['\"]\s*\)" +) -def _header_cell_text(repo_name, version): - if version is None: - version = "" - return f"""import os, sys -os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract versions from both arguments: + pip_install_on_colab('repo-examples{v}', 'repo{v}') + + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + m = _PIP_INSTALL_RE.search(cell_source) + if not m: + return None, None + + examples_pkg = m.group("examples") + main_pkg = m.group("main") + + if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( + repo_name + ): + return None, None + + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" HEADER_KEY_PATTERNS = [ @@ -28,58 +63,76 @@ def _header_cell_text(repo_name, version): ] -def is_colab_header(cell_source: str) -> bool: - """Return True if the cell looks like a Colab header.""" +def build_header(repo_name: str, version: str) -> str: + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}') +""" + + +def looks_like_header(cell_source: str) -> bool: return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) -def check_colab_header(notebook_path, repo_name, fix, version): - """Check Colab-magic cell and fix if is misspelled, in wrong position or not exists""" +def check_colab_header(notebook_path, repo_name, fix, hook_version): nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) - header_index = None - correct_header = _header_cell_text(repo_name, version) - modified = False - - if not fix: - if nb.cells[2].cell_type != "code" or nb.cells[2].source != correct_header: - raise ValueError("Third cell does not contain correct header") - return modified + if len(nb.cells) < 3: + raise ValueError("Notebook should have at least 3 cells") + # Find existing header if present + header_index = None for idx, cell in enumerate(nb.cells): - if cell.cell_type == "code" and is_colab_header(cell.source): + if cell.cell_type == "code" and looks_like_header(cell.source): header_index = idx break - if header_index is not None: - if nb.cells[header_index].source != correct_header: - nb.cells[header_index].source = correct_header - modified = True - if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True - else: - new_cell = nbformat.v4.new_code_cell(correct_header) - nb.cells.insert(2, new_cell) + # Build final header + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return True + + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version is None or main_version is None: + raise ValueError("Colab header is malformed") + + if examples_version != main_version: + raise ValueError( + f"Version mismatch in header: {examples_version!r} != {main_version!r}" + ) + + final_version = resolve_version(main_version, hook_version) + header_source = build_header(repo_name, final_version) + + modified = False + + if header_cell.source != header_source: + if not fix: + raise ValueError("Colab header is incorrect") + header_cell.source = header_source + modified = True + + if header_index != 2: + nb.cells.insert(2, nb.cells.pop(header_index)) modified = True + if modified: nbformat.write(nb, notebook_path) + return modified -def print_hook_summary(reformatted_files, unchanged_files): - """Print a Black-style summary.""" - for f in reformatted_files: - print(f"\nreformatted {f}") - - total_ref = len(reformatted_files) - total_unchanged = len(unchanged_files) - if total_ref > 0: - print("\nAll done! ✨ 🍰 ✨") - print( - f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " - f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." - ) +# ------------------------------------------------------------------- +# Badge checks +# ------------------------------------------------------------------- def _preview_badge_markdown(absolute_path, repo_name): @@ -110,7 +163,6 @@ def _colab_badge_markdown(absolute_path, repo_name): def test_notebook_has_at_least_three_cells(notebook_filename): - """checks if all notebooks have at least three cells""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if len(nb.cells) < 3: @@ -118,14 +170,16 @@ def test_notebook_has_at_least_three_cells(notebook_filename): def test_first_cell_contains_three_badges(notebook_filename, repo_name): - """checks if all notebooks feature three badges in the first cell""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) + if nb.cells[0].cell_type != "markdown": raise ValueError("First cell is not a markdown cell") + lines = nb.cells[0].source.split("\n") if len(lines) != 3: raise ValueError("First cell does not contain exactly 3 lines (badges)") + if lines[0] != _preview_badge_markdown(notebook_filename, repo_name): raise ValueError("First badge does not match Github preview badge") if lines[1] != _mybinder_badge_markdown(notebook_filename, repo_name): @@ -135,32 +189,45 @@ def test_first_cell_contains_three_badges(notebook_filename, repo_name): def test_second_cell_is_a_markdown_cell(notebook_filename): - """checks if all notebooks have their second cell with some markdown - (hopefully clarifying what the example is about)""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if nb.cells[1].cell_type != "markdown": raise ValueError("Second cell is not a markdown cell") +def print_hook_summary(reformatted_files, unchanged_files): + for f in reformatted_files: + print(f"\nreformatted {f}") + + total_ref = len(reformatted_files) + total_unchanged = len(unchanged_files) + if total_ref > 0: + print("\nAll done! ✨ 🍰 ✨") + print( + f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " + f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." + ) + + def main(argv: Sequence[str] | None = None) -> int: - """collect failed notebook checks""" parser = argparse.ArgumentParser() parser.add_argument("--repo-name") parser.add_argument("--fix-header", action="store_true") parser.add_argument("--pip-install-on-colab-version") parser.add_argument("filenames", nargs="*", help="Filenames to check.") args = parser.parse_args(argv) + failed_files = False reformatted_files = [] unchanged_files = [] + for filename in args.filenames: try: modified = check_colab_header( filename, repo_name=args.repo_name, fix=args.fix_header, - version=args.pip_install_on_colab_version, + hook_version=args.pip_install_on_colab_version, ) if modified: reformatted_files.append(str(filename)) @@ -169,11 +236,11 @@ def main(argv: Sequence[str] | None = None) -> int: except ValueError as exc: print(f"[ERROR] {filename}: {exc}") failed_files = True + try: test_notebook_has_at_least_three_cells(filename) test_first_cell_contains_three_badges(filename, repo_name=args.repo_name) test_second_cell_is_a_markdown_cell(filename) - except ValueError as exc: print(f"[ERROR] {filename}: {exc}") failed_files = True diff --git a/hooks/colab_header.py b/hooks/colab_header.py new file mode 100644 index 0000000..79e9263 --- /dev/null +++ b/hooks/colab_header.py @@ -0,0 +1,119 @@ +# colab_header.py +from __future__ import annotations + +import re +import nbformat + +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P[^'\"]+)['\"]\s*,\s*" + r"['\"](?P
[^'\"]+)['\"]\s*\)" +) + +HEADER_KEY_PATTERNS = [ + "install open-atmos-jupyter-utils", + "google.colab", + "pip_install_on_colab", +] + + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract version info from cell source + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + m = _PIP_INSTALL_RE.search(cell_source) + if not m: + return None, None + + examples_pkg = m.group("examples") + main_pkg = m.group("main") + + if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( + repo_name + ): + return None, None + + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" + + +def build_header(repo_name: str, version: str) -> str: + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + + +def looks_like_header(cell_source: str) -> bool: + return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) + + +def check_colab_header(notebook_path, repo_name, fix, hook_version): + nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) + + if len(nb.cells) < 3: + raise ValueError("Notebook should have at least 3 cells") + + # Find existing header if present + header_index = None + for idx, cell in enumerate(nb.cells): + if cell.cell_type == "code" and looks_like_header(cell.source): + header_index = idx + break + + # If header doesn't exist, create it + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return True + + # If header exists, validate and optionally fix + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version is None or main_version is None: + raise ValueError("Colab header is malformed") + + if examples_version != main_version: + raise ValueError( + f"Version mismatch in header: {examples_version!r} != {main_version!r}" + ) + + final_version = resolve_version(main_version, hook_version) + header_source = build_header(repo_name, final_version) + + modified = False + + if header_cell.source != header_source: + if not fix: + raise ValueError("Colab header is incorrect") + header_cell.source = header_source + modified = True + + if header_index != 2: + nb.cells.insert(2, nb.cells.pop(header_index)) + modified = True + + if modified: + nbformat.write(nb, notebook_path) + + return modified From 4cb386300a93db8f62c82bd9c2b9148ea9fd1382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:43:45 +0100 Subject: [PATCH 02/16] run new black --- hooks/check_notebooks.py | 7 +++---- test_files/template.ipynb | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index 870a124..b7d6d3e 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """ Checks notebook execution status for Jupyter notebooks""" + from __future__ import annotations import argparse @@ -64,10 +65,8 @@ def test_show_anim_used_instead_of_matplotlib(notebook): if "show_anim(" in cell.source: show_anim_used = True if matplot_used and not show_anim_used: - raise AssertionError( - """if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""" - ) + raise AssertionError("""if using matplotlib for animations, + please use open_atmos_jupyter_utils.show_anim()""") def test_jetbrains_bug_py_66491(notebook): diff --git a/test_files/template.ipynb b/test_files/template.ipynb index be45e76..5b7ef4e 100644 --- a/test_files/template.ipynb +++ b/test_files/template.ipynb @@ -29,11 +29,11 @@ "outputs": [], "source": [ "import os, sys\n", - "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS\n", + "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue'\n", "if 'google.colab' in sys.modules:\n", " !pip --quiet install open-atmos-jupyter-utils\n", " from open_atmos_jupyter_utils import pip_install_on_colab\n", - " pip_install_on_colab('devops_tests-examples', 'devops_tests')" + " pip_install_on_colab('devops_tests-examples', 'devops_tests')\n" ] } ], From 5443b729800ee59280ead962978a40851e459d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:54:33 +0100 Subject: [PATCH 03/16] update check-badges file --- hooks/check_badges.py | 130 +----------------------------------------- 1 file changed, 2 insertions(+), 128 deletions(-) diff --git a/hooks/check_badges.py b/hooks/check_badges.py index 0ef15f7..b714620 100755 --- a/hooks/check_badges.py +++ b/hooks/check_badges.py @@ -1,138 +1,12 @@ -#!/usr/bin/env python3 -# pylint: disable=missing-function-docstring -""" -Checks whether notebooks contain badges and ensures a consistent Colab header. -""" - +# notebook_checks.py from __future__ import annotations import argparse from collections.abc import Sequence -import re import nbformat -_PIP_INSTALL_RE = re.compile( - r"pip_install_on_colab\(\s*" - r"['\"](?P[^'\"]+)['\"]\s*,\s*" - r"['\"](?P
[^'\"]+)['\"]\s*\)" -) - - -def extract_versions(cell_source: str, repo_name: str): - """ - Extract versions from both arguments: - pip_install_on_colab('repo-examples{v}', 'repo{v}') - - Returns: - (examples_version, main_version) or (None, None) if invalid. - """ - m = _PIP_INSTALL_RE.search(cell_source) - if not m: - return None, None - - examples_pkg = m.group("examples") - main_pkg = m.group("main") - - if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( - repo_name - ): - return None, None - - return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] - - -def resolve_version(existing: str | None, hook_version: str | None) -> str: - """ - Precedence: - 1. Version in notebook - 2. Hook version - 3. No version - """ - if existing: - return existing - if hook_version: - return hook_version - return "" - - -HEADER_KEY_PATTERNS = [ - "install open-atmos-jupyter-utils", - "google.colab", - "pip_install_on_colab", -] - - -def build_header(repo_name: str, version: str) -> str: - return f"""import os, sys -os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}') -""" - - -def looks_like_header(cell_source: str) -> bool: - return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) - - -def check_colab_header(notebook_path, repo_name, fix, hook_version): - nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) - - if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 3 cells") - - # Find existing header if present - header_index = None - for idx, cell in enumerate(nb.cells): - if cell.cell_type == "code" and looks_like_header(cell.source): - header_index = idx - break - - # Build final header - if header_index is None: - final_version = resolve_version(None, hook_version) - header_source = build_header(repo_name, final_version) - nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) - nbformat.write(nb, notebook_path) - return True - - header_cell = nb.cells[header_index] - examples_version, main_version = extract_versions(header_cell.source, repo_name) - - if examples_version is None or main_version is None: - raise ValueError("Colab header is malformed") - - if examples_version != main_version: - raise ValueError( - f"Version mismatch in header: {examples_version!r} != {main_version!r}" - ) - - final_version = resolve_version(main_version, hook_version) - header_source = build_header(repo_name, final_version) - - modified = False - - if header_cell.source != header_source: - if not fix: - raise ValueError("Colab header is incorrect") - header_cell.source = header_source - modified = True - - if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True - - if modified: - nbformat.write(nb, notebook_path) - - return modified - - -# ------------------------------------------------------------------- -# Badge checks -# ------------------------------------------------------------------- +from .colab_header import check_colab_header def _preview_badge_markdown(absolute_path, repo_name): From 02fdfee7f38986c551ba3cb578c1c9b52bd7c01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:55:01 +0100 Subject: [PATCH 04/16] rerun notebook --- test_files/template.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_files/template.ipynb b/test_files/template.ipynb index 5b7ef4e..1d40fbe 100644 --- a/test_files/template.ipynb +++ b/test_files/template.ipynb @@ -22,18 +22,18 @@ "id": "72ccd23c0ab9f08e", "metadata": { "ExecuteTime": { - "end_time": "2024-10-26T12:29:32.925592Z", - "start_time": "2024-10-26T12:29:32.919920Z" + "end_time": "2026-01-27T14:51:07.477258Z", + "start_time": "2026-01-27T14:51:07.473160Z" } }, "outputs": [], "source": [ "import os, sys\n", - "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue'\n", + "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS\n", "if 'google.colab' in sys.modules:\n", " !pip --quiet install open-atmos-jupyter-utils\n", " from open_atmos_jupyter_utils import pip_install_on_colab\n", - " pip_install_on_colab('devops_tests-examples', 'devops_tests')\n" + " pip_install_on_colab('devops_tests-examples', 'devops_tests')" ] } ], From f8cb79717ea65998267cb7dfaea3808aad5721bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 16:23:13 +0100 Subject: [PATCH 05/16] rename check --- .pre-commit-config.yaml | 6 +-- .pre-commit-hooks.yaml | 8 ++-- ...=> check_notebook_open_atmos_structure.py} | 2 +- hooks/colab_header.py | 41 ++++++++----------- pyproject.toml | 2 +- 5 files changed, 27 insertions(+), 32 deletions(-) rename hooks/{check_badges.py => check_notebook_open_atmos_structure.py} (97%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba10bf0..a65d07a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,9 +28,9 @@ repos: language: python types: [jupyter] - - id: check-badges - name: check badges - entry: check_badges + - id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 922a4c2..6695bde 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -6,10 +6,10 @@ stages: [pre-commit] types: [jupyter] -- id: check-badges - name: check badges - description: check badges in Jupyter Notebook - entry: check_badges +- id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure + description: check notebook has open-atmos structure language: python stages: [pre-commit] types: [jupyter] diff --git a/hooks/check_badges.py b/hooks/check_notebook_open_atmos_structure.py similarity index 97% rename from hooks/check_badges.py rename to hooks/check_notebook_open_atmos_structure.py index b714620..965edd7 100755 --- a/hooks/check_badges.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,4 +1,4 @@ -# notebook_checks.py +# pre-commit hook checking if badges in first cell match pattern used in open-atmos Jupyter Notebooks from __future__ import annotations import argparse diff --git a/hooks/colab_header.py b/hooks/colab_header.py index 79e9263..1206b31 100644 --- a/hooks/colab_header.py +++ b/hooks/colab_header.py @@ -1,4 +1,4 @@ -# colab_header.py +# Extract version from existing header and check if header is correct from __future__ import annotations import re @@ -10,12 +10,6 @@ r"['\"](?P
[^'\"]+)['\"]\s*\)" ) -HEADER_KEY_PATTERNS = [ - "install open-atmos-jupyter-utils", - "google.colab", - "pip_install_on_colab", -] - def extract_versions(cell_source: str, repo_name: str): """ @@ -23,18 +17,17 @@ def extract_versions(cell_source: str, repo_name: str): Returns: (examples_version, main_version) or (None, None) if invalid. """ - m = _PIP_INSTALL_RE.search(cell_source) - if not m: + text_found = _PIP_INSTALL_RE.search(cell_source) + if not text_found: return None, None - examples_pkg = m.group("examples") - main_pkg = m.group("main") + examples_pkg = text_found.group("examples") + main_pkg = text_found.group("main") - if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( - repo_name + if not main_pkg.startswith(repo_name) or not examples_pkg.startswith( + f"{repo_name}-examples" ): return None, None - return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] @@ -61,8 +54,15 @@ def build_header(repo_name: str, version: str) -> str: pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" +HEADER_REQUIRED_PATTERNS = [ + "google.colab", + "open-atmos-jupyter-utils", + "pip_install_on_colab", +] + + def looks_like_header(cell_source: str) -> bool: - return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) + return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) def check_colab_header(notebook_path, repo_name, fix, hook_version): @@ -71,14 +71,12 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if len(nb.cells) < 3: raise ValueError("Notebook should have at least 3 cells") - # Find existing header if present header_index = None for idx, cell in enumerate(nb.cells): if cell.cell_type == "code" and looks_like_header(cell.source): header_index = idx break - # If header doesn't exist, create it if header_index is None: final_version = resolve_version(None, hook_version) header_source = build_header(repo_name, final_version) @@ -86,7 +84,6 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): nbformat.write(nb, notebook_path) return True - # If header exists, validate and optionally fix header_cell = nb.cells[header_index] examples_version, main_version = extract_versions(header_cell.source, repo_name) @@ -99,14 +96,13 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): ) final_version = resolve_version(main_version, hook_version) - header_source = build_header(repo_name, final_version) + correct_header = build_header(repo_name, final_version) modified = False - - if header_cell.source != header_source: + if header_cell.source != correct_header: if not fix: raise ValueError("Colab header is incorrect") - header_cell.source = header_source + header_cell.source = correct_header modified = True if header_index != 2: @@ -115,5 +111,4 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if modified: nbformat.write(nb, notebook_path) - return modified diff --git a/pyproject.toml b/pyproject.toml index e3dfb25..3d1851a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,4 +28,4 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" -check_badges = "hooks.check_badges:main" +check_badges = "hooks.check_notebook_open_atmos_structure:main" From 59b3e8cb95405ca1cd5bdbd1f7d7790ececba375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 16:38:36 +0100 Subject: [PATCH 06/16] change name and entry --- .pre-commit-config.yaml | 2 +- hooks/check_notebook_open_atmos_structure.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a65d07a..a1e613f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: - id: check-notebook-open-atmos-structure name: check notebook has open-atmos structure - entry: check_notebook_open_atmos_structure + entry: python -m hooks.check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index 965edd7..9f4f28b 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + # pre-commit hook checking if badges in first cell match pattern used in open-atmos Jupyter Notebooks from __future__ import annotations From 83ed0231180dd1370026c07cdab004ad4079acdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 16:57:50 +0100 Subject: [PATCH 07/16] add missing docstrings --- hooks/check_notebook_open_atmos_structure.py | 16 +++++++++++++--- ...olab_header.py => open_atmos_colab_header.py} | 6 +++++- 2 files changed, 18 insertions(+), 4 deletions(-) rename hooks/{colab_header.py => open_atmos_colab_header.py} (93%) diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index 9f4f28b..0d465ae 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 +"""pre-commit hook checking if badges in first cell +match pattern used in open-atmos Jupyter Notebooks""" -# pre-commit hook checking if badges in first cell match pattern used in open-atmos Jupyter Notebooks from __future__ import annotations import argparse @@ -8,10 +9,11 @@ import nbformat -from .colab_header import check_colab_header +from .open_atmos_colab_header import check_colab_header def _preview_badge_markdown(absolute_path, repo_name): + """Markdown preview badge structure used in open-atmos notebooks""" svg_badge_url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" @@ -21,6 +23,7 @@ def _preview_badge_markdown(absolute_path, repo_name): def _mybinder_badge_markdown(absolute_path, repo_name): + """mybinder badge structure used in open-atmos notebooks""" svg_badge_url = "https://mybinder.org/badge_logo.svg" link = ( f"https://mybinder.org/v2/gh/open-atmos/{repo_name}.git/main?urlpath=lab/tree/" @@ -30,6 +33,7 @@ def _mybinder_badge_markdown(absolute_path, repo_name): def _colab_badge_markdown(absolute_path, repo_name): + """colab badge structure used in open-atmos notebooks""" svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( f"https://colab.research.google.com/github/open-atmos/{repo_name}/blob/main/" @@ -39,13 +43,15 @@ def _colab_badge_markdown(absolute_path, repo_name): def test_notebook_has_at_least_three_cells(notebook_filename): + """check if notebook has enough cells to have all required ones""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 4 cells") + raise ValueError("Notebook should have at least 3 cells") def test_first_cell_contains_three_badges(notebook_filename, repo_name): + """check if badges are in the first cell and match patterns""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) @@ -65,6 +71,8 @@ def test_first_cell_contains_three_badges(notebook_filename, repo_name): def test_second_cell_is_a_markdown_cell(notebook_filename): + """Test if second cell is a markdown cell + it should contain description for the notebook""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if nb.cells[1].cell_type != "markdown": @@ -72,6 +80,7 @@ def test_second_cell_is_a_markdown_cell(notebook_filename): def print_hook_summary(reformatted_files, unchanged_files): + """Summary for the whole hook""" for f in reformatted_files: print(f"\nreformatted {f}") @@ -86,6 +95,7 @@ def print_hook_summary(reformatted_files, unchanged_files): def main(argv: Sequence[str] | None = None) -> int: + """collect arguments and run hook""" parser = argparse.ArgumentParser() parser.add_argument("--repo-name") parser.add_argument("--fix-header", action="store_true") diff --git a/hooks/colab_header.py b/hooks/open_atmos_colab_header.py similarity index 93% rename from hooks/colab_header.py rename to hooks/open_atmos_colab_header.py index 1206b31..d863fd5 100644 --- a/hooks/colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -1,4 +1,5 @@ -# Extract version from existing header and check if header is correct +"""Extract version from existing header and check if header is correct""" + from __future__ import annotations import re @@ -46,6 +47,7 @@ def resolve_version(existing: str | None, hook_version: str | None) -> str: def build_header(repo_name: str, version: str) -> str: + """required header pattern in open-atmos notebooks""" return f"""import os, sys os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS if 'google.colab' in sys.modules: @@ -62,10 +64,12 @@ def build_header(repo_name: str, version: str) -> str: def looks_like_header(cell_source: str) -> bool: + """check if the cell source looks like required header""" return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) def check_colab_header(notebook_path, repo_name, fix, hook_version): + """check if colab header is correct""" nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) if len(nb.cells) < 3: From ceb3e539f4946433d435a4d789b5329389ef0092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 8 Feb 2026 00:38:25 +0100 Subject: [PATCH 08/16] tests updates, cleanup, remove duplicated logic or unused parameters --- .github/workflows/checks.yml | 7 +- .pre-commit-config.yaml | 1 + hooks/check_notebook_open_atmos_structure.py | 159 ++++++++++--------- hooks/open_atmos_colab_header.py | 5 +- tests/examples/bad.ipynb | 61 +++++++ 5 files changed, 147 insertions(+), 86 deletions(-) create mode 100644 tests/examples/bad.ipynb diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 84d0e17..9a1bf0b 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -20,9 +20,10 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v5 - - run: pip install . - - run: pip install pylint - - run: pylint --unsafe-load-any-extension=y --disable=fixme $(git ls-files '*.py') + - run: | + pip install . + pip install pylint + pylint --unsafe-load-any-extension=y --disable=fixme $(git ls-files '*.py') precommit: runs-on: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 81b8044..c49176f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,6 +17,7 @@ repos: rev: 2.1.1 hooks: - id: enforce-notebook-run-order + exclude: tests/examples/bad.ipynb - repo: local hooks: diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index 7cc398c..22d2cba 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,22 +1,6 @@ #!/usr/bin/env python3 -# pylint: disable=missing-function-docstring """ -Checks/repairs notebook badge headers. - -This version optionally uses Git to discover the repository root (to build -repo-relative notebook URLs) and uses Python's logging module instead of print() -for structured messages. - -Behavior: -- By default it will attempt to detect the git repo root (if GitPython is - installed and the file is in a git working tree) and fall back to Path.cwd(). -- Use --no-git to force using Path.cwd(). -- Use --repo-root PATH to explicitly set repository root. -- Use --verbose to enable debug logging. - -Usage: - check_badges --repo-name=devops_tests [--repo-owner=open-atmos] [--fix-header] - [--no-git] [--repo-root PATH] [--verbose] FILES... +Checks notebooks structure required in open-atmos projects. """ from __future__ import annotations @@ -30,6 +14,7 @@ import nbformat from nbformat import NotebookNode +from .open_atmos_colab_header import check_colab_header from .utils import NotebookTestError REPO_OWNER_DEFAULT = "open-atmos" @@ -38,8 +23,38 @@ logger = logging.getLogger(__name__) +def resolve_repo_root( + start_path: Path, + *, + explicit_root: Path | None, + prefer_git: bool, +) -> Path: + """Resolve the repository root for the given path.""" + if explicit_root is not None: + return explicit_root + if prefer_git: + try: + # Import locally so the module doesn't hard-depend on GitPython at import time + from git import Repo # pylint: disable=import-outside-toplevel + + try: + repo = Repo(start_path, search_parent_directories=True) + if repo.working_tree_dir: + root = Path(repo.working_tree_dir) + logger.debug("Discovered git repository root: %s", root) + return root + except Exception as exc: # pylint: disable=broad-exception-caught + logger.debug("Git repo detection failed for %s: %s", start_path, exc) + except ImportError as exc: + logger.debug("GitPython not available or import failed: %s", exc) + + cwd = Path.cwd() + logger.debug("Using current working directory as repo root: %s", cwd) + return cwd + + def relative_path(absolute_path, repo_root): - """returns a path relative to the repo base (converting backslashes to slashes on Windows)""" + """Return the path relative to the repository root.""" absolute_path = Path(absolute_path).resolve() repo_root = Path(repo_root).resolve() @@ -54,6 +69,7 @@ def relative_path(absolute_path, repo_root): def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the GitHub-preview badge.""" svg_badge_url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" @@ -63,6 +79,7 @@ def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the Binder badge.""" svg_badge_url = "https://mybinder.org/badge_logo.svg" link = ( f"https://mybinder.org/v2/gh/{repo_owner}/{repo_name}.git/main?urlpath=lab/tree/" @@ -72,6 +89,7 @@ def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> st def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the Colab badge.""" svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( f"https://colab.research.google.com/github/{repo_owner}/{repo_name}/blob/main/" @@ -80,65 +98,34 @@ def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: return f"[![launch on Colab]({svg_badge_url})]({link})" -def find_repo_root(start_path: Path, prefer_git: bool = True) -> Path: - """ - Find repository root for the given start_path. - - If prefer_git is True, attempt to use GitPython to locate the repository root - (searching parent directories). If that fails, fall back to cwd(). - """ - if prefer_git: - try: - # Import locally so the module doesn't hard-depend on GitPython at import time - from git import Repo # pylint: disable=import-outside-toplevel - - try: - repo = Repo(start_path, search_parent_directories=True) - if repo.working_tree_dir: - root = Path(repo.working_tree_dir) - logger.debug("Discovered git repository root: %s", root) - return root - except Exception as exc: # pylint: disable=broad-exception-caught - logger.debug("Git repo detection failed for %s: %s", start_path, exc) - except ImportError as exc: - logger.debug("GitPython not available or import failed: %s", exc) - - cwd = Path.cwd() - logger.debug("Using current working directory as repo root: %s", cwd) - return cwd - - def expected_badges_for( notebook_path: Path, repo_name: str, repo_owner: str, - repo_root: Optional[Path] = None, + repo_root: Optional[Path], ) -> List[str]: """ Return the canonical badge lines expected for notebook_path. If repo_root is provided, attempt to build a relative path from it; otherwise find repository root automatically (using find_repo_root). """ - if repo_root is None: - repo_root = find_repo_root(notebook_path) - - if repo_root is None: - raise ValueError("Could not determine repo root") - relpath = relative_path(notebook_path, repo_root) + args = (relpath, repo_name, repo_owner) return [ - preview_badge_markdown(relpath, repo_name, repo_owner), - mybinder_badge_markdown(relpath, repo_name, repo_owner), - colab_badge_markdown(relpath, repo_name, repo_owner), + preview_badge_markdown(*args), + mybinder_badge_markdown(*args), + colab_badge_markdown(*args), ] def read_notebook(path: Path) -> NotebookNode: + """Read a Jupyter notebook without format conversion.""" with path.open(encoding="utf8") as fp: return nbformat.read(fp, nbformat.NO_CONVERT) def write_notebook(path: Path, nb: NotebookNode) -> None: + """Write a Jupyter notebook to disk.""" with path.open("w", encoding="utf8") as fp: nbformat.write(nb, fp) @@ -209,53 +196,67 @@ def test_second_cell_is_a_markdown_cell(notebook_filename: str) -> None: raise ValueError("Second cell is not a markdown cell") -def main(argv: Sequence[str] | None = None) -> int: - parser = argparse.ArgumentParser() - parser.add_argument("--repo-name", required=True) - parser.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) - parser.add_argument( +def build_parser() -> argparse.ArgumentParser: + """Build parser for command line arguments.""" + p = argparse.ArgumentParser() + p.add_argument("--repo-name", required=True) + p.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) + p.add_argument( "--fix-header", action="store_true", help="If set, attempt to fix notebooks missing the header.", ) - parser.add_argument( + p.add_argument( "--no-git", action="store_true", help="Do not attempt to detect git repo root; use cwd()", ) - parser.add_argument( + p.add_argument( "--repo-root", help="Explicit repository root to use when building URLs" ) - parser.add_argument("--verbose", action="store_true", help="Enable debug logging") - parser.add_argument("filenames", nargs="*", help="Filenames to check.") - args = parser.parse_args(argv) + p.add_argument("--verbose", action="store_true", help="Enable debug logging") + p.add_argument("filenames", nargs="*", help="Filenames to check.") + return p - # configure logging - level = logging.DEBUG if args.verbose else logging.INFO + +def configure_logging(verbose: bool) -> None: + """Configure logging with --verbose flag""" + level = logging.DEBUG if verbose else logging.INFO logging.basicConfig(level=level, format="%(levelname)s: %(message)s") + +def main(argv: Sequence[str] | None = None) -> int: + """Test notebook structure: + - first cell with 3 correct badges, + - second cell is of type markdown (best if with notebook description), + - third cell is a Colab magick cell. + """ + args = build_parser().parse_args(argv) + configure_logging(args.verbose) + explicit_repo_root = Path(args.repo_root) if args.repo_root else None prefer_git = not args.no_git - repo_root_path: Optional[Path] = Path(args.repo_root) if args.repo_root else None - retval = 0 + + failed = False for filename in args.filenames: path = Path(filename) try: - effective_repo_root = repo_root_path or ( - find_repo_root(path, prefer_git) if prefer_git else Path.cwd() + repo_root = resolve_repo_root( + path, + explicit_root=explicit_repo_root, + prefer_git=prefer_git, ) - test_notebook_has_at_least_three_cells(filename) test_first_cell_contains_three_badges( - filename, args.repo_name, args.repo_owner, effective_repo_root + filename, args.repo_name, args.repo_owner, repo_root ) test_second_cell_is_a_markdown_cell(filename) + check_colab_header(path, args.repo_name, args.fix_header, "") + logger.info("%s: OK", path) - logger.info("%s: OK", filename) - retval = retval or 0 except NotebookTestError as exc: - logger.error("%s: %s", filename, exc) - retval = 1 - return retval + logger.error("%s: %s", path, exc) + failed = True + return int(failed) if __name__ == "__main__": diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py index d863fd5..c77923f 100644 --- a/hooks/open_atmos_colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -71,11 +71,8 @@ def looks_like_header(cell_source: str) -> bool: def check_colab_header(notebook_path, repo_name, fix, hook_version): """check if colab header is correct""" nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) - - if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 3 cells") - header_index = None + for idx, cell in enumerate(nb.cells): if cell.cell_type == "code" and looks_like_header(cell.source): header_index = idx diff --git a/tests/examples/bad.ipynb b/tests/examples/bad.ipynb new file mode 100644 index 0000000..d7a3b3e --- /dev/null +++ b/tests/examples/bad.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a deliberately failing notebook for tests: it contains two failing patterns\n", + "- a code cell missing the execution_count key (should trigger `Cell does not contain output!`)\n", + "- a stderr output (should trigger `test_no_errors_or_warnings_in_output` if reached)\n" + ], + "id": "4b5a02d6cfbf77e5" + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "print('this cell lacks the execution_count key')" + ], + "id": "e754fd89bde86cde", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2026-02-06T20:09:40.737153Z", + "start_time": "2026-02-06T20:09:40.734389Z" + } + }, + "source": [ + "import sys\n", + "print('error to stderr', file=sys.stderr)" + ], + "id": "2d54502ea03151e", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "error to stderr\n" + ] + } + ], + "execution_count": 2 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 31ac8be9c725281131e5ac9d783116276d049837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 8 Feb 2026 00:59:59 +0100 Subject: [PATCH 09/16] reformat hook name --- ...es.py => test_check_notebook_open_atmos_structure.py} | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) rename tests/{test_check_badges_examples.py => test_check_notebook_open_atmos_structure.py} (90%) diff --git a/tests/test_check_badges_examples.py b/tests/test_check_notebook_open_atmos_structure.py similarity index 90% rename from tests/test_check_badges_examples.py rename to tests/test_check_notebook_open_atmos_structure.py index 494f41a..ef3c8f1 100644 --- a/tests/test_check_badges_examples.py +++ b/tests/test_check_notebook_open_atmos_structure.py @@ -1,17 +1,18 @@ # pylint: disable=missing-function-docstring """ -Unit tests for hooks/check_badges.py: good and bad notebook examples. +Unit tests for hooks/check_notebook_open_atmos_structure.py: good and bad notebook examples. These tests write small notebooks to temporary files and call the badge-check functions that expect filenames. """ +import pathlib import nbformat from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell import pytest -from hooks import check_badges as cb +from hooks import check_notebook_open_atmos_structure as cb def _write_nb_and_return_path(tmp_path, nb, name="nb.ipynb"): @@ -70,7 +71,9 @@ def test_first_cell_bad_badges_raises(tmp_path): ) path = _write_nb_and_return_path(tmp_path, nb, name="badbadges.ipynb") with pytest.raises(ValueError): - cb.test_first_cell_contains_three_badges(path, "devops_tests") + cb.test_first_cell_contains_three_badges( + path, "devops_tests", repo_root=pathlib.Path("") + ) def test_second_cell_not_markdown_raises(tmp_path): From 9f8e04a8d109df4bec58f7bf464147ec1277ce54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 8 Feb 2026 01:06:24 +0100 Subject: [PATCH 10/16] reformat hook name in pyproject.toml --- pyproject.toml | 2 +- tests/test_run_hooks_on_examples.py | 50 ----------------------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7fea13a..d25de5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,6 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" -check_badges = "hooks.check_badges:main" +check_notebook_open_atmos_structure = "hooks.check_notebook_open_atmos_structure:main" notebooks_output = "hooks.notebooks_output:main" notebooks_using_jupyter_utils = "hooks.notebooks_using_jupyter_utils:main" diff --git a/tests/test_run_hooks_on_examples.py b/tests/test_run_hooks_on_examples.py index 6372c74..82cefe1 100644 --- a/tests/test_run_hooks_on_examples.py +++ b/tests/test_run_hooks_on_examples.py @@ -1,17 +1,5 @@ # pylint: disable=missing-function-docstring -""" -Run the hooks on good and bad example notebooks and assert behavior + logs. - -This test executes the hook modules the same way pre-commit would: - python -m hooks.check_badges --repo-name=... PATH - python -m hooks.check_notebooks PATH - -It captures stdout/stderr (which is where logging.basicConfig writes), asserts -expected exit codes, and checks stderr for expected failure substrings. - -Adjust the candidate paths in _find_example() if your examples are stored elsewhere. -""" from __future__ import annotations @@ -42,44 +30,6 @@ def _run_module(module: str, args: list[str]) -> subprocess.CompletedProcess: return subprocess.run(cmd, capture_output=True, text=True, check=False) -@pytest.mark.parametrize( - "name, expected_badge_exit, expected_badge_msg", - [ - ("good.ipynb", 0, ""), - ("bad.ipynb", 1, "Missing badges"), - ], -) -def test_check_badges_on_examples( - name: str, expected_badge_exit: int, expected_badge_msg: str -): - nb_path = _find_example(name) - if nb_path is None: - pytest.skip(f"No example notebook found for {name};") - - res = _run_module("hooks.check_badges", ["--repo-name=devops_tests", str(nb_path)]) - combined_msgs = (res.stderr or "") + "\n" + (res.stdout or "") - - # Check exit code first - if expected_badge_exit == 0: - assert ( - res.returncode == 0 - ), f"Expected success; stderr:\n{res.stderr}\nstdout:\n{res.stdout}" - # For success, ensure no obvious error strings are present - assert "Missing badges" not in combined_msgs - assert "ERROR" not in combined_msgs - assert "Traceback" not in combined_msgs - else: - assert ( - res.returncode != 0 - ), f"Expected failure; got exit {res.returncode}\nstdout:\n{res.stdout}" - - assert expected_badge_msg in combined_msgs, ( - f"Expected to find {expected_badge_msg!r} in output" - f"\n---STDERR---\n{res.stderr}" - f"\n---STDOUT---\n{res.stdout}" - ) - - @pytest.mark.parametrize( "name, should_fail, expected_msg_substr", [ From 6260ff80f0fb46f72de26dcbe173ce5564f236de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 15 Feb 2026 16:20:17 +0100 Subject: [PATCH 11/16] update docstrings --- hooks/check_notebook_open_atmos_structure.py | 4 ++++ tests/test_run_hooks_on_examples.py | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index 22d2cba..bed268f 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,6 +1,10 @@ #!/usr/bin/env python3 """ Checks notebooks structure required in open-atmos projects. +Requirements: +- first cell contains three correct badges +- second cell is of type markdown +- third cell is Colab magick cell """ from __future__ import annotations diff --git a/tests/test_run_hooks_on_examples.py b/tests/test_run_hooks_on_examples.py index 82cefe1..199eca3 100644 --- a/tests/test_run_hooks_on_examples.py +++ b/tests/test_run_hooks_on_examples.py @@ -1,5 +1,12 @@ # pylint: disable=missing-function-docstring - +""" +Unit test for checking correct error message +when running hooks on example files (good.ipynb, bad.ipynb). + +Test for local pre-commit hooks: +- notebook_output, +- check_notebooks. +""" from __future__ import annotations From c66ba0553d95838cc3fb28dbb64a242be475e610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 17 Feb 2026 14:00:35 +0100 Subject: [PATCH 12/16] remove wrong ValueError when missing repo-examples or repo in header; verbose error for wrong header --- hooks/open_atmos_colab_header.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py index c77923f..26c6326 100644 --- a/hooks/open_atmos_colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -29,6 +29,7 @@ def extract_versions(cell_source: str, repo_name: str): f"{repo_name}-examples" ): return None, None + print(examples_pkg, main_pkg) return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] @@ -88,9 +89,6 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): header_cell = nb.cells[header_index] examples_version, main_version = extract_versions(header_cell.source, repo_name) - if examples_version is None or main_version is None: - raise ValueError("Colab header is malformed") - if examples_version != main_version: raise ValueError( f"Version mismatch in header: {examples_version!r} != {main_version!r}" @@ -102,7 +100,9 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): modified = False if header_cell.source != correct_header: if not fix: - raise ValueError("Colab header is incorrect") + raise ValueError( + f"Incorrect Colab header.\n Expected header:\n---\n{correct_header}\n---" + ) header_cell.source = correct_header modified = True From b2f0a463b946d4d229e2e5edf2bed0c0f4c566d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 17 Feb 2026 14:03:36 +0100 Subject: [PATCH 13/16] return notebook path in error messages --- hooks/open_atmos_colab_header.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py index 26c6326..af0f53f 100644 --- a/hooks/open_atmos_colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -91,7 +91,7 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if examples_version != main_version: raise ValueError( - f"Version mismatch in header: {examples_version!r} != {main_version!r}" + f"{notebook_path}\nVersion mismatch in header: {examples_version!r} != {main_version!r}" ) final_version = resolve_version(main_version, hook_version) @@ -101,7 +101,7 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if header_cell.source != correct_header: if not fix: raise ValueError( - f"Incorrect Colab header.\n Expected header:\n---\n{correct_header}\n---" + f"{notebook_path}\nIncorrect Colab header.\n Expected header:\n---\n{correct_header}\n---" ) header_cell.source = correct_header modified = True From dc88fdee2a5a3e4bd1e6e8b2a1c7c7418ea5fbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sat, 21 Feb 2026 17:37:12 +0100 Subject: [PATCH 14/16] fix too long line --- hooks/open_atmos_colab_header.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py index af0f53f..43a674f 100644 --- a/hooks/open_atmos_colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -101,7 +101,9 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if header_cell.source != correct_header: if not fix: raise ValueError( - f"{notebook_path}\nIncorrect Colab header.\n Expected header:\n---\n{correct_header}\n---" + f"{notebook_path}\n" + f"Incorrect Colab cell, expected header:\n" + f"---\n{correct_header}\n---" ) header_cell.source = correct_header modified = True From b409b47b1659112dfbf07a4b6cfd4f121aa84f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 22 Feb 2026 13:02:45 +0100 Subject: [PATCH 15/16] start simple documentation for pre-commit hooks; start error codes formatting --- README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/README.md b/README.md index 5a00378..7c797ae 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,42 @@ # devops_tests pytest test routines asserting for GitHub issue-linked TO-DO labelling in the code, README link consistency and some Jupyter notebook sanity checks + +## Pre-commit hooks: +### `check_notebook_open_atmos_structure` +Check Jupyter notebooks structure default for `open-atmos` projects. +Required: +- three badges in the first cell: + +[![preview notebook](https://img.shields.io/static/v1?label=render%20on&logo=github&color=87ce3e&message=GitHub)](https://github.com/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb) +[![launch on mybinder.org](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/open-atmos/devops_tests.git/main?urlpath=lab/tree/tests/examples/good.ipynb) +[![launch on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb) + +- markdown type of the second cell +- Colab-header in the third cell +``` +import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('devops_tests-examples', 'devops_tests') +``` + +## Error and warning labeling +### **0xx -> Execution problems** +- NB000 missing execution count +- NB001 empty execution +- NB002 cell contains error + +### **1xx -> Badges problems** +- NB100 wrong GitHub preview badge +- NB101 wrong mybinder badge +- NB102 wrong Colab badge + +### **2xx -> Markdown problems** +- NB200 markdown cell missing + +### **3xx -> Colab header problems** +- NB300 header missing +- NB301 version mismatch +- NB302 header in wrong position From 2c5166fc558a32633840b49648f8f1cbae5d077d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 22 Feb 2026 17:46:00 +0100 Subject: [PATCH 16/16] all check functions should yield error dformatted as in utils, remove all raise Value Error, introduce Error coding --- README.md | 68 ++++++-- hooks/check_notebook_open_atmos_structure.py | 166 +++++++++---------- hooks/check_notebooks.py | 25 ++- hooks/notebooks_output.py | 49 ++++-- hooks/notebooks_using_jupyter_utils.py | 36 ++-- hooks/open_atmos_colab_header.py | 38 +++-- hooks/utils.py | 92 +++++++--- 7 files changed, 310 insertions(+), 164 deletions(-) diff --git a/README.md b/README.md index 7c797ae..ebf7b00 100644 --- a/README.md +++ b/README.md @@ -22,21 +22,53 @@ if 'google.colab' in sys.modules: pip_install_on_colab('devops_tests-examples', 'devops_tests') ``` -## Error and warning labeling -### **0xx -> Execution problems** -- NB000 missing execution count -- NB001 empty execution -- NB002 cell contains error - -### **1xx -> Badges problems** -- NB100 wrong GitHub preview badge -- NB101 wrong mybinder badge -- NB102 wrong Colab badge - -### **2xx -> Markdown problems** -- NB200 markdown cell missing - -### **3xx -> Colab header problems** -- NB300 header missing -- NB301 version mismatch -- NB302 header in wrong position +## 0xx -> Execution Problems + +These errors relate to notebook execution state and structural integrity. + +- **NB000** – Missing execution count +- **NB001** – Empty execution +- **NB002** – Cell contains execution error +- **NB003** – Insufficient number of cells (minimum required not met) +- **NB004** – Invalid or missing required content in first cell + +--- + +## 1xx -> Badge Problems + +These errors relate to required badges in the first notebook cell. + +- **NB100** - Incorrect GitHub preview badge +- **NB101** – Incorrect MyBinder badge +- **NB102** – Incorrect Colab badge + +--- + +## 2xx -> Markdown Problems + +These errors relate to required markdown structure in the second notebook cell. + +- **NB200** – Required markdown cell missing +- **NB201** – Invalid markdown structure +- **NB202** – Required markdown section missing + +--- + +## 3xx -> Colab Header Problems + +These errors relate to the required Colab setup header. + +- **NB300** – Header missing +- **NB301** – Header version mismatch +- **NB302** – Invalid header content +- **NB303** – Header in incorrect position + +--- + +## 4xx -> `open-atmos-jupyter-utils` Problems + +These errors relate to the usage of `show_anim()` and `show_plot()` +functions from `open-atmos-jupyter-utils` package required when matplotlib is used. + +- **NB400** - show_plot not used +- **NB401** - show_anim not used diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index bed268f..6bbc8bc 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Checks notebooks structure required in open-atmos projects. +Checks notebook structure required in open-atmos projects. Requirements: - first cell contains three correct badges - second cell is of type markdown @@ -11,19 +11,18 @@ import argparse import logging -from collections.abc import Sequence +from collections.abc import Sequence, Iterable from pathlib import Path -from typing import Iterable, List, Tuple, Optional +from typing import Optional, List, Tuple import nbformat from nbformat import NotebookNode +from .utils import cell_error, open_and_test_notebooks from .open_atmos_colab_header import check_colab_header -from .utils import NotebookTestError REPO_OWNER_DEFAULT = "open-atmos" - logger = logging.getLogger(__name__) @@ -48,6 +47,7 @@ def resolve_repo_root( logger.debug("Discovered git repository root: %s", root) return root except Exception as exc: # pylint: disable=broad-exception-caught + repo = None logger.debug("Git repo detection failed for %s: %s", start_path, exc) except ImportError as exc: logger.debug("GitPython not available or import failed: %s", exc) @@ -74,32 +74,32 @@ def relative_path(absolute_path, repo_root): def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: """Create markdown for the GitHub-preview badge.""" - svg_badge_url = ( + url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" ) link = f"https://github.com/{repo_owner}/{repo_name}/blob/main/{relpath}" - return f"[![preview notebook]({svg_badge_url})]({link})" + return f"[![preview notebook]({url})]({link})" def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: """Create markdown for the Binder badge.""" - svg_badge_url = "https://mybinder.org/badge_logo.svg" + url = "https://mybinder.org/badge_logo.svg" link = ( f"https://mybinder.org/v2/gh/{repo_owner}/{repo_name}.git/main?urlpath=lab/tree/" + f"{relpath}" ) - return f"[![launch on mybinder.org]({svg_badge_url})]({link})" + return f"[![launch on mybinder.org]({url})]({link})" def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: """Create markdown for the Colab badge.""" - svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" + url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( f"https://colab.research.google.com/github/{repo_owner}/{repo_name}/blob/main/" + f"{relpath}" ) - return f"[![launch on Colab]({svg_badge_url})]({link})" + return f"[![launch on Colab]({url})]({link})" def expected_badges_for( @@ -123,64 +123,48 @@ def expected_badges_for( def read_notebook(path: Path) -> NotebookNode: - """Read a Jupyter notebook without format conversion.""" with path.open(encoding="utf8") as fp: return nbformat.read(fp, nbformat.NO_CONVERT) def write_notebook(path: Path, nb: NotebookNode) -> None: - """Write a Jupyter notebook to disk.""" with path.open("w", encoding="utf8") as fp: nbformat.write(nb, fp) def first_cell_lines(nb: NotebookNode) -> List[str]: - """Return list of stripped lines from the first cell if it's markdown, else []""" - if not nb.cells: - return [] - first = nb.cells[0] - if first.cell_type != "markdown": + if not nb.cells or nb.cells[0].cell_type != "markdown": return [] - return [ln.strip() for ln in str(first.source).splitlines() if ln.strip() != ""] + return [ln.strip() for ln in str(nb.cells[0].source).splitlines() if ln.strip()] def badges_match( actual_lines: Iterable[str], expected_lines: Iterable[str] ) -> Tuple[bool, str]: - """ - Check whether the expected badge lines are present in actual_lines. - Tolerant: ignores order, strips whitespace. - Returns (matches, message). Message empty on match else explains which badges missing. - """ actual_set = {ln.strip() for ln in actual_lines} - expected_list = list(expected_lines) - missing = [exp for exp in expected_list if exp.strip() not in actual_set] + missing = [exp for exp in expected_lines if exp.strip() not in actual_set] if not missing: return True, "" return False, f"Missing badges: {missing}" -def test_notebook_has_at_least_three_cells(notebook_filename: str) -> None: +def test_notebook_has_at_least_three_cells(notebook, filename) -> Iterable: """checks if all notebooks have at least three cells""" - nb = read_notebook(Path(notebook_filename)) - if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 3 cells") + if len(notebook.cells) < 3: + yield cell_error( + filename, + 0, + code="NB003", + message="Insufficient number of cells (minimum required is 3).", + ) def test_first_cell_contains_three_badges( notebook_filename: str, repo_name: str, - repo_owner: str = REPO_OWNER_DEFAULT, - repo_root: Optional[Path] = None, -) -> None: - """ - checks if the notebook's first cell contains the three badges. - Raises ValueError on failure. - - The optional repo_root can be provided to control how the notebook path is - converted into the remote URL. If None, the module will attempt to detect - a git repo root and fall back to cwd(). - """ + repo_owner: str, + repo_root: Path, +) -> Iterable: nb = read_notebook(Path(notebook_filename)) lines = first_cell_lines(nb) expected = expected_badges_for( @@ -188,79 +172,95 @@ def test_first_cell_contains_three_badges( ) ok, msg = badges_match(lines, expected) if not ok: - raise ValueError(msg) - - -def test_second_cell_is_a_markdown_cell(notebook_filename: str) -> None: - """checks if all notebooks have their second cell as markdown""" - nb = read_notebook(Path(notebook_filename)) - if len(nb.cells) < 2: - raise ValueError("Notebook has no second cell") - if nb.cells[1].cell_type != "markdown": - raise ValueError("Second cell is not a markdown cell") + yield cell_error(notebook_filename, 0, code="NB004", message=msg) + + +def test_second_cell_is_a_markdown_cell(notebook, filename) -> Iterable: + if len(notebook.cells) < 2: + yield cell_error( + filename, 1, code="NB200", message="Notebook has no second cell." + ) + elif notebook.cells[1].cell_type != "markdown": + yield cell_error( + filename, + 1, + code="NB201", + message="Second cell is not a markdown cell", + ) + + +def test_colab_header( + notebook_filename: str, repo_name: str, fix_header: bool = False +) -> Iterable: + """Wrap check_colab_header to yield errors instead of raising.""" + try: + yield from check_colab_header( + Path(notebook_filename), repo_name, fix_header, "" + ) + except Exception as exc: + yield cell_error( + notebook_filename, 2, code="NB205", message=f"Colab header error: {exc}" + ) def build_parser() -> argparse.ArgumentParser: - """Build parser for command line arguments.""" p = argparse.ArgumentParser() p.add_argument("--repo-name", required=True) p.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) p.add_argument( "--fix-header", action="store_true", - help="If set, attempt to fix notebooks missing the header.", + help="Attempt to fix notebooks missing header", ) p.add_argument( - "--no-git", - action="store_true", - help="Do not attempt to detect git repo root; use cwd()", - ) - p.add_argument( - "--repo-root", help="Explicit repository root to use when building URLs" + "--no-git", action="store_true", help="Do not detect git repo root, use cwd()" ) + p.add_argument("--repo-root", help="Explicit repository root path") p.add_argument("--verbose", action="store_true", help="Enable debug logging") - p.add_argument("filenames", nargs="*", help="Filenames to check.") + p.add_argument("filenames", nargs="*", help="Notebooks to check") return p def configure_logging(verbose: bool) -> None: - """Configure logging with --verbose flag""" level = logging.DEBUG if verbose else logging.INFO logging.basicConfig(level=level, format="%(levelname)s: %(message)s") def main(argv: Sequence[str] | None = None) -> int: - """Test notebook structure: - - first cell with 3 correct badges, - - second cell is of type markdown (best if with notebook description), - - third cell is a Colab magick cell. - """ args = build_parser().parse_args(argv) configure_logging(args.verbose) + explicit_repo_root = Path(args.repo_root) if args.repo_root else None prefer_git = not args.no_git - failed = False - for filename in args.filenames: - path = Path(filename) - try: + repo_root = None + if explicit_repo_root or prefer_git: + if args.filenames: repo_root = resolve_repo_root( - path, + Path(args.filenames[0]), explicit_root=explicit_repo_root, prefer_git=prefer_git, ) - test_notebook_has_at_least_three_cells(filename) - test_first_cell_contains_three_badges( - filename, args.repo_name, args.repo_owner, repo_root - ) - test_second_cell_is_a_markdown_cell(filename) - check_colab_header(path, args.repo_name, args.fix_header, "") - logger.info("%s: OK", path) - - except NotebookTestError as exc: - logger.error("%s: %s", path, exc) - failed = True - return int(failed) + + def wrap_first_cell_badges(notebook, filename): + yield from test_first_cell_contains_three_badges( + filename, args.repo_name, args.repo_owner, repo_root + ) + + def wrap_colab_header(notebook, filename): + yield from test_colab_header(filename, args.repo_name, args.fix_header) + + test_functions = [ + test_notebook_has_at_least_three_cells, + wrap_first_cell_badges, + test_second_cell_is_a_markdown_cell, + wrap_colab_header, + ] + + return open_and_test_notebooks( + args.filenames, + test_functions, + ) if __name__ == "__main__": diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index 22078d1..d3a26df 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -4,26 +4,35 @@ from __future__ import annotations +import argparse from collections.abc import Sequence -from .utils import open_and_test_notebooks +from .utils import open_and_test_notebooks, cell_error -def test_jetbrains_bug_py_66491(notebook): +def test_jetbrains_bug_py_66491(notebook, filename): """checks if all notebook have the execution_count key for each cell in JSON, which is required by GitHub renderer and may not be generated by some buggy PyCharm versions: https://youtrack.jetbrains.com/issue/PY-66491""" - for cell in notebook.cells: + for idx, cell in enumerate(notebook.cells): if cell.cell_type == "code" and not hasattr(cell, "execution_count"): - raise ValueError( - "Notebook cell missing execution_count attribute. " - "(May be due to PyCharm bug, see: https://youtrack.jetbrains.com/issue/PY-66491 )" + yield cell_error( + filename, + idx, + code="NB000", + message="Notebook cell missing execution_count attribute. " + "(May be due to PyCharm bug, see: https://youtrack.jetbrains.com/issue/PY-66491 )", ) def main(argv: Sequence[str] | None = None) -> int: - """test all notebooks""" + """Test all notebooks""" + + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + return open_and_test_notebooks( - argv=argv, + filenames=args.filenames, test_functions=[ test_jetbrains_bug_py_66491, ], diff --git a/hooks/notebooks_output.py b/hooks/notebooks_output.py index 20dcdec..bffdfdd 100755 --- a/hooks/notebooks_output.py +++ b/hooks/notebooks_output.py @@ -3,19 +3,25 @@ from __future__ import annotations +import argparse from collections.abc import Sequence -from .utils import open_and_test_notebooks +from .utils import open_and_test_notebooks, cell_error -def test_cell_contains_output(notebook): +def test_cell_contains_output(notebook, filename): """checks if all notebook cells have an output present""" for cell_idx, cell in enumerate(notebook.cells): if cell.cell_type == "code" and cell.source != "": if cell.execution_count is None: - raise ValueError(f"Cell {cell_idx} does not contain output") + yield cell_error( + filename, + cell_idx, + code="NB001", + message="Cell does not contain output", + ) -def test_no_errors_or_warnings_in_output(notebook): +def test_no_errors_or_warnings_in_output(notebook, filename): """checks if all example Jupyter notebooks have clear std-err output (i.e., no errors or warnings) visible; except acceptable diagnostics from the joblib package""" @@ -23,21 +29,36 @@ def test_no_errors_or_warnings_in_output(notebook): if cell.cell_type == "code": for output in cell.outputs: ot = output.get("output_type") - if ot == "error": - raise ValueError( - f"Cell [{cell_idx}] contain error or warning. \n\n" - f"Cell [{cell_idx}] output:\n{output}\n" + is_error = ot == "error" + is_stderr = ( + ot == "stream" + and output.get("name") == "stderr" + and (text := output.get("text")) + and not text.startswith("[Parallel(n_jobs=") + ) + + if is_error or is_stderr: + yield cell_error( + filename, + cell_idx, + code="NB002", + message=( + "Cell contains execution error or warning.\n\n" + f"Cell output:\n{output}\n" + ), ) - if ot == "stream" and output.get("name") == "stderr": - out_text = output.get("text") - if out_text and not out_text.startswith("[Parallel(n_jobs="): - raise ValueError(f" Cell [{cell_idx}]: {out_text}") def main(argv: Sequence[str] | None = None) -> int: - """test all notebooks""" + """Test all notebooks""" + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + return open_and_test_notebooks( - argv=argv, + filenames=args.filenames, test_functions=[ test_cell_contains_output, test_no_errors_or_warnings_in_output, diff --git a/hooks/notebooks_using_jupyter_utils.py b/hooks/notebooks_using_jupyter_utils.py index 8338443..eccf275 100755 --- a/hooks/notebooks_using_jupyter_utils.py +++ b/hooks/notebooks_using_jupyter_utils.py @@ -4,27 +4,33 @@ from __future__ import annotations +import argparse from collections.abc import Sequence -from .utils import open_and_test_notebooks +from .utils import open_and_test_notebooks, cell_error -def test_show_plot_used_instead_of_matplotlib(notebook): +def test_show_plot_used_instead_of_matplotlib(notebook, filename): """checks if plotting is done with open_atmos_jupyter_utils show_plot()""" matplot_used = False show_plot_used = False - for cell in notebook.cells: + matplot_idx = 0 + for idx, cell in enumerate(notebook.cells): if cell.cell_type == "code": if "pyplot.show(" in cell.source or "plt.show(" in cell.source: matplot_used = True + matplot_idx = idx if "show_plot(" in cell.source: show_plot_used = True if matplot_used and not show_plot_used: - raise ValueError( - "if using matplotlib, please use open_atmos_jupyter_utils.show_plot()" + yield cell_error( + filename, + matplot_idx, + code="NB400", + message="If using matplotlib, please use open_atmos_jupyter_utils.show_plot()", ) -def test_show_anim_used_instead_of_matplotlib(notebook): +def test_show_anim_used_instead_of_matplotlib(notebook, filename): """checks if animation generation is done with open_atmos_jupyter_utils show_anim()""" matplot_used = False show_anim_used = False @@ -39,14 +45,24 @@ def test_show_anim_used_instead_of_matplotlib(notebook): if "show_anim(" in cell.source: show_anim_used = True if matplot_used and not show_anim_used: - raise AssertionError("""if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""") + yield cell_error( + filename, + matplot_idx, + code="NB401", + message="If using matplotlib for animations, " + "please use open_atmos_jupyter_utils.show_anim()", + ) def main(argv: Sequence[str] | None = None) -> int: - """test all notebooks""" + """Test all notebooks""" + + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + return open_and_test_notebooks( - argv=argv, + filenames=args.filenames, test_functions=[ test_show_anim_used_instead_of_matplotlib, test_show_plot_used_instead_of_matplotlib, diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py index 43a674f..d16db07 100644 --- a/hooks/open_atmos_colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -4,6 +4,9 @@ import re import nbformat +from pygments.styles.dracula import yellow + +from .utils import cell_error _PIP_INSTALL_RE = re.compile( r"pip_install_on_colab\(\s*" @@ -84,14 +87,17 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): header_source = build_header(repo_name, final_version) nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) nbformat.write(nb, notebook_path) - return True + return header_cell = nb.cells[header_index] examples_version, main_version = extract_versions(header_cell.source, repo_name) if examples_version != main_version: - raise ValueError( - f"{notebook_path}\nVersion mismatch in header: {examples_version!r} != {main_version!r}" + yield cell_error( + notebook_path, + header_index, + "NB301", + f"\nVersion mismatch in header: {examples_version!r} != {main_version!r}", ) final_version = resolve_version(main_version, hook_version) @@ -100,17 +106,27 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): modified = False if header_cell.source != correct_header: if not fix: - raise ValueError( - f"{notebook_path}\n" - f"Incorrect Colab cell, expected header:\n" - f"---\n{correct_header}\n---" + yield cell_error( + notebook_path, + header_index, + "NB302", + f"Incorrect Colab cell, expected header:\n---\n{correct_header}\n---", ) - header_cell.source = correct_header - modified = True + else: + header_cell.source = correct_header + modified = True if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True + if not fix: + yield cell_error( + notebook_path, + header_index, + code="NB303", + message="Colab header in wrong position. Expected cell index: 2.", + ) + else: + nb.cells.insert(2, nb.cells.pop(header_index)) + modified = True if modified: nbformat.write(nb, notebook_path) diff --git a/hooks/utils.py b/hooks/utils.py index 2008907..9c26d8d 100644 --- a/hooks/utils.py +++ b/hooks/utils.py @@ -4,13 +4,34 @@ import os import pathlib -import argparse +from pathlib import Path from git import Git + +from dataclasses import dataclass +from typing import Callable, Iterable, Any import nbformat -class NotebookTestError(BaseException): - """Raised when a notebook validation test fails.""" +@dataclass +class NotebookError: + path: str + line: int + col: int + code: str + message: str + + def format(self) -> str: + return f"{self.path}:{self.line}:{self.col}: {self.code} {self.message}" + + +def cell_error(path, cell_idx, code, message): + return NotebookError( + path=path, + line=cell_idx + 1, + col=1, + code=code, + message=message, + ) def find_files(path_to_folder_from_project_root=".", file_extension=None): @@ -41,20 +62,51 @@ def repo_path(): return path -def open_and_test_notebooks(argv, test_functions): - """Create argparser and run notebook tests""" - parser = argparse.ArgumentParser() - parser.add_argument("filenames", nargs="*", help="Filenames to check.") - args = parser.parse_args(argv) - - retval = 0 - for filename in args.filenames: - with open(filename, encoding="utf8") as notebook_file: - notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) - for func in test_functions: - try: - func(notebook) - except NotebookTestError as e: - print(f"{filename} : {e}") - retval = 1 - return retval +def open_and_test_notebooks( + filenames: list[str], + test_functions: list[Callable[[nbformat.NotebookNode, str], Iterable[Any]]], +) -> int: + """ + Run notebook tests on a list of filenames using generator-based hooks. + + Each test function should accept two arguments: + notebook: nbformat.NotebookNode + notebook_filename: str + and yield NotebookError objects. + """ + all_errors = [] + + for filename in filenames: + notebook_path = Path(filename) + try: + with notebook_path.open(encoding="utf8") as f: + notebook = nbformat.read(f, nbformat.NO_CONVERT) + except Exception as exc: + all_errors.append( + cell_error( + filename, + 0, + code="NB000", + message=f"Failed to read notebook: {exc}", + ) + ) + continue + + for test_func in test_functions: + try: + for error in test_func(notebook, filename): + all_errors.append(error) + except Exception as exc: + all_errors.append( + cell_error( + filename, + 0, + code="NBXXX", + message=f"Exception in test {test_func.__name__}: {exc}", + ) + ) + + for error in all_errors: + print(error.format()) + + return 1 if all_errors else 0