From 61729b138ded53a53f5819c02fa61852e2d4fa96 Mon Sep 17 00:00:00 2001 From: William Warriner <6930772+wwarriner@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:39:23 -0500 Subject: [PATCH 1/4] refactor linkchecker script - extract special cases into yaml file - other refactorings to enhance maintainability --- .linkcheckerrc-special.yaml | 10 ++ .ruff.toml | 26 ++-- verification_scripts/__init__.py | 0 verification_scripts/linkchecker.py | 234 +++++++++++++++------------- 4 files changed, 151 insertions(+), 119 deletions(-) create mode 100644 .linkcheckerrc-special.yaml create mode 100644 verification_scripts/__init__.py diff --git a/.linkcheckerrc-special.yaml b/.linkcheckerrc-special.yaml new file mode 100644 index 000000000..f8864f0c5 --- /dev/null +++ b/.linkcheckerrc-special.yaml @@ -0,0 +1,10 @@ +drop: + https://doi.org: 300 + https://anaconda.org: 403 + https://claude.ai: 403 + https://idm.uab.edu/cgi-cas/xrmi/sites: 423 + https://idm.uab.edu/cgi-cas/xrmi/users: 423 +replace: + 200 OK: [300 Redirect, result] + ConnectTimeout: [408 Timeout, result] + https://padlock.idm.uab.edu: [423 Locked, url-after-redirection] diff --git a/.ruff.toml b/.ruff.toml index 3fc890a8a..f65fa13ca 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -5,6 +5,20 @@ required-version = ">=0.15.6" show-fixes = true [lint] +per-file-ignores = { "test/*" = [ + "ANN201", + "ANN202", + "D101", + "D102", + "D100", + "PT", +], "**/*.ipynb" = [ + "T201", + "ANN401", +], "**/__init__.py" = [ + "D104", +] } + ignore = [ "D203", # prefer conflicting D211 "D213", # prefer conflicting D212 @@ -21,18 +35,6 @@ ignore = [ "Q003", # END ] select = ["ALL"] -per-file-ignores = { "test/*" = [ - "ANN201", - "ANN202", - "D101", - "D102", - "D100", - "PT", -], "**/*.ipynb" = [ - "T201", - "ANN401", -] } - [format] indent-style = "space" line-ending = "lf" diff --git a/verification_scripts/__init__.py b/verification_scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/verification_scripts/linkchecker.py b/verification_scripts/linkchecker.py index e64eff52c..54690f81f 100644 --- a/verification_scripts/linkchecker.py +++ b/verification_scripts/linkchecker.py @@ -1,4 +1,9 @@ -"""Runs linkchecker on docs and produces human-readable output.""" +"""Runs linkchecker on docs and produces human-readable output. + +Install with `pip install -r requirements-dev.txt`. + +Use with `python ./verification_scripts/linkchecker.py`. +""" from __future__ import annotations @@ -9,23 +14,7 @@ import pandas as pd import yaml - -""" -How to use: - -python ./scripts/linkchecker.py -""" - -# Cleans up output of linkchecker - -OUTPUT = PurePath("out") -Path(OUTPUT).mkdir(exist_ok=True) - -# FILE PATHS -LINKCHECKER_LOG = OUTPUT / "linkchecker.log" -LINKCHECKER_RAW_CSV = OUTPUT / "linkchecker-raw.csv" -LINKCHECKER_OUT_CSV = OUTPUT / "linkchecker-out.csv" -LINKCHECKER_OUT_YAML = OUTPUT / "linkchecker-out.yml" +from attrs import define # COLUMNS ## ORIGINAL @@ -41,9 +30,10 @@ MARKDOWN_FILE = "document" -def run_linkchecker() -> None: +# READ +def _run_linkchecker(path: PurePath) -> None: """Run the linkchecker application.""" - with Path(LINKCHECKER_LOG).open("wb", buffering=0) as f: + with Path(path).open("wb", buffering=0) as f: subprocess.run( # noqa: S603 [_get_linkchecker_path(), "--config", ".linkcheckerrc", "docs"], stdout=f, @@ -51,9 +41,14 @@ def run_linkchecker() -> None: ) -def load_output() -> pd.DataFrame: +def _get_linkchecker_path() -> PurePath: + return PurePath(sys.executable).parent / "Scripts" / "linkchecker" + + +# PROCESS +def _load_results(path: PurePath) -> pd.DataFrame: """Load the raw linkchecker output dataframe.""" - raw_linkchecker_data = pd.read_csv(LINKCHECKER_RAW_CSV) + raw_linkchecker_data = pd.read_csv(path) raw_linkchecker_data = raw_linkchecker_data[ [RESULT, URLNAME, URL, PARENTNAME, LINE, COLUMN] ] @@ -66,7 +61,77 @@ def load_output() -> pd.DataFrame: ) -def replace_rows( +def _drop_ok_with_no_redirects(_df: pd.DataFrame) -> pd.DataFrame: + """Drop rows with OK code (200) if there is no redirection.""" + same_url = _df[URL_IN_MARKDOWN] == _df[URL_AFTER_REDIRECTION] + result_ok = _df[RESULT].str.startswith("200") + drop = same_url & result_ok + return _df[~drop] + + +@define +class Drop: + """Information about rows to drop from linkchecker output.""" + + url: str + code: str + + +@define +class Replace: + """Information about rows to replace in linkchecker output.""" + + find: str + replace: str + where: str + + +@define +class Cases: + """All special case information.""" + + drops: list[Drop] + replacements: list[Replace] + + +def _read_special_cases() -> Cases: + with Path(".linkcheckerrc-special.yaml").open("r") as f: + data = yaml.safe_load(f) + + drops = [Drop(url, str(code)) for url, code in data["drop"].items()] + replaces = [Replace(pattern, v[0], v[1]) for pattern, v in data["replace"].items()] + return Cases(drops, replaces) + + +def _file_uris_to_paths(_s: pd.Series) -> pd.Series: + """Modify file URIs to a normalized format. + + Example: + file:///D|/repos/uabrc.github.io/dir/file.md -> dir/file.md + + """ + if _s.empty: + return _s + + keep = _s.str.startswith("file:") & _s.str.contains("repos/uabrc.github.io") + splits = _s.str.split("repos/uabrc.github.io", expand=True) + + fixes = splits.iloc[:, -1][keep] + fixes = fixes.apply(PurePath) # type: ignore[reportCallIssue,reportArgumentType] + fixes = fixes.astype(str) + fixes = fixes.str.lstrip(os.sep) + + out = _s.copy() + out[keep] = fixes + return out + + +def _find_rows_containing(_s: pd.Series, _containing: str) -> pd.Series: + """Find rows containing the supplied string in the supplied series.""" + return _s.str.contains(_containing) + + +def _replace_rows( _s: pd.Series, _containing: str, _with: str, @@ -87,15 +152,7 @@ def replace_rows( return out -def drop_ok_with_no_redirects(_df: pd.DataFrame) -> pd.DataFrame: - """Drop rows with OK code (200) if there is no redirection.""" - same_url = _df[URL_IN_MARKDOWN] == _df[URL_AFTER_REDIRECTION] - result_ok = _df[RESULT].str.startswith("200") - drop = same_url & result_ok - return _df[~drop] - - -def drop_rows( +def _drop_rows( _df: pd.DataFrame, _in: str, _containing: str, @@ -115,91 +172,54 @@ def drop_rows( return _df[~contains] -def modify_file_uris(_s: pd.Series) -> pd.Series: - """Modify file URIs to a normalized format. - - Example: - file:///D|/repos/uabrc.github.io/dir/file.md -> dir/file.md - - """ - keep = _s.str.startswith("file:") & _s.str.contains("repos/uabrc.github.io") - splits = _s.str.split("repos/uabrc.github.io", expand=True) +def _handle_special_cases(results: pd.DataFrame) -> pd.DataFrame: + cases = _read_special_cases() + for replace in cases.replacements: + results[RESULT] = _replace_rows( + results[RESULT], + replace.find, + replace.replace, + find_in=results[replace.where], + ) - fixes = splits.iloc[:, -1][keep] - fixes = fixes.apply(lambda x: PurePath(x)) # pyright: ignore[reportCallIssue,reportArgumentType] - fixes = fixes.astype(str) - fixes = fixes.str.lstrip(os.sep) + for drop in cases.drops: + results = _drop_rows(results, URL_IN_MARKDOWN, drop.url, drop.code) - out = _s.copy() - out[keep] = fixes - return out + return results -def _find_rows_containing(_s: pd.Series, _containing: str) -> pd.Series: - """Find rows containing the supplied string in the supplied series.""" - return _s.str.contains(_containing) +# WRITE +def _to_csv(results: pd.DataFrame, path: PurePath) -> None: + results.to_csv(path, index=False) -def _get_linkchecker_path() -> PurePath: - return PurePath(sys.executable).parent / "Scripts" / "linkchecker" +def _to_yaml(results: pd.DataFrame, path: PurePath) -> None: + records = results.to_dict(orient="records") if not results.empty else "" + with Path(path).open("w") as f: + yaml.safe_dump(records, f, sort_keys=False) -if __name__ == "__main__": - run_linkchecker() - results = load_output() - - ### drop good urls - results = drop_ok_with_no_redirects(results) - - ### replace unhelpful error messages - # change 200 OK to 300 Redirect for human clarity on successful redirects - results[RESULT] = replace_rows(results[RESULT], "200 OK", "300 Redirect") - # replace long error messages with short codes - results[RESULT] = replace_rows(results[RESULT], "ConnectTimeout", "408 Timeout") - # special code for SSO urls - results[RESULT] = replace_rows( - results[RESULT], - "https://padlock.idm.uab.edu", - "423 Locked", - find_in=results[URL_AFTER_REDIRECTION], - ) +# ENTRY POINT +def main() -> None: + """Primary entrypoint.""" + # config + output_path = PurePath("out") + Path(output_path).mkdir(exist_ok=True) - ### special url ignore rules - # doi.org always redirects, that's its purpose, so we ignore - results = drop_rows( - results, - URL_IN_MARKDOWN, - "https://doi.org", - if_result_code="300", - ) - # if anaconda.org goes down we'll surely hear about it - results = drop_rows( - results, - URL_IN_MARKDOWN, - "https://anaconda.org", - if_result_code="403", - ) - # UAB specific requiring login - results = drop_rows( - results, - URL_IN_MARKDOWN, - "https://idm.uab.edu/cgi-cas/xrmi/sites", - if_result_code="423", - ) + # generate input + _run_linkchecker(output_path / "linkchecker.log") + results = _load_results(output_path / "linkchecker-raw.csv") - ### modify file uris to improve readability - results[MARKDOWN_FILE] = modify_file_uris(results[MARKDOWN_FILE]) + # process + results = _drop_ok_with_no_redirects(results) + results = _handle_special_cases(results) + results[MARKDOWN_FILE] = _file_uris_to_paths(results[MARKDOWN_FILE]) + results = results.sort_values(by=[RESULT, MARKDOWN_FILE, LINE, COLUMN]) - ### organize - results = results.sort_values( - by=[RESULT, URL_IN_MARKDOWN, MARKDOWN_FILE, LINE, COLUMN], - ) + # write output + _to_csv(results, output_path / "linkchecker-out.csv") + _to_yaml(results, output_path / "linkchecker-out.yml") - ### output - # csv - results.to_csv(LINKCHECKER_OUT_CSV, index=False) - # yml - records = results.to_dict(orient="records") - with Path(LINKCHECKER_OUT_YAML).open("w") as f: - yaml.safe_dump(records, f, sort_keys=False) +if __name__ == "__main__": + main() From 6b5cf31bdb449d4dcf0cb22ec317aa3e2719642e Mon Sep 17 00:00:00 2001 From: William Warriner <6930772+wwarriner@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:40:05 -0500 Subject: [PATCH 2/4] fix gitlab.com url --- docs/account/code.rc/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/account/code.rc/index.md b/docs/account/code.rc/index.md index f50b16e6f..9a0c230c6 100644 --- a/docs/account/code.rc/index.md +++ b/docs/account/code.rc/index.md @@ -5,7 +5,7 @@ hide: # Create and Manage Code.rc (GitLab) Accounts -UAB Research Computing maintains an on-premises GitLab server, part of the Research Computing System (RCS), called **Code.rc**. Generally speaking, [GitLab](https://about.gitlab.com/) is a service designed for collaborating on software development projects and is similar in structure and purpose to [GitHub](https://github.com/). In contrast to the Git hosting services [GitLab.com](https://gitlab.com) and [GitHub.com](https://github.com), Code.rc is hosted on-premises and stored in a secure physical environment on UAB Campus. +UAB Research Computing maintains an on-premises GitLab server, part of the Research Computing System (RCS), called **Code.rc**. Generally speaking, [GitLab](https://about.gitlab.com/) is a service designed for collaborating on software development projects and is similar in structure and purpose to [GitHub](https://github.com/). In contrast to the Git hosting services [GitLab.com](https://about.gitlab.com) and [GitHub.com](https://github.com), Code.rc is hosted on-premises and stored in a secure physical environment on UAB Campus. !!! important From c0915b0783ebab35897acdb136be725185be825a Mon Sep 17 00:00:00 2001 From: William Warriner <6930772+wwarriner@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:52:05 -0500 Subject: [PATCH 3/4] fix pre-commit errors --- .../posts/2025-10-07-migration-overview.md | 36 +++++++++---------- macros/__init__.py | 4 ++- macros/render.py | 10 +++--- scripts/__init__.py | 1 - 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/docs/news/posts/2025-10-07-migration-overview.md b/docs/news/posts/2025-10-07-migration-overview.md index e074bc659..aa739d882 100644 --- a/docs/news/posts/2025-10-07-migration-overview.md +++ b/docs/news/posts/2025-10-07-migration-overview.md @@ -153,13 +153,13 @@ Compute nodes are only able to run jobs from one of GPFS 4 or GPFS 5 so compute **Current GPFS 5 Compute Capacity Pre-Migration**: -| Partition | Available Nodes | Notes | -|---|---|---| -| mainline | 20 (2560 cores) | Include AMD CPUs. See the [list of changes](#changes-to-mainline-partitions) for details | -| pascalnodes | 0 | All pascalnodes will be moved during the 1st compute migration | -| amperenodes | 5 (10 A100s) | 10 amperenodes will be added during the 1st compute migration with the remaining 5 added once the migration completes | -| amperenodes-medium | 1 (2 A100s) | Nodes will be added to the amperenodes-medium partition during both compute migrations | -| largemem | 0 | largemem and largmem-long nodes will remain on GPFS 4 until the full migration completes. If you require access to the 1.5 TiB RAM nodes, contact support | +| Partition | Available Nodes | Notes | +|--------------------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------| +| mainline | 20 (2560 cores) | Include AMD CPUs. See the [list of changes](#changes-to-mainline-partitions) for details | +| pascalnodes | 0 | All pascalnodes will be moved during the 1st compute migration | +| amperenodes | 5 (10 A100s) | 10 amperenodes will be added during the 1st compute migration with the remaining 5 added once the migration completes | +| amperenodes-medium | 1 (2 A100s) | Nodes will be added to the amperenodes-medium partition during both compute migrations | +| largemem | 0 | largemem and largmem-long nodes will remain on GPFS 4 until the full migration completes. If you require access to the 1.5 TiB RAM nodes, contact support | #### Changes to Mainline Partitions @@ -190,17 +190,17 @@ To best accomodate workload for both migrated and not-yet-migrated users, comput **50% Migration Completion**: -| Partition | GPFS 4 Nodes | GPFS 5 Nodes | Notes | -|---|---|---|---| -| mainline | 25 | 68 | GPFS 4 mainline partitions will use the remaining largemem and amd-hdr100 nodes. Mixed Intel and AMD hardware on both GPFS 4 and GPFS 5 | -| pascalnodes | 0 | 17 | | -| pascalnodes-medium | 0 | 8 | | -| amperenodes | 5 (10 A100s) | 15 (30 A100s) | | -| amperenodes-medium | 1 (2 A100s) | 7 (14 A100s) | | -| largemem | 13 | 0 | Shared with mainline partitions | -| largemem-long | 5 | 0 | Shared with mainline partitions | -| amd-hdr100 | 12 | 20 | Shared with mainline partitions | -| intel-dcb | 9 | 0 | | +| Partition | GPFS 4 Nodes | GPFS 5 Nodes | Notes | +|--------------------|--------------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------| +| mainline | 25 | 68 | GPFS 4 mainline partitions will use the remaining largemem and amd-hdr100 nodes. Mixed Intel and AMD hardware on both GPFS 4 and GPFS 5 | +| pascalnodes | 0 | 17 | | +| pascalnodes-medium | 0 | 8 | | +| amperenodes | 5 (10 A100s) | 15 (30 A100s) | | +| amperenodes-medium | 1 (2 A100s) | 7 (14 A100s) | | +| largemem | 13 | 0 | Shared with mainline partitions | +| largemem-long | 5 | 0 | Shared with mainline partitions | +| amd-hdr100 | 12 | 20 | Shared with mainline partitions | +| intel-dcb | 9 | 0 | | #### Effects on Queue Times diff --git a/macros/__init__.py b/macros/__init__.py index e2bb8bc45..c473ee01c 100644 --- a/macros/__init__.py +++ b/macros/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations from pathlib import Path, PurePath -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING import yaml @@ -13,6 +13,8 @@ from .render import CardRenderer if TYPE_CHECKING: + from collections.abc import Callable + from mkdocs.structure.pages import Page from mkdocs_macros.plugin import MacrosPlugin diff --git a/macros/render.py b/macros/render.py index ba2126d70..3f9429508 100644 --- a/macros/render.py +++ b/macros/render.py @@ -4,12 +4,14 @@ import textwrap from pathlib import PurePath -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING from macros.card import Card, CardNamespace, EmojiSizesCss, EmojiVerticalAlignmentCss from macros.util import normalize_page_link if TYPE_CHECKING: + from collections.abc import Callable + from mkdocs.structure.pages import Page @@ -145,7 +147,7 @@ def _icon_vertical_alignment(self) -> str: def _icon_color(self) -> str: color = self._card.icon_color - return color if color else self._DEFAULT_ICON_COLOR + return color or self._DEFAULT_ICON_COLOR #### CONTENT PART def _content_part(self) -> str | None: @@ -159,11 +161,11 @@ def _link_part(self) -> str | None: def _link_text(self) -> str: text = self._card.link_text - return text if text else self._DEFAULT_LINK_TEXT + return text or self._DEFAULT_LINK_TEXT def _link_icon(self) -> str: name = self._card.link_icon_name - return name if name else self._DEFAULT_LINK_ICON_NAME + return name or self._DEFAULT_LINK_ICON_NAME def _link_url(self) -> str | None: url = self._card.link_url diff --git a/scripts/__init__.py b/scripts/__init__.py index 6e031999e..e69de29bb 100644 --- a/scripts/__init__.py +++ b/scripts/__init__.py @@ -1 +0,0 @@ -# noqa: D104 From 83fdf631d328c6c10bf14221bfd390de441a8c02 Mon Sep 17 00:00:00 2001 From: William Warriner <6930772+wwarriner@users.noreply.github.com> Date: Mon, 6 Apr 2026 14:16:07 -0500 Subject: [PATCH 4/4] fix broken URL to new URL --- docs/data_management/storage/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data_management/storage/index.md b/docs/data_management/storage/index.md index a8e567a00..cdd89cf22 100644 --- a/docs/data_management/storage/index.md +++ b/docs/data_management/storage/index.md @@ -190,7 +190,7 @@ Periodically review permissions, clean up unused data, and follow institutional At this time, Research Computing does not offer a method of archival. If you have need for archival, please feel free to contact [Support](../../help/support.md) to start a conversation. -A possible external resource for archival is available through University of Oklahoma (OU) Supercomputing Center for Education and Research (OSCER). Please see the following link for details: . +A possible external resource for archival is available through University of Oklahoma (OU) Supercomputing Center for Education and Research (OSCER). Please see the following link for details: . ### Backups