#!/usr/bin/env python3
"""
Lint check for broken image references in documentation.

This script scans all markdown (``*.md``) and reStructuredText (``*.rst``)
files in the docs directory and verifies that:
1. All referenced images exist in the filesystem
2. Image paths are correctly formatted
3. No orphaned images exist (optional check)

Usage:
    python3 check_broken_images.py [--docs-dir DIR] [--check-orphans]

The process exits with status 1 when at least one broken reference (or
unreadable file) is found, or when no documentation files exist; orphaned
images only produce warnings and never change the exit status.
"""

import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple


class ImageChecker:
    """Check for broken image references in markdown documentation.

    After :meth:`run` (or individual :meth:`check_file` calls) the results
    are available on the instance:

    * ``errors`` - one message per broken reference or unreadable file
    * ``warnings`` - one message per orphaned image (only with orphan check)
    * ``checked_files`` / ``checked_images`` - counters for the summary
    """

    # Pattern 1: MyST grid card syntax - :img-top: path
    _IMG_TOP_PATTERN = re.compile(r':img-top:\s+(.+?)(?:\s|$)')

    # Pattern 2: Standard markdown - ![alt](path) or ![alt](path "title")
    _MD_PATTERN = re.compile(r'!\[.*?\]\(([^\s\)"]+)')

    # Pattern 3: HTML img tags - <img src="path"> or <img src='path'>
    # The whitespace before ``src`` keeps attributes such as ``data-src``
    # from being mistaken for the real source attribute.
    _HTML_PATTERN = re.compile(
        r'<img\b[^>]*?\ssrc\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE
    )

    # Pattern 4: reStructuredText image directive
    _RST_PATTERN = re.compile(r'\.\.\s+image::\s+(.+?)(?:\s|$)')

    # Pattern 5: reStructuredText figure directive
    _RST_FIGURE_PATTERN = re.compile(r'\.\.\s+figure::\s+(.+?)(?:\s|$)')

    # Evaluation order matters only for the order of results within a line.
    _PATTERNS = (
        _IMG_TOP_PATTERN,
        _MD_PATTERN,
        _HTML_PATTERN,
        _RST_PATTERN,
        _RST_FIGURE_PATTERN,
    )

    # References that do not point into the local filesystem: URLs,
    # protocol-relative URLs and inline data URIs.
    _SKIPPED_PREFIXES = ('http://', 'https://', '//', 'data:')

    # File suffixes (lower-case) treated as images by the orphan check.
    _IMAGE_EXTENSIONS = frozenset(
        {'.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.bmp'}
    )

    def __init__(self, docs_dir: str = "docs"):
        """
        Create a checker for one documentation tree.

        Args:
            docs_dir: Root directory of the documentation sources
        """
        self.docs_dir = Path(docs_dir)
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.checked_files = 0
        self.checked_images = 0
        # Parsed references per file, so the orphan check does not re-read
        # (and re-report read errors for) files already handled by check_file.
        self._ref_cache: Dict[Path, List[Tuple[str, int]]] = {}

    def find_markdown_files(self) -> List[Path]:
        """Find all markdown (and reStructuredText) files in the docs directory."""
        md_files: List[Path] = []
        for ext in ("*.md", "*.rst"):
            md_files.extend(self.docs_dir.rglob(ext))
        return sorted(md_files)

    @staticmethod
    def _strip_quotes(text: str) -> str:
        """Return *text* without one pair of matching surrounding quotes."""
        if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
            return text[1:-1]
        return text

    @staticmethod
    def _display_path(path: Path) -> Path:
        """Return *path* relative to the working directory when possible."""
        try:
            return path.relative_to(Path.cwd())
        except ValueError:
            # Relative paths, or paths outside the working directory.
            return path

    def extract_image_references(self, file_path: Path) -> List[Tuple[str, int]]:
        """
        Extract all image references from a markdown file.

        Remote references (``http://``, ``https://``, ``//``) and ``data:``
        URIs are skipped. If the file cannot be read or decoded as UTF-8, an
        error is recorded in ``self.errors`` and an empty list is returned.

        Returns list of tuples: (image_path, line_number)
        """
        try:
            content = Path(file_path).read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            self.errors.append(f"Error reading {file_path}: {e}")
            return []

        image_refs: List[Tuple[str, int]] = []
        for line_num, line in enumerate(content.split('\n'), start=1):
            for pattern in self._PATTERNS:
                for match in pattern.findall(line):
                    img_path = self._strip_quotes(match.strip())
                    # An empty reference would resolve to the document's own
                    # directory and produce a misleading "not a file" error.
                    if not img_path:
                        continue
                    if img_path.startswith(self._SKIPPED_PREFIXES):
                        continue
                    image_refs.append((img_path, line_num))

        return image_refs

    def _references_for(self, md_file: Path) -> List[Tuple[str, int]]:
        """Return the (cached) image references of *md_file*."""
        if md_file not in self._ref_cache:
            self._ref_cache[md_file] = self.extract_image_references(md_file)
        return self._ref_cache[md_file]

    def resolve_image_path(self, md_file: Path, img_ref: str) -> Path:
        """
        Resolve relative image path to absolute path.

        References starting with ``/`` are treated as relative to the
        documentation root (the Sphinx convention for "absolute" image
        paths). For backward compatibility, if that location does not exist
        but the literal filesystem path does, the literal path is returned.

        Args:
            md_file: Path to the markdown file
            img_ref: Image reference from the markdown file

        Returns:
            Resolved absolute path
        """
        if img_ref.startswith('/'):
            rooted = (self.docs_dir / img_ref.lstrip('/')).resolve()
            literal = Path(img_ref).resolve()
            if rooted.exists() or not literal.exists():
                return rooted
            return literal

        # Everything else is relative to the directory of the document.
        return (md_file.parent / img_ref).resolve()

    def check_file(self, md_file: Path) -> None:
        """Check all image references in a single markdown file."""
        self.checked_files += 1
        rel_md_path = self._display_path(md_file)

        for img_ref, line_num in self._references_for(md_file):
            self.checked_images += 1
            img_path = self.resolve_image_path(md_file, img_ref)

            if not img_path.exists():
                self.errors.append(
                    f"{rel_md_path}:{line_num}: Broken image reference: '{img_ref}' "
                    f"(resolved to: {img_path})"
                )
            elif not img_path.is_file():
                self.errors.append(
                    f"{rel_md_path}:{line_num}: Image path is not a file: '{img_ref}' "
                    f"(resolved to: {img_path})"
                )

    def find_orphaned_images(self) -> Set[Path]:
        """
        Find images in the images directory that are not referenced anywhere.

        Only files below ``<docs_dir>/images`` with a known image suffix are
        considered. Both sides of the comparison are fully resolved paths;
        comparing unresolved ``docs_dir``-relative paths against resolved
        references would flag every image as orphaned.

        Returns:
            Set of orphaned image paths (resolved, absolute)
        """
        images_dir = self.docs_dir / 'images'
        all_images: Set[Path] = set()
        if images_dir.exists():
            all_images = {
                img_file.resolve()
                for img_file in images_dir.rglob('*')
                if img_file.is_file()
                and img_file.suffix.lower() in self._IMAGE_EXTENSIONS
            }

        referenced_images: Set[Path] = set()
        for md_file in self.find_markdown_files():
            for img_ref, _ in self._references_for(md_file):
                img_path = self.resolve_image_path(md_file, img_ref)
                if img_path.exists():
                    referenced_images.add(img_path)

        return all_images - referenced_images

    def run(self, check_orphans: bool = False) -> int:
        """
        Run the image checker.

        Args:
            check_orphans: Whether to check for orphaned images

        Returns:
            Exit code (0 for success, 1 for errors or when no documentation
            files were found); warnings alone do not cause a failure
        """
        print(f"Checking images in {self.docs_dir}...")
        print()

        md_files = self.find_markdown_files()

        if not md_files:
            print(f"No markdown files found in {self.docs_dir}")
            return 1

        # Check each file
        for md_file in md_files:
            self.check_file(md_file)

        # Check for orphaned images if requested
        if check_orphans:
            for img in sorted(self.find_orphaned_images()):
                rel_path = self._display_path(img)
                self.warnings.append(f"Orphaned image (not referenced): {rel_path}")

        # Print results
        print(f"Checked {self.checked_files} markdown files")
        print(f"Checked {self.checked_images} image references")
        print()

        if self.errors:
            print(f"❌ Found {len(self.errors)} broken image reference(s):")
            print()
            for error in self.errors:
                print(f" {error}")
            print()

        if self.warnings:
            print(f"⚠️ Found {len(self.warnings)} warning(s):")
            print()
            for warning in self.warnings:
                print(f" {warning}")
            print()

        if self.errors:
            return 1

        if self.warnings:
            print("✅ All image references are valid (warnings only)")
        else:
            print("✅ All image references are valid!")
        return 0


def main() -> None:
    """Main entry point: parse the command line, run the check and exit."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Check for broken image references in documentation"
    )
    parser.add_argument(
        "--docs-dir",
        default="docs",
        help="Documentation directory to check (default: docs)"
    )
    parser.add_argument(
        "--check-orphans",
        action="store_true",
        help="Also check for orphaned images (images not referenced anywhere)"
    )

    args = parser.parse_args()

    checker = ImageChecker(args.docs_dir)
    exit_code = checker.run(check_orphans=args.check_orphans)

    sys.exit(exit_code)


if __name__ == "__main__":
    main()
images (warning only) + if: success() || failure() + run: python3 check_broken_images.py --check-orphans || true From 61f5ad0ec850551247a76f686ba4936931da8a99 Mon Sep 17 00:00:00 2001 From: faraaz-bot Date: Mon, 16 Mar 2026 14:18:14 -0600 Subject: [PATCH 3/5] Remove unused check-images.yml workflow file The image checker is now integrated into linting.yml, so the standalone check-images.yml workflow is no longer needed. --- .github/workflows/check-images.yml | 32 ------------------------------ 1 file changed, 32 deletions(-) delete mode 100644 .github/workflows/check-images.yml diff --git a/.github/workflows/check-images.yml b/.github/workflows/check-images.yml deleted file mode 100644 index 2cc28d43..00000000 --- a/.github/workflows/check-images.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Check Images - -on: - push: - branches: - - develop - - main - pull_request: - branches: - - develop - - main - -jobs: - check-broken-images: - name: Check for broken image links - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Check for broken image references - run: python3 check_broken_images.py - - - name: Check for orphaned images (warning only) - if: success() || failure() - run: python3 check_broken_images.py --check-orphans || true From d29935b01e97237008d5ad7d349b34a2adeff78b Mon Sep 17 00:00:00 2001 From: faraaz-bot Date: Mon, 16 Mar 2026 14:29:26 -0600 Subject: [PATCH 4/5] Trigger workflow re-run From 621efc5cd730bbde304398c2ebdc1ded919b2d1b Mon Sep 17 00:00:00 2001 From: faraaz-bot Date: Mon, 16 Mar 2026 14:33:54 -0600 Subject: [PATCH 5/5] Create standalone Check Images workflow - Move image checking to separate workflow file for better visibility - Restore linting.yml to original state (only calls reusable workflow) - Add concurrency control to prevent duplicate runs - Workflow will now appear as separate check in PR status 
--- .github/workflows/check-images.yml | 36 ++++++++++++++++++++++++++++++ .github/workflows/linting.yml | 20 ----------------- 2 files changed, 36 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/check-images.yml diff --git a/.github/workflows/check-images.yml b/.github/workflows/check-images.yml new file mode 100644 index 00000000..9807dbda --- /dev/null +++ b/.github/workflows/check-images.yml @@ -0,0 +1,36 @@ +name: Check Images + +on: + push: + branches: + - develop + - main + pull_request: + branches: + - develop + - main + +concurrency: + group: ${{ github.ref }}-check-images + cancel-in-progress: true + +jobs: + check-broken-images: + name: Broken Image Links + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Check for broken image references + run: python3 check_broken_images.py + + - name: Check for orphaned images (warning only) + if: success() || failure() + run: python3 check_broken_images.py --check-orphans || true diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8598b2d2..a56602ba 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,23 +14,3 @@ jobs: call-workflow-passing-data: name: Documentation uses: ROCm/rocm-docs-core/.github/workflows/linting.yml@develop - - check-broken-images: - name: Check Images - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Check for broken image references - run: python3 check_broken_images.py - - - name: Check for orphaned images (warning only) - if: success() || failure() - run: python3 check_broken_images.py --check-orphans || true