From 045aad0a1d9d19a5683f3649417d1dabce1fb8de Mon Sep 17 00:00:00 2001
From: Frankie Roberto
Date: Mon, 1 Jun 2026 16:48:03 +0100
Subject: [PATCH] Add Markdown checks

This adds a script which will do some basic Markdown syntax checks
(currently only checking for the presence of a redundant h1).

It can be run locally, or will run automatically on any open pull
requests, with the result being a comment left (or deleted once
resolved).

Copied from
https://github.com/NHSDigital/digital-prevention-services-portfolio/tree/main/.github
where we've been testing it out.
---
 .github/scripts/check-markdown.mjs   | 178 +++++++++++++++++++++++++++
 .github/scripts/pr-review.mjs        | 164 ++++++++++++++++++++++++
 .github/workflows/check-markdown.yml |  50 ++++++++
 package.json                         |   1 +
 4 files changed, 393 insertions(+)
 create mode 100644 .github/scripts/check-markdown.mjs
 create mode 100644 .github/scripts/pr-review.mjs
 create mode 100644 .github/workflows/check-markdown.yml

diff --git a/.github/scripts/check-markdown.mjs b/.github/scripts/check-markdown.mjs
new file mode 100644
index 000000000..a21c09d1b
--- /dev/null
+++ b/.github/scripts/check-markdown.mjs
@@ -0,0 +1,178 @@
+/**
+ * Checks markdown files for common mistakes.
+ *
+ * Current checks:
+ * - H1 headings (# ...): the H1 is already generated from the `title` field
+ *   in the frontmatter, so adding a `#` heading manually creates a duplicate.
+ *   Each line is tested on its own, so a line starting with `# ` inside a
+ *   fenced code block is reported as well.
+ *
+ * Only markdown files under app/ are checked.
+ *
+ * When run directly, scans all markdown files under app/:
+ *   npm run check:markdown
+ *
+ * Exports `getMistakes(baseRef)` for use by the PR review script, which
+ * checks only lines newly added in a pull request diff.
+ *
+ * Exits with code 1 if any mistakes are found.
+ */
+
+import { execFileSync } from 'child_process'
+import { readFileSync, readdirSync } from 'fs'
+import { join } from 'path'
+import { fileURLToPath } from 'url'
+
+// Directory containing the pages to check, relative to the repository root
+const CONTENT_DIR = 'app'
+
+// An H1 is a single `#` followed by a space at the very start of a line
+const H1_PATTERN = /^# /
+
+// Node's default output limit (1 MiB) is too small for large pull requests
+const MAX_DIFF_BYTES = 64 * 1024 * 1024
+
+const H1_MESSAGE =
+  'The page title H1 is already generated from the `title` field in the frontmatter. ' +
+  'If this heading duplicates the title, remove it. ' +
+  'If it is a different heading, change it to an H2 using `##`.'
+
+/**
+ * Recursively finds all .md files under the given directory.
+ *
+ * @param {string} dir - directory to search
+ * @returns {string[]} paths of the files found, each prefixed with `dir`
+ */
+function findMarkdownFiles(dir) {
+  const entries = readdirSync(dir, { withFileTypes: true })
+  return entries.flatMap((entry) => {
+    const fullPath = join(dir, entry.name)
+    if (entry.isDirectory()) return findMarkdownFiles(fullPath)
+    if (entry.isFile() && entry.name.endsWith('.md')) return [fullPath]
+    return []
+  })
+}
+
+/**
+ * Scans all markdown files under app/ and returns mistakes.
+ *
+ * @returns {{ path: string, line: number, message: string }[]} one entry per
+ *   offending line, with a 1-based line number
+ */
+export function scanAllFiles() {
+  const mistakes = []
+
+  for (const filePath of findMarkdownFiles(CONTENT_DIR)) {
+    const lines = readFileSync(filePath, 'utf8').split('\n')
+    for (const [index, text] of lines.entries()) {
+      if (H1_PATTERN.test(text)) {
+        mistakes.push({ path: filePath, line: index + 1, message: H1_MESSAGE })
+      }
+    }
+  }
+
+  return mistakes
+}
+
+/**
+ * Parses unified diff output and returns mistakes found in added lines.
+ * Only markdown files under app/ are considered, matching `scanAllFiles`.
+ *
+ * @param {string} diff - output of `git diff`
+ * @returns {{ path: string, line: number, message: string }[]} one entry per
+ *   offending added line, numbered as in the new version of the file
+ */
+export function getMistakesFromDiff(diff) {
+  const mistakes = []
+  let currentFile = null
+  let inHunk = false
+  let lineNumber = 0
+
+  for (const rawLine of diff.split('\n')) {
+    // e.g. "diff --git a/app/test.md b/app/test.md"
+    const diffFileMatch = rawLine.match(/^diff --git a\/.+ b\/(.+)$/)
+    if (diffFileMatch) {
+      const filePath = diffFileMatch[1]
+      const isContentPage =
+        filePath.startsWith(`${CONTENT_DIR}/`) && filePath.endsWith('.md')
+      currentFile = isContentPage ? filePath : null
+      inHunk = false
+      lineNumber = 0
+      continue
+    }
+
+    if (!currentFile) continue
+
+    // e.g. "@@ -3,5 +7,8 @@ layout: page"
+    const hunkMatch = rawLine.match(/^@@ -\d+(?:,\d+)? \+(\d+)/)
+    if (hunkMatch) {
+      lineNumber = Number.parseInt(hunkMatch[1], 10) - 1
+      inHunk = true
+      continue
+    }
+
+    // Lines before the first hunk are file headers ("index", "---", "+++").
+    // Skipping by position rather than by prefix keeps added lines whose
+    // content starts with "++" from being mistaken for a "+++" header.
+    if (!inHunk) continue
+
+    // "\ No newline at end of file" annotates the previous line and is not a
+    // line of the file, so it must not advance the new-file line number
+    if (rawLine.startsWith('\\')) continue
+
+    if (rawLine.startsWith('+')) {
+      lineNumber++
+      // Added line that is an H1 once the leading "+" marker is removed
+      if (H1_PATTERN.test(rawLine.slice(1))) {
+        mistakes.push({
+          path: currentFile,
+          line: lineNumber,
+          message: H1_MESSAGE
+        })
+      }
+    } else if (!rawLine.startsWith('-')) {
+      // Context line -- still advances the new-file line number
+      lineNumber++
+    }
+    // Deleted lines ('-') do not advance the new-file line number
+  }
+
+  return mistakes
+}
+
+/**
+ * Returns mistakes found only in lines newly added in a pull request diff.
+ * Used by the PR review script to post inline comments on changed lines only.
+ *
+ * @param {string} baseRef - git ref to diff against, e.g. 'origin/main'
+ * @returns {{ path: string, line: number, message: string }[]}
+ * @throws {Error} if the `git diff` command fails
+ */
+export function getMistakes(baseRef) {
+  // Arguments are passed as an array so the ref never goes through a shell
+  const diff = execFileSync('git', ['diff', `${baseRef}...HEAD`], {
+    encoding: 'utf8',
+    maxBuffer: MAX_DIFF_BYTES
+  })
+
+  return getMistakesFromDiff(diff)
+}
+
+// Run locally when invoked directly ----------------------------------------
+
+const isMain = process.argv[1] === fileURLToPath(import.meta.url)
+
+if (isMain) {
+  const mistakes = scanAllFiles()
+
+  if (mistakes.length === 0) {
+    console.log('No markdown issues found.')
+    process.exit(0)
+  }
+
+  for (const { path, line, message } of mistakes) {
+    console.warn(`${path}:${line}: ${message}`)
+  }
+
+  process.exit(1)
+}
diff --git a/.github/scripts/pr-review.mjs b/.github/scripts/pr-review.mjs
new file mode 100644
index 000000000..8a2f67b87
--- /dev/null
+++ b/.github/scripts/pr-review.mjs
@@ -0,0 +1,164 @@
+/**
+ * GitHub Actions script: posts inline PR review comments for any markdown
+ * mistakes found by check-markdown.mjs.
+ *
+ * - Posts a REQUEST_CHANGES review with one inline comment per new mistake.
+ * - Deletes comments that no longer apply (e.g. the line was fixed or the
+ *   file was deleted).
+ * - Dismisses any previous REQUEST_CHANGES reviews when no mistakes remain.
+ * - Exits with code 1 if mistakes are found, so the CI check fails.
+ *
+ * Required environment variables (set automatically by the workflow):
+ *   GITHUB_TOKEN, REPO, PR_NUMBER, HEAD_SHA, BASE_REF
+ */
+
+import { getMistakes } from './check-markdown.mjs'
+
+const { GITHUB_TOKEN, REPO, BASE_REF, PR_NUMBER, HEAD_SHA } = process.env
+
+const BOT_USER = 'github-actions[bot]'
+
+/**
+ * Sends an authenticated request to the GitHub REST API.
+ *
+ * @param {string} path - API path starting with `/`, appended to the API host
+ * @param {RequestInit} [options] - extra `fetch` options; a JSON
+ *   `Content-Type` header is added whenever a body is supplied
+ * @returns {Promise<Response>} the response, whatever its status code
+ */
+async function githubFetch(path, options = {}) {
+  const response = await fetch(`https://api.github.com${path}`, {
+    ...options,
+    headers: {
+      Authorization: `Bearer ${GITHUB_TOKEN}`,
+      Accept: 'application/vnd.github+json',
+      'X-GitHub-Api-Version': '2022-11-28',
+      ...(options.body ? { 'Content-Type': 'application/json' } : {}),
+      ...options.headers
+    }
+  })
+  return response
+}
+
+const mistakes = getMistakes(`origin/${BASE_REF}`)
+
+// Fetch existing review comments (a single page of up to 100 comments)
+const commentsResponse = await githubFetch(
+  `/repos/${REPO}/pulls/${PR_NUMBER}/comments?per_page=100`
+)
+if (!commentsResponse.ok) {
+  console.error(
+    'Failed to fetch existing comments:',
+    await commentsResponse.text()
+  )
+  process.exit(1)
+}
+const existingComments = await commentsResponse.json()
+// Keep only the bot's comments (tolerating entries without a `user`).
+// `line` is null for outdated diff comments, fall back to `original_line`
+const botComments = existingComments
+  .filter((c) => c.user?.login === BOT_USER)
+  .map((c) => ({ ...c, line: c.line ?? c.original_line }))
+
+// Delete comments that no longer have a matching mistake
+// (line was fixed or file was deleted)
+const staleComments = botComments.filter(
+  (c) =>
+    !mistakes.some(
+      (m) => m.path === c.path && m.line === c.line && m.message === c.body
+    )
+)
+
+for (const comment of staleComments) {
+  console.log(`Deleting resolved comment on ${comment.path}:${comment.line}`)
+  const deleteResponse = await githubFetch(
+    `/repos/${REPO}/pulls/comments/${comment.id}`,
+    { method: 'DELETE' }
+  )
+  if (!deleteResponse.ok) {
+    console.error('Failed to delete comment:', await deleteResponse.text())
+  }
+}
+
+// If there are no mistakes, dismiss any outstanding REQUEST_CHANGES reviews
+// and exit cleanly so the CI check passes
+if (mistakes.length === 0) {
+  const reviewsResponse = await githubFetch(
+    `/repos/${REPO}/pulls/${PR_NUMBER}/reviews?per_page=100`
+  )
+  if (!reviewsResponse.ok) {
+    console.error('Failed to fetch reviews:', await reviewsResponse.text())
+    process.exit(1)
+  }
+  const reviews = await reviewsResponse.json()
+  const pendingReviews = reviews.filter(
+    (r) => r.user?.login === BOT_USER && r.state === 'CHANGES_REQUESTED'
+  )
+  for (const review of pendingReviews) {
+    console.log(`Dismissing review ${review.id}`)
+    const dismissResponse = await githubFetch(
+      `/repos/${REPO}/pulls/${PR_NUMBER}/reviews/${review.id}/dismissals`,
+      {
+        method: 'PUT',
+        body: JSON.stringify({
+          message: 'No markdown issues found — all clear.'
+        })
+      }
+    )
+    if (!dismissResponse.ok) {
+      console.error('Failed to dismiss review:', await dismissResponse.text())
+    }
+  }
+  console.log('No markdown issues found.')
+  process.exit(0)
+}
+
+// Post new comments for mistakes that don't already have a comment
+const newComments = mistakes
+  .filter(
+    ({ path, line, message }) =>
+      !botComments.some(
+        (c) => c.path === path && c.line === line && c.body === message
+      )
+  )
+  .map(({ path, line, message }) => ({
+    path,
+    line,
+    side: 'RIGHT',
+    body: message
+  }))
+
+if (newComments.length === 0) {
+  console.log('All issues already have review comments. Nothing new to post.')
+  process.exit(1)
+}
+
+const issueCount = newComments.length
+const fileCount = new Set(newComments.map((c) => c.path)).size
+const issueWord = issueCount === 1 ? 'issue' : 'issues'
+const fileWord = fileCount === 1 ? 'file' : 'files'
+
+console.log(`Posting ${issueCount} new ${issueWord}...`)
+
+const response = await githubFetch(
+  `/repos/${REPO}/pulls/${PR_NUMBER}/reviews`,
+  {
+    method: 'POST',
+    body: JSON.stringify({
+      commit_id: HEAD_SHA,
+      event: 'REQUEST_CHANGES',
+      body:
+        `Found ${issueCount} ${issueWord} across ${fileCount} markdown ${fileWord}. ` +
+        'Please address the inline comments below.',
+      comments: newComments
+    })
+  }
+)
+
+if (!response.ok) {
+  console.error('Failed to post review:', await response.text())
+  process.exit(1)
+}
+
+console.log('Review posted successfully.')
+process.exit(1)
diff --git a/.github/workflows/check-markdown.yml b/.github/workflows/check-markdown.yml
new file mode 100644
index 000000000..ce502518f
--- /dev/null
+++ b/.github/workflows/check-markdown.yml
@@ -0,0 +1,50 @@
+name: Check markdown
+
+on:
+  pull_request:
+    branches:
+      - main
+
+  # Enable running this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  check-markdown:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          # Check out the pull request's head commit instead of the merge
+          # commit, so line numbers match the commit the review is posted on.
+          # This is empty for manual runs, which then use the default ref.
+          ref: ${{ github.event.pull_request.head.sha }}
+          # Full history is needed so we can diff against the base branch
+          fetch-depth: 0
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version-file: '.nvmrc'
+
+      - name: Fetch base branch
+        if: github.event_name == 'pull_request'
+        run: git fetch origin ${{ github.base_ref }}
+
+      - name: Check for issues in changed markdown files
+        if: github.event_name == 'pull_request'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+          BASE_REF: ${{ github.base_ref }}
+          REPO: ${{ github.repository }}
+        run: node .github/scripts/pr-review.mjs
+
+      # Manual runs have no pull request to review, so scan every file instead
+      - name: Check for issues in all markdown files
+        if: github.event_name == 'workflow_dispatch'
+        run: node .github/scripts/check-markdown.mjs
diff --git a/package.json b/package.json
index e454f0ae1..d5c11c497 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "lint:scss:fix": "stylelint '**/*.scss' --fix",
     "lint": "npm run lint:prettier && npm run lint:js && npm run lint:scss",
     "lint:fix": "npm run lint:prettier:fix && npm run lint:js:fix && npm run lint:scss:fix",
+    "check:markdown": "node .github/scripts/check-markdown.mjs",
     "prestart": "npm run build",
     "start": "eleventy --serve --quiet",
     "preserve": "npm run build",