|
| 1 | +#!/usr/bin/env node |
| 2 | +/** |
| 3 | + * MDX frontmatter `description` 字段校验脚本 |
| 4 | + * |
| 5 | + * 背景 |
| 6 | + * - Bing Webmaster Tools 2026-05 报告 118 个页面 meta description 太短。 |
| 7 | + * - 根因:fumadocs 的 docs 页面直接读 MDX frontmatter `description`,没兜底; |
| 8 | + * 而 content/docs/ 下 292 个 MDX 里 96 个完全没写 description、67 个写成空字符串、 |
| 9 | + * 35 个 < 20 字符。 |
| 10 | + * - 代码层兜底已经做了(lib/seo-description.ts),让所有页面 meta description >= 80 字符。 |
| 11 | + * 但兜底是"补救",不是质量保证 —— 兜底版本是模板化拼接,比作者手写的精准内容差。 |
| 12 | + * |
| 13 | + * 这个脚本的角色 |
| 14 | + * - 在 CI/pre-commit 阶段拦截 **新增/修改** 的 MDX 文件,强制作者手写 description。 |
| 15 | + * - 老文件不返工(grandfather)—— 由 Layer 1 代码兜底兜住。 |
| 16 | + * - 自动豁免 leetcode/ 目录和 _translated 后缀文件(前者程序化导入太多,后者是机翻产物)。 |
| 17 | + * |
| 18 | + * 用法 |
| 19 | + * node scripts/check-frontmatter-description.mjs # 默认 --changed |
| 20 | + * node scripts/check-frontmatter-description.mjs --changed # 只检查 git 已变更的 mdx(PR/pre-commit 用) |
| 21 | + * node scripts/check-frontmatter-description.mjs --all # 扫全部,输出统计报表(不退出非 0) |
| 22 | + * node scripts/check-frontmatter-description.mjs --strict # 配合 --all 时遇违规退 1(暂不开放,老文件太多) |
| 23 | + * |
| 24 | + * 退出码 |
| 25 | + * 0 通过 / 报表模式 |
| 26 | + * 1 --changed 模式下发现新增/修改的 MDX 违反规则;或 --all --strict 模式下存在违规 |
| 27 | + * |
| 28 | + * 接入位置 |
| 29 | + * - .husky/pre-commit (pnpm check:frontmatter — --changed 模式) |
| 30 | + * - .github/workflows/content-check.yml (CI PR 检查) |
| 31 | + * |
| 32 | + * 后续可考虑 |
| 33 | + * - 把 leetcode/ 豁免改为"必须用模板生成",由 Layer 3 的回填脚本保证 |
| 34 | + * - 把 MIN_LENGTH 提到 100 字符(先保守 60 让老贡献者适应) |
| 35 | + */ |
| 36 | + |
import fs from "node:fs";
import path from "node:path";
import { execFileSync, execSync } from "node:child_process";
import matter from "gray-matter";
| 41 | + |
/** Repository root; the script resolves every path against the current working directory. */
const ROOT = process.cwd();

/** Absolute path of the docs tree that is scanned for `.md` / `.mdx` files. */
const DOCS_DIR = path.join(ROOT, "content/docs");

/**
 * Minimum accepted `description` length, in characters.
 *
 * 60 is a deliberately conservative threshold: Bing recommends 150-160, but
 * enforcing that right away would block every new PR. Per the original note,
 * the code-level fallback pads overly short descriptions to 80+ characters, so
 * the snippet shown to search users does not end up too short.
 */
const MIN_LENGTH = 60;
| 51 | + |
| 52 | +/** |
| 53 | + * 豁免路径前缀。这些目录下的 MDX 不强制写 description: |
| 54 | + * - leetcode/: 96 个题解程序化导入,没人会手写;Layer 1 兜底已用 title+面包屑生成可用摘要 |
| 55 | + */ |
| 56 | +const EXEMPT_PATH_PREFIXES = ["content/docs/career/interview-prep/leetcode/"]; |
| 57 | + |
| 58 | +/** |
| 59 | + * 豁免文件后缀。 |
| 60 | + * - _translated.md: 机翻产物,原文 description 不一定能直接译过来;豁免后等人工 review 时补 |
| 61 | + */ |
| 62 | +const EXEMPT_FILE_SUFFIXES = ["_translated.md", "_translated.mdx"]; |
| 63 | + |
| 64 | +function isExempt(relPath) { |
| 65 | + if (EXEMPT_PATH_PREFIXES.some((p) => relPath.startsWith(p))) return true; |
| 66 | + if (EXEMPT_FILE_SUFFIXES.some((s) => relPath.endsWith(s))) return true; |
| 67 | + return false; |
| 68 | +} |
| 69 | + |
| 70 | +/** |
| 71 | + * 解析 mdx 文件返回 { description, hasField }。 |
| 72 | + * 用 gray-matter 兼容引号 / 多行 / YAML 边缘 case;正则 dirty parsing 不可靠。 |
| 73 | + */ |
| 74 | +function parseDescription(absPath) { |
| 75 | + const raw = fs.readFileSync(absPath, "utf-8"); |
| 76 | + let parsed; |
| 77 | + try { |
| 78 | + parsed = matter(raw); |
| 79 | + } catch (e) { |
| 80 | + return { |
| 81 | + hasField: false, |
| 82 | + description: "", |
| 83 | + parseError: e?.message ?? String(e), |
| 84 | + }; |
| 85 | + } |
| 86 | + const data = parsed.data ?? {}; |
| 87 | + const hasField = Object.prototype.hasOwnProperty.call(data, "description"); |
| 88 | + const description = |
| 89 | + typeof data.description === "string" ? data.description.trim() : ""; |
| 90 | + return { hasField, description }; |
| 91 | +} |
| 92 | + |
/**
 * Recursively list every `.md` / `.mdx` file under `DOCS_DIR`.
 *
 * @returns {string[]} Paths relative to `ROOT`, always written with `/`
 *   separators so they have the same shape as the paths git prints and match
 *   the POSIX-style exemption prefixes on every platform. Returns an empty
 *   array when `DOCS_DIR` does not exist.
 */
function listAllMdxFiles() {
  if (!fs.existsSync(DOCS_DIR)) return [];

  // path.relative() yields OS-native separators ("\" on Windows); normalise.
  const toRepoPath = (absPath) =>
    path.relative(ROOT, absPath).split(path.sep).join("/");
  const isMarkdown = (name) => name.endsWith(".mdx") || name.endsWith(".md");

  const collect = (dir) =>
    fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) return collect(full);
      return entry.isFile() && isMarkdown(entry.name)
        ? [toRepoPath(full)]
        : [];
    });

  return collect(DOCS_DIR);
}
| 116 | + |
/**
 * List the added/modified `.md` / `.mdx` files under `content/docs/` for the
 * current PR or pre-commit run.
 *
 * Sources, in order:
 *  1. GitHub Actions PR (`GITHUB_BASE_REF` set): diff of `origin/<base>` vs
 *     `HEAD`. When this succeeds its result is returned on its own.
 *  2. Staged changes (`git diff --cached`), for pre-commit.
 *  3. Working tree vs `HEAD`, so running the script locally also sees edits
 *     that are not staged yet.
 * Sources 2 and 3 are merged.
 *
 * @returns {string[]} De-duplicated repo-relative paths with `/` separators.
 */
function listChangedMdxFiles() {
  const candidates = new Set();

  /**
   * Run `git diff --name-only` and return its raw NUL-separated output.
   * - An argument array (no shell) means a ref name can never be interpreted
   *   as shell syntax.
   * - `-z` makes git print paths verbatim; without it, paths containing
   *   non-ASCII characters are C-quoted and would fail the prefix filter.
   */
  const gitDiffNames = (revArgs, { quiet = false } = {}) =>
    execFileSync(
      "git",
      ["diff", "--name-only", "-z", "--diff-filter=AM", ...revArgs],
      {
        encoding: "utf-8",
        ...(quiet ? { stdio: ["ignore", "pipe", "ignore"] } : {}),
      },
    );

  /** Add the Markdown files under content/docs/ from NUL-separated git output. */
  const addNames = (raw) => {
    for (const name of raw.split("\0")) {
      const isDoc = name.startsWith("content/docs/");
      const isMarkdown = name.endsWith(".mdx") || name.endsWith(".md");
      if (isDoc && isMarkdown) candidates.add(name);
    }
  };

  // Strategy 1: GitHub Actions PR context.
  const baseRef = process.env.GITHUB_BASE_REF;
  if (baseRef) {
    try {
      // Make the base branch reachable locally (CI checkouts are often shallow).
      execFileSync("git", ["fetch", "--depth=1", "origin", baseRef], {
        stdio: "ignore",
      });
      let out;
      try {
        out = gitDiffNames([`origin/${baseRef}...HEAD`, "--"], { quiet: true });
      } catch {
        // The three-dot form needs a merge base, which a shallow history
        // usually lacks. Compare the two trees directly instead. For a PR
        // merge-commit checkout this is exactly the PR's changes; for a
        // head-only checkout it may additionally list files the base branch
        // modified since the fork point, which is preferable to checking
        // nothing at all.
        out = gitDiffNames([`origin/${baseRef}`, "HEAD", "--"]);
      }
      addNames(out);
      return [...candidates];
    } catch (err) {
      // Do not fall back silently: in CI the local strategies below normally
      // find nothing, which would make the check pass without checking.
      console.warn(
        `⚠️ check:frontmatter — could not diff against origin/${baseRef} (${
          err?.message ?? String(err)
        }); falling back to staged/working-tree changes`,
      );
    }
  }

  // Strategy 2: staged changes (pre-commit).
  try {
    addNames(gitDiffNames(["--cached"]));
  } catch {
    /* Best effort: not a git repository, or git is unavailable. */
  }

  // Strategy 3: working tree vs HEAD (edits not staged yet).
  try {
    addNames(gitDiffNames(["HEAD", "--"]));
  } catch {
    /* Best effort: e.g. a repository without any commit yet. */
  }

  return [...candidates];
}
| 180 | + |
/**
 * Report one violation on stderr.
 *
 * Under GitHub Actions (`GITHUB_ACTIONS` set) the violation is printed as an
 * `::error` workflow command so it shows up as an annotation on the file;
 * otherwise it is printed as a plain line.
 *
 * @param {object} params
 * @param {string} params.file - Repo-relative path of the offending file.
 * @param {string} params.message - Human-readable explanation.
 * @param {number} [params.line=1] - Line the annotation is attached to.
 */
function emitError({ file, message, line = 1 }) {
  if (!process.env.GITHUB_ACTIONS) {
    console.error(` ✗ ${file}: ${message}`);
    return;
  }

  // Workflow commands are line-based and use "," / "::" as delimiters, so
  // values must be percent-escaped or a stray newline, comma or colon would
  // truncate or corrupt the annotation.
  const escapeData = (value) =>
    String(value)
      .replace(/%/g, "%25")
      .replace(/\r/g, "%0D")
      .replace(/\n/g, "%0A");
  const escapeProperty = (value) =>
    escapeData(value).replace(/:/g, "%3A").replace(/,/g, "%2C");

  console.error(
    `::error file=${escapeProperty(file)},line=${escapeProperty(line)}::${escapeData(message)}`,
  );
}
| 189 | + |
/**
 * Report one non-fatal finding via `console.warn`.
 *
 * Under GitHub Actions (`GITHUB_ACTIONS` set) the finding is printed as a
 * `::warning` workflow command; otherwise it is printed as a plain line.
 *
 * @param {object} params
 * @param {string} params.file - Repo-relative path the warning refers to.
 * @param {string} params.message - Human-readable explanation.
 */
function emitWarning({ file, message }) {
  if (!process.env.GITHUB_ACTIONS) {
    console.warn(` ⚠ ${file}: ${message}`);
    return;
  }

  // Percent-escape values so newlines, commas and colons cannot break the
  // line-based, delimiter-separated workflow command.
  const escapeData = (value) =>
    String(value)
      .replace(/%/g, "%25")
      .replace(/\r/g, "%0D")
      .replace(/\n/g, "%0A");
  const escapeProperty = (value) =>
    escapeData(value).replace(/:/g, "%3A").replace(/,/g, "%2C");

  console.warn(`::warning file=${escapeProperty(file)}::${escapeData(message)}`);
}
| 197 | + |
/**
 * CLI entry point: pick the file set, classify each file's `description`,
 * print a report and exit.
 *
 * Flags (read from `process.argv`):
 *  --all     scan every file under the docs tree instead of only changed ones
 *  --strict  with --all, exit 1 when violations exist
 *
 * Exit code: 1 when violations exist and the mode is "changed" or `--strict`
 * was passed; 0 otherwise. The function always ends by calling `process.exit`.
 */
function main() {
  const args = new Set(process.argv.slice(2));
  const mode = args.has("--all") ? "all" : "changed";
  const strict = args.has("--strict");

  const files = mode === "all" ? listAllMdxFiles() : listChangedMdxFiles();

  if (files.length === 0) {
    console.log(
      mode === "changed"
        ? "✅ check:frontmatter — no changed MDX files in content/docs/"
        : "⚠️ check:frontmatter --all — no MDX files found",
    );
    process.exit(0);
  }

  // Report buckets.
  const stats = {
    total: files.length,
    exempt: 0,
    invalid: [],
    missing: [],
    empty: [],
    short: [],
    ok: 0,
  };

  for (const rel of files) {
    if (isExempt(rel)) {
      stats.exempt++;
      continue;
    }
    const abs = path.join(ROOT, rel);
    // A listed file may no longer exist on disk; skip it.
    if (!fs.existsSync(abs)) continue;

    const { hasField, description, parseError } = parseDescription(abs);
    if (parseError !== undefined) {
      // Unparseable frontmatter is its own problem: telling the author the
      // description is "missing" would send them looking in the wrong place.
      stats.invalid.push({ rel, parseError });
    } else if (!hasField) {
      stats.missing.push(rel);
    } else if (!description) {
      stats.empty.push(rel);
    } else if (description.length < MIN_LENGTH) {
      stats.short.push({ rel, len: description.length });
    } else {
      stats.ok++;
    }
  }

  const violations =
    stats.invalid.length +
    stats.missing.length +
    stats.empty.length +
    stats.short.length;

  console.log(`\n📋 check:frontmatter (mode=${mode})`);
  console.log(` scanned: ${stats.total} files`);
  console.log(` exempt : ${stats.exempt} (leetcode/ + _translated)`);
  console.log(` ok : ${stats.ok}`);
  console.log(` missing description field: ${stats.missing.length}`);
  console.log(` empty description : ${stats.empty.length}`);
  console.log(` short < ${MIN_LENGTH} chars : ${stats.short.length}`);
  if (stats.invalid.length > 0) {
    console.log(` unparseable frontmatter : ${stats.invalid.length}`);
  }

  if (violations === 0) {
    console.log("\n✅ all checked files have description >= " + MIN_LENGTH);
    process.exit(0);
  }

  console.log(`\n🚫 ${violations} file(s) need a longer description:\n`);
  for (const { rel, parseError } of stats.invalid) {
    // Parser messages can span several lines; keep only the first one so the
    // output stays a single line per file.
    const reason = parseError.split("\n")[0];
    emitError({
      file: rel,
      message: `Frontmatter could not be parsed (${reason}). Fix the YAML between the \`---\` fences, then make sure \`description\` is 60-160 chars.`,
    });
  }
  for (const rel of stats.missing) {
    emitError({
      file: rel,
      message: `Missing \`description\` in frontmatter. Add a 60-160 char summary describing what this page covers (used by search engines and AI assistants).`,
    });
  }
  for (const rel of stats.empty) {
    emitError({
      file: rel,
      message: `Frontmatter has \`description: ""\` (empty). Fill in 60-160 chars describing the page topic for SEO.`,
    });
  }
  for (const { rel, len } of stats.short) {
    emitError({
      file: rel,
      message: `\`description\` is too short (${len} chars, need >= ${MIN_LENGTH}). Expand to 60-160 chars summarizing the page.`,
    });
  }

  console.log(
    `\n💡 tip: 在 frontmatter 里加 description: "..." 字段。\n 推荐 60-160 字符,覆盖:本页主题 + 关键技术点 + 适用读者。\n leetcode/ 目录和 _translated.md 文件自动豁免(由代码层兜底,见 lib/seo-description.ts)。`,
  );

  // "changed" mode is always strict; "all" mode fails only with --strict.
  if (mode === "changed" || strict) {
    process.exit(1);
  }
  process.exit(0);
}
| 292 | + |
| 293 | +main(); |
0 commit comments