Skip to content

Commit 4ae561b

Browse files
feat(seo): lint mdx frontmatter description in CI
新增 scripts/check-frontmatter-description.mjs:默认 --changed 模式 检查新增/修改的 MDX 必须有 description 且 ≥ 60 字符。 接入位置: - .husky/pre-commit:本地提交前拦截 - .github/workflows/content-check.yml:PR 时按 GITHUB_BASE_REF diff - package.json 新增 check:frontmatter / check:frontmatter:all 命令 豁免规则: - content/docs/career/interview-prep/leetcode/ 全部豁免(程序化导入 的题解,靠 lib/seo-description.ts 兜底) - *_translated.{md,mdx}(机翻产物) 老存量页面由 Layer 1 代码层兜底,本 lint 只阻止新增低质量 description。
1 parent f83e622 commit 4ae561b

4 files changed

Lines changed: 310 additions & 2 deletions

File tree

.github/workflows/content-check.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,10 @@ jobs:
7676

7777
- name: Lint image references (non-blocking)
7878
run: pnpm lint:images || echo "[warn] image lint found issues (non-blocking)"
79+
80+
# Block PR if newly added/modified MDX is missing a proper description.
81+
# Old files are grandfathered via lib/seo-description.ts (Layer 1 fallback);
82+
# this check only fires on changed files in the PR (uses GITHUB_BASE_REF diff).
83+
# leetcode/ and _translated.md are exempt — see scripts/check-frontmatter-description.mjs
84+
- name: Check MDX frontmatter description
85+
run: pnpm check:frontmatter

.husky/pre-commit

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,11 @@ pnpm test || exit 1
1111
pnpm check:pnpm-version || true
1212
pnpm check:lockfile || true
1313

14-
# 5) 其余按 lint-staged 处理(如 Prettier)
14+
# 5) 校验新增/修改的 docs MDX 必须有 description(>= 60 字符)
15+
# Bing 2026-05 报告 118 个页面 description 太短,老内容由 lib/seo-description.ts
16+
# 代码层兜底,但新增/修改必须手写,避免再积累低质量 SEO 内容。
17+
# leetcode/ 和 _translated 自动豁免,详见 scripts/check-frontmatter-description.mjs
18+
pnpm check:frontmatter || exit 1
19+
20+
# 6) 其余按 lint-staged 处理(如 Prettier)
1521
pnpm exec lint-staged

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
"lint:fix": "eslint . --ext .ts,.tsx --fix",
1919
"typecheck": "tsc --noEmit",
2020
"check:pnpm-version": "node scripts/check-pnpm-version.mjs",
21-
"check:lockfile": "node scripts/check-lockfile.mjs"
21+
"check:lockfile": "node scripts/check-lockfile.mjs",
22+
"check:frontmatter": "node scripts/check-frontmatter-description.mjs",
23+
"check:frontmatter:all": "node scripts/check-frontmatter-description.mjs --all"
2224
},
2325
"dependencies": {
2426
"@ai-sdk/google": "^2.0.14",
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
#!/usr/bin/env node
2+
/**
3+
* MDX frontmatter `description` 字段校验脚本
4+
*
5+
* 背景
6+
* - Bing Webmaster Tools 2026-05 报告 118 个页面 meta description 太短。
7+
* - 根因:fumadocs 的 docs 页面直接读 MDX frontmatter `description`,没兜底;
8+
* 而 content/docs/ 下 292 个 MDX 里 96 个完全没写 description、67 个写成空字符串、
9+
* 35 个 < 20 字符。
10+
* - 代码层兜底已经做了(lib/seo-description.ts),让所有页面 meta description >= 80 字符。
11+
* 但兜底是"补救",不是质量保证 —— 兜底版本是模板化拼接,比作者手写的精准内容差。
12+
*
13+
* 这个脚本的角色
14+
* - 在 CI/pre-commit 阶段拦截 **新增/修改** 的 MDX 文件,强制作者手写 description。
15+
* - 老文件不返工(grandfather)—— 由 Layer 1 代码兜底兜住。
16+
* - 自动豁免 leetcode/ 目录和 _translated 后缀文件(前者程序化导入太多,后者是机翻产物)。
17+
*
18+
* 用法
19+
* node scripts/check-frontmatter-description.mjs # 默认 --changed
20+
* node scripts/check-frontmatter-description.mjs --changed # 只检查 git 已变更的 mdx(PR/pre-commit 用)
21+
* node scripts/check-frontmatter-description.mjs --all # 扫全部,输出统计报表(不退出非 0)
22+
* node scripts/check-frontmatter-description.mjs --strict # 配合 --all 时遇违规退 1(暂不开放,老文件太多)
23+
*
24+
* 退出码
25+
* 0 通过 / 报表模式
26+
* 1 --changed 模式下发现新增/修改的 MDX 违反规则
27+
*
28+
* 接入位置
29+
* - .husky/pre-commit (pnpm check:frontmatter — --changed 模式)
30+
* - .github/workflows/content-check.yml (CI PR 检查)
31+
*
32+
* 后续可考虑
33+
* - 把 leetcode/ 豁免改为"必须用模板生成",由 Layer 3 的回填脚本保证
34+
* - 把 MIN_LENGTH 提到 100 字符(先保守 60 让老贡献者适应)
35+
*/
36+
37+
import fs from "node:fs";
38+
import path from "node:path";
39+
import { execSync } from "node:child_process";
40+
import matter from "gray-matter";
41+
42+
// Repository root: every path in this script is resolved against the current working directory.
const ROOT = process.cwd();
43+
// Docs content tree scanned in --all mode.
const DOCS_DIR = path.join(ROOT, "content", "docs");
44+
45+
/**
 * Minimum length of a frontmatter `description`, in characters.
 * 60 is deliberately conservative: Bing recommends 150-160, but enforcing that
 * would block every new PR, so start at 60 and let contributors adapt.
 * The Layer 1 code fallback further pads anything too short to 80+, so the
 * search snippet end users see is never actually too short.
 */
50+
const MIN_LENGTH = 60;
51+
52+
/**
 * Exempt path prefixes (repo-relative, POSIX separators). MDX files under
 * these directories are not required to carry a hand-written description:
 * - leetcode/: 96 programmatically imported solutions that nobody will write
 *   by hand; the Layer 1 fallback already builds a usable summary from
 *   title + breadcrumbs.
 */
const EXEMPT_PATH_PREFIXES = ["content/docs/career/interview-prep/leetcode/"];

/**
 * Exempt file-name suffixes.
 * - _translated.md / _translated.mdx: machine-translated output; the source
 *   description cannot always be translated directly, so these are filled in
 *   later during human review.
 */
const EXEMPT_FILE_SUFFIXES = ["_translated.md", "_translated.mdx"];

/**
 * Tell whether a repo-relative path is exempt from the description check.
 *
 * @param {string} relPath - Path relative to the repo root. Either separator
 *   style is accepted: git prints `/`, while `path.relative` yields `\` on
 *   Windows.
 * @returns {boolean} true when the path is under an exempt prefix or ends
 *   with an exempt suffix.
 */
function isExempt(relPath) {
  // Normalise separators first; otherwise the `/`-based prefixes never match
  // Windows-style paths and exempt files are reported as violations.
  const posixPath = relPath.replace(/\\/g, "/");
  return (
    EXEMPT_PATH_PREFIXES.some((prefix) => posixPath.startsWith(prefix)) ||
    EXEMPT_FILE_SUFFIXES.some((suffix) => posixPath.endsWith(suffix))
  );
}
69+
70+
/**
 * Read an MDX/Markdown file and extract its frontmatter `description`.
 *
 * Parsing is delegated to gray-matter rather than a regex so that quoting,
 * multi-line values and other YAML edge cases are handled by a real parser.
 *
 * @param {string} absPath - Absolute path of the file to inspect.
 * @returns {{hasField: boolean, description: string, parseError?: string}}
 *   `hasField` is true when the frontmatter has an own `description` key;
 *   `description` is the trimmed value, or "" when it is not a string;
 *   `parseError` is set only when the file could not be read or parsed.
 */
function parseDescription(absPath) {
  let data;
  try {
    // The read lives inside the try so that I/O failures (EISDIR, EACCES, ...)
    // are reported through `parseError` instead of crashing the whole run.
    const raw = fs.readFileSync(absPath, "utf-8");
    data = matter(raw).data ?? {};
  } catch (e) {
    return {
      hasField: false,
      description: "",
      parseError: e?.message ?? String(e),
    };
  }

  const hasField = Object.prototype.hasOwnProperty.call(data, "description");
  const description =
    typeof data.description === "string" ? data.description.trim() : "";
  return { hasField, description };
}
92+
93+
/**
 * Recursively list every `.md` / `.mdx` file under DOCS_DIR.
 *
 * @returns {string[]} Paths relative to ROOT, always with `/` separators so
 *   they have the same shape as paths printed by git and as the `/`-based
 *   exemption prefixes (`path.relative` alone yields `\` on Windows).
 *   Returns [] when DOCS_DIR does not exist.
 */
function listAllMdxFiles() {
  if (!fs.existsSync(DOCS_DIR)) return [];

  const out = [];
  const walk = (dir) => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(full);
        continue;
      }
      const isMarkdown =
        entry.name.endsWith(".mdx") || entry.name.endsWith(".md");
      if (entry.isFile() && isMarkdown) {
        out.push(path.relative(ROOT, full).split(path.sep).join("/"));
      }
    }
  };
  walk(DOCS_DIR);
  return out;
}
116+
117+
/**
 * List the added/modified `.md` / `.mdx` files under content/docs/ for the
 * current PR or pre-commit run.
 *
 * Strategies, in order:
 *  1. GitHub Actions PR (GITHUB_BASE_REF set): diff HEAD against
 *     origin/<base>. When this succeeds its result is returned on its own.
 *  2. pre-commit: staged changes (`git diff --cached`).
 *  3. local development: working tree vs HEAD, so files edited but not yet
 *     staged are seen too.
 * Results of 2 and 3 are merged. Failures of 2 and 3 are ignored on purpose
 * (not a git repository, git missing, ...), which yields an empty list.
 *
 * @returns {string[]} Unique repo-relative paths with `/` separators.
 */
function listChangedMdxFiles() {
  const candidates = new Set();

  // core.quotepath=false: by default git prints paths containing non-ASCII
  // bytes as quoted octal escapes ("content/docs/\346\226\207.mdx"), which
  // would fail the prefix filter below and silently skip those files.
  const diffCmd =
    "git -c core.quotepath=false diff --name-only --diff-filter=AM";

  /** Add git output line by line, keeping only md/mdx under content/docs/. */
  const addLines = (raw) => {
    for (const line of raw.split("\n")) {
      const file = line.trim();
      const isDocsMarkdown =
        file.startsWith("content/docs/") &&
        (file.endsWith(".mdx") || file.endsWith(".md"));
      if (isDocsMarkdown) candidates.add(file);
    }
  };

  // Strategy 1: GitHub Actions PR context
  const baseRef = process.env.GITHUB_BASE_REF;
  if (baseRef) {
    try {
      // Make the base branch ref reachable locally (actions/checkout does a
      // shallow clone by default).
      execSync(`git fetch origin ${baseRef} --depth=1`, { stdio: "ignore" });

      let out;
      try {
        out = execSync(`${diffCmd} origin/${baseRef}...HEAD`, {
          encoding: "utf-8",
          stdio: ["ignore", "pipe", "ignore"],
        });
      } catch {
        // The three-dot form needs a merge base, which a depth-1 history
        // usually cannot provide. Fall back to a direct two-dot comparison.
        // NOTE(review): this equals the PR's own changes when HEAD is the PR
        // merge commit (the actions/checkout default for pull_request
        // events) — confirm against the workflow's checkout settings.
        out = execSync(`${diffCmd} origin/${baseRef} HEAD`, {
          encoding: "utf-8",
        });
      }
      addLines(out);
      return [...candidates];
    } catch (err) {
      // Keep the best-effort fallback, but say so: in CI the local strategies
      // normally find nothing, and a silent fallback would look like a pass.
      const reason = String(err?.message ?? err).split("\n")[0];
      console.warn(
        `[warn] check:frontmatter — could not diff against origin/${baseRef} (${reason}); falling back to staged/working-tree changes`,
      );
    }
  }

  // Strategy 2: pre-commit staging area
  try {
    addLines(execSync(`${diffCmd} --cached`, { encoding: "utf-8" }));
  } catch {
    /* not a git repository or nothing staged — ignore */
  }

  // Strategy 3: working tree vs HEAD (files edited locally but not staged)
  try {
    addLines(execSync(`${diffCmd} HEAD`, { encoding: "utf-8" }));
  } catch {
    /* ignore */
  }

  return [...candidates];
}
180+
181+
/**
 * Escape the message part of a GitHub Actions workflow command.
 * `%`, CR and LF must be percent-encoded, otherwise a multi-line message is
 * cut off at the first newline.
 */
function escapeAnnotationData(value) {
  return String(value)
    .replace(/%/g, "%25")
    .replace(/\r/g, "%0D")
    .replace(/\n/g, "%0A");
}

/**
 * Escape a property value (`file=`, `line=`) of a workflow command.
 * On top of the data escapes, `:` and `,` delimit properties and must be
 * encoded as well.
 */
function escapeAnnotationProperty(value) {
  return escapeAnnotationData(value).replace(/:/g, "%3A").replace(/,/g, "%2C");
}

/**
 * Report a violation on stderr.
 *
 * When GITHUB_ACTIONS is set, the GitHub Actions annotation format is used so
 * the error is attached to the file/line in the PR; otherwise a plain
 * human-readable line is printed.
 *
 * @param {{file: string, message: string, line?: number}} error - `line`
 *   defaults to 1.
 */
function emitError({ file, message, line = 1 }) {
  if (process.env.GITHUB_ACTIONS) {
    console.error(
      `::error file=${escapeAnnotationProperty(file)},line=${escapeAnnotationProperty(line)}::${escapeAnnotationData(message)}`,
    );
  } else {
    console.error(` ✗ ${file}: ${message}`);
  }
}

/**
 * Report a non-blocking warning on stderr, using the GitHub Actions
 * annotation format when GITHUB_ACTIONS is set.
 *
 * @param {{file: string, message: string}} warning
 */
function emitWarning({ file, message }) {
  if (process.env.GITHUB_ACTIONS) {
    console.warn(
      `::warning file=${escapeAnnotationProperty(file)}::${escapeAnnotationData(message)}`,
    );
  } else {
    console.warn(` ⚠ ${file}: ${message}`);
  }
}
197+
198+
/**
 * CLI entry point.
 *
 * Flags (read from process.argv):
 *   --all     scan every file from listAllMdxFiles() instead of changed files
 *   --strict  make --all exit non-zero on violations
 *
 * Prints a summary report, emits one error per offending file and terminates
 * the process: exit code 1 when violations exist in changed mode (or in any
 * mode with --strict), 0 otherwise.
 */
function main() {
  const args = new Set(process.argv.slice(2));
  const mode = args.has("--all") ? "all" : "changed";
  const strict = args.has("--strict");

  const files = mode === "all" ? listAllMdxFiles() : listChangedMdxFiles();

  if (files.length === 0) {
    console.log(
      mode === "changed"
        ? "✅ check:frontmatter — no changed MDX files in content/docs/"
        : "⚠️ check:frontmatter --all — no MDX files found",
    );
    process.exit(0);
  }

  // Report statistics
  const stats = {
    total: files.length,
    exempt: 0,
    unparsable: [],
    missing: [],
    empty: [],
    short: [],
    ok: 0,
  };

  for (const rel of files) {
    if (isExempt(rel)) {
      stats.exempt++;
      continue;
    }
    const abs = path.join(ROOT, rel);
    if (!fs.existsSync(abs)) continue;

    const { hasField, description, parseError } = parseDescription(abs);
    // Count code points, not UTF-16 units, so characters outside the BMP
    // (emoji, rare CJK) count as one character each.
    const len = [...description].length;

    if (parseError) {
      // Broken frontmatter is its own problem; reporting it as "missing
      // description" would send the author looking in the wrong place.
      stats.unparsable.push({ rel, parseError });
    } else if (!hasField) {
      stats.missing.push(rel);
    } else if (!description) {
      stats.empty.push(rel);
    } else if (len < MIN_LENGTH) {
      stats.short.push({ rel, len });
    } else {
      stats.ok++;
    }
  }

  const violations =
    stats.unparsable.length +
    stats.missing.length +
    stats.empty.length +
    stats.short.length;

  console.log(`\n📋 check:frontmatter (mode=${mode})`);
  console.log(` scanned: ${stats.total} files`);
  console.log(` exempt : ${stats.exempt} (leetcode/ + _translated)`);
  console.log(` ok : ${stats.ok}`);
  console.log(` unparsable frontmatter : ${stats.unparsable.length}`);
  console.log(` missing description field: ${stats.missing.length}`);
  console.log(` empty description : ${stats.empty.length}`);
  console.log(` short < ${MIN_LENGTH} chars : ${stats.short.length}`);

  if (violations === 0) {
    console.log("\n✅ all checked files have description >= " + MIN_LENGTH);
    process.exit(0);
  }

  console.log(`\n🚫 ${violations} file(s) need a longer description:\n`);
  for (const { rel, parseError } of stats.unparsable) {
    // Parser errors can span several lines; keep only the first one so the
    // report stays one line per file.
    const reason = String(parseError).split("\n")[0];
    emitError({
      file: rel,
      message: `Could not read or parse frontmatter (${reason}). Fix the file, then make sure it has a \`description\` of >= ${MIN_LENGTH} chars.`,
    });
  }
  for (const rel of stats.missing) {
    emitError({
      file: rel,
      message: `Missing \`description\` in frontmatter. Add a 60-160 char summary describing what this page covers (used by search engines and AI assistants).`,
    });
  }
  for (const rel of stats.empty) {
    emitError({
      file: rel,
      message: `Frontmatter has \`description: ""\` (empty). Fill in 60-160 chars describing the page topic for SEO.`,
    });
  }
  for (const { rel, len } of stats.short) {
    emitError({
      file: rel,
      message: `\`description\` is too short (${len} chars, need >= ${MIN_LENGTH}). Expand to 60-160 chars summarizing the page.`,
    });
  }

  console.log(
    `\n💡 tip: 在 frontmatter 里加 description: "..." 字段。\n 推荐 60-160 字符,覆盖:本页主题 + 关键技术点 + 适用读者。\n leetcode/ 目录和 _translated.md 文件自动豁免(由代码层兜底,见 lib/seo-description.ts)。`,
  );

  // changed mode is strict by default; all mode fails only with --strict
  if (mode === "changed" || strict) {
    process.exit(1);
  }
  process.exit(0);
}
292+
293+
main();

0 commit comments

Comments
 (0)