From 49ab15d065893665f8a37ef415feb5a9c6c878e2 Mon Sep 17 00:00:00 2001 From: uditDewan Date: Thu, 11 Jun 2026 01:21:04 -0400 Subject: [PATCH] fix: git eager VCS comma author names, non-ASCII paths, json dir asset links, og:locale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - fix(utils): parse git log author names containing commas correctly The eager VCS strategy parsed `t:%ct,a:%an` lines with split(','), which truncated author names like "Doe, John" to just "Doe". Now uses an anchored regex so only the first comma (the t:/a: delimiter) splits the fields. - fix(utils): disable core.quotepath so non-ASCII file paths are tracked Git's default core.quotepath=true causes paths like "docs/café.md" to be octal-escaped in --name-status output, making file-info map lookups fail silently for all non-ASCII filenames. Now passes -c core.quotepath=false. - fix(mdx-loader): use /\.json$/ regex for .json → .raw extension swap The string replace('.json','.raw') replaced the first occurrence, so a link like `../dir.json/data.json` produced `../dir.raw/data.json`. Now anchored to the end of the path. - fix(theme-classic): use replaceAll for og:locale BCP 47 → underscore Multi-subtag locales like zh-Hans-CN produced zh_Hans-CN (invalid OG locale). All dashes must be replaced, not just the first. Each fix is covered by a regression test verified to fail on the original code and pass after the fix. --- .../__tests__/__fixtures__/dir.json/data.json | 1 + .../transformLinks/__tests__/index.test.ts | 8 ++++ .../src/remark/transformLinks/index.ts | 4 +- .../src/theme/SiteMetadata/index.tsx | 4 +- .../src/vcs/__tests__/gitUtils.test.ts | 43 +++++++++++++++++++ packages/docusaurus-utils/src/vcs/gitUtils.ts | 19 ++++---- 6 files changed, 69 insertions(+), 10 deletions(-) create mode 100644 packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/__fixtures__/dir.json/data.json diff --git a/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/__fixtures__/dir.json/data.json b/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/__fixtures__/dir.json/data.json new file mode 100644 index 000000000000..6d9590305133 --- /dev/null +++ b/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/__fixtures__/dir.json/data.json @@ -0,0 +1 @@ +{"foo": "bar"} diff --git a/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/index.test.ts b/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/index.test.ts index e33ce8fc785b..273f4976b81b 100644 --- a/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/index.test.ts +++ b/packages/docusaurus-mdx-loader/src/remark/transformLinks/__tests__/index.test.ts @@ -80,6 +80,14 @@ describe('transformLinks plugin', () => { expect(result).toMatchInlineSnapshot(`"[file](dir/file.zip)"`); }); + it('transforms json file links inside a directory with ".json" in its name', async () => { + const result = await processContent(`[json](../dir.json/data.json)`); + // The ".raw" extension swap must only apply to the file extension, + // not to the first ".json" occurrence (here, the directory name) + expect(result).toContain('require("./../dir.json/data.raw!=!'); + expect(result).toContain('!./../dir.json/data.json").default'); + }); + it('does not transform existing dotted directory links to asset requires', async () => { const result = await processContent( `[directory](../dotted-directory.whatever)`, diff --git a/packages/docusaurus-mdx-loader/src/remark/transformLinks/index.ts b/packages/docusaurus-mdx-loader/src/remark/transformLinks/index.ts index 2642880fa885..8058f78e8e14 100644 --- a/packages/docusaurus-mdx-loader/src/remark/transformLinks/index.ts +++ b/packages/docusaurus-mdx-loader/src/remark/transformLinks/index.ts @@ -106,8 +106,10 @@ async function toAssetRequireNode( const requireString = `${ // A hack to stop Webpack from using its built-in loader to parse JSON + // Note: only replace the file extension at the end of the path: + // the ".json" string can also appear in directory/file names path.extname(relativeAssetPath) === '.json' - ? `${relativeAssetPath.replace('.json', '.raw')}!=` + ? `${relativeAssetPath.replace(/\.json$/, '.raw')}!=` : '' }${context.inlineMarkdownLinkFileLoader}${ escapePath(relativeAssetPath) + search diff --git a/packages/docusaurus-theme-classic/src/theme/SiteMetadata/index.tsx b/packages/docusaurus-theme-classic/src/theme/SiteMetadata/index.tsx index d116f17af005..46a30a1b3f2c 100644 --- a/packages/docusaurus-theme-classic/src/theme/SiteMetadata/index.tsx +++ b/packages/docusaurus-theme-classic/src/theme/SiteMetadata/index.tsx @@ -34,8 +34,10 @@ function AlternateLangHeaders(): ReactNode { // using underscores instead of dashes. // See https://ogp.me/#optional // See https://en.wikipedia.org/wiki/IETF_language_tag) + // Note: BCP 47 tags can have multiple subtags (e.g. zh-Hans-CN), + // so all dashes must be replaced, not just the first one const bcp47ToOpenGraphLocale = (code: string): string => - code.replace('-', '_'); + code.replaceAll('-', '_'); // Note: it is fine to use both "x-default" and "en" to target the same url // See https://www.searchviu.com/en/multiple-hreflang-tags-one-url/ diff --git a/packages/docusaurus-utils/src/vcs/__tests__/gitUtils.test.ts b/packages/docusaurus-utils/src/vcs/__tests__/gitUtils.test.ts index 71cbe1903a52..b3fe7d4c8738 100644 --- a/packages/docusaurus-utils/src/vcs/__tests__/gitUtils.test.ts +++ b/packages/docusaurus-utils/src/vcs/__tests__/gitUtils.test.ts @@ -357,6 +357,49 @@ describe('commit info APIs', () => { ); }); + it('returns files info for author names containing commas', async () => { + const {repoDir, git} = await createGitRepoEmpty(); + await git.commitFile('test.txt', { + commitDate: '2020-06-19', + commitAuthor: 'Doe, John ', + }); + + const filesInfo = await getGitRepositoryFilesInfo(repoDir); + expect(filesInfo.get('test.txt')).toEqual({ + creation: { + author: 'Doe, John', + timestamp: new Date('2020-06-19').getTime(), + }, + lastUpdate: { + author: 'Doe, John', + timestamp: new Date('2020-06-19').getTime(), + }, + }); + }); + + it('returns files info for non-ASCII file paths', async () => { + const {repoDir, git} = await createGitRepoEmpty(); + // Write the file and use "git add ." to avoid shell encoding issues + await fs.outputFile( + path.join(repoDir, 'docs', 'café.md'), + 'Some content', + ); + await git.addAll(); + await git.commit('Add doc', '2020-06-19', 'Seb '); + + const filesInfo = await getGitRepositoryFilesInfo(repoDir); + expect(filesInfo.get('docs/café.md')).toEqual({ + creation: { + author: 'Seb', + timestamp: new Date('2020-06-19').getTime(), + }, + lastUpdate: { + author: 'Seb', + timestamp: new Date('2020-06-19').getTime(), + }, + }); + }); + it('returns files info', async () => { const repoDir = await repoDirPromise; diff --git a/packages/docusaurus-utils/src/vcs/gitUtils.ts b/packages/docusaurus-utils/src/vcs/gitUtils.ts index 292cd12fc45c..1944d8c2fb7c 100644 --- a/packages/docusaurus-utils/src/vcs/gitUtils.ts +++ b/packages/docusaurus-utils/src/vcs/gitUtils.ts @@ -450,6 +450,11 @@ export async function getGitRepositoryFilesInfo( // See https://github.com/facebook/docusaurus/pull/10022 '-c', 'log.showSignature=false', + // Do not quote/escape non-ASCII file paths in the --name-status output + // Otherwise paths like "docs/café.md" are emitted as + // "\"docs/caf\\303\\251.md\"" and can't be matched to real file paths + '-c', + 'core.quotepath=false', // The git command we want to run 'log', // Format each history entry as t: @@ -485,14 +490,12 @@ The command exited with code ${result.exitCode}: ${result.stderr}`, const runningMap: GitFileInfoMap = new Map(); for (const logLine of logLines) { - if (logLine.startsWith('t:')) { - // t:,a: - const [timestampStr, authorStr] = logLine.split(',') as [string, string]; - const timestamp = Number.parseInt(timestampStr.slice(2), 10) * 1000; - const author = authorStr.slice(2); - - runningDate = timestamp; - runningAuthor = author; + // t:,a: + // Note: the author name can contain commas, so we can't just split(',') + const entryMatch = logLine.match(/^t:(?\d+),a:(?.*)$/); + if (entryMatch) { + runningDate = Number.parseInt(entryMatch.groups!.timestamp!, 10) * 1000; + runningAuthor = entryMatch.groups!.author!; } // TODO the code below doesn't handle delete/move/rename operations properly