// Patch target: app/api/markdown/[[...slug]]/route.ts
// (MDX -> Markdown compilation helpers added by this change.)

// Regex pattern for removing .md extension
const MD_EXTENSION_REGEX = /\.md$/;

// ---------------------------------------------------------------------------
// Regex patterns for MDX to Markdown compilation.
// Declared at module level so each pattern is compiled exactly once.
// ---------------------------------------------------------------------------

// Windows (\r\n) and lone \r line endings; everything below assumes "\n".
const LINE_ENDING_REGEX = /\r\n?/g;

const FRONTMATTER_REGEX = /^---\n([\s\S]*?)\n---\n?/;
const IMPORT_FROM_REGEX = /^import\s+.*?from\s+['"].*?['"];?\s*$/gm;
const IMPORT_DIRECT_REGEX = /^import\s+['"].*?['"];?\s*$/gm;
const IMPORT_DESTRUCTURE_REGEX =
  /^import\s*\{[\s\S]*?\}\s*from\s*['"].*?['"];?\s*$/gm;
const EXPORT_REGEX =
  /^export\s+(const|let|var|function|default)\s+[\s\S]*?(?=\n(?:import|export|#|\n|$))/gm;

// JSX attribute pattern that handles:
// - quoted strings containing ">" characters
// - JSX expressions in curly braces containing ">" (arrow functions, comparisons)
// - multiline attributes (newlines allowed between attributes)
// - up to 3 levels of brace nesting, e.g. style={{outer: {inner: 1}}}
//
// Each level is written as "one non-brace character OR one nested brace group",
// repeated. Keeping the repeated alternatives mutually exclusive (instead of
// repeating `[^{}]*` inside another `*`) avoids exponential backtracking when a
// tag contains an unbalanced "{".
const BRACE_L1 = "\\{[^{}]*\\}"; // innermost: braces with no braces inside
const BRACE_L2 = `\\{(?:[^{}]|${BRACE_L1})*\\}`; // 2 levels
const BRACE_PATTERN = `\\{(?:[^{}]|${BRACE_L2})*\\}`; // 3 levels
const JSX_ATTRS_PATTERN = `(?:[^>"'{}]|"[^"]*"|'[^']*'|${BRACE_PATTERN})*`;
const SELF_CLOSING_JSX_REGEX = new RegExp(
  `<([A-Z][a-zA-Z0-9.]*)${JSX_ATTRS_PATTERN}\\/>`,
  "g"
);
const JSX_WITH_CHILDREN_REGEX = new RegExp(
  `<([A-Z][a-zA-Z0-9.]*)${JSX_ATTRS_PATTERN}>([\\s\\S]*?)<\\/\\1>`,
  "g"
);

// Fenced code block. The optional group captures the opening fence's own
// indentation when the fence starts a line, so the block can be de-indented.
const CODE_BLOCK_REGEX = /(^[ \t]*)?```[\s\S]*?```/gm;
// Single-line inline code span.
const INLINE_CODE_REGEX = /`[^`\n]+`/g;
const JSX_EXPRESSION_REGEX = /\{[^}]+\}/g;
const EXCESSIVE_NEWLINES_REGEX = /\n{3,}/g;
const CODE_BLOCK_PLACEHOLDER_REGEX = /__CODE_BLOCK_(\d+)__/g;
// A line consisting solely of one code placeholder (after trimming).
const CODE_PLACEHOLDER_LINE_REGEX = /^__CODE_BLOCK_\d+__$/;

// Regex for detecting markdown list items and numbered lists
const UNORDERED_LIST_REGEX = /^[-*+]\s/;
const ORDERED_LIST_REGEX = /^\d+[.)]\s/;

// Regex for extracting frontmatter fields.
// Group 1 = double-quoted content, Group 2 = single-quoted content,
// Group 3 = unquoted/fallback (rest of the line).
// - `^` + the `m` flag anchor the key to the start of a line, so "subtitle:"
//   is not mistaken for "title:".
// - `[ \t]*` (not `\s*`) keeps an empty value from swallowing the next line.
// - Quoted alternatives require the closing quote at end of line so that
//   apostrophes inside a value are not misread as delimiters.
const TITLE_REGEX =
  /^title:[ \t]*(?:"([^"]*)"[ \t]*$|'([^']*)'[ \t]*$|([^\n]+))/m;
const DESCRIPTION_REGEX =
  /^description:[ \t]*(?:"([^"]*)"[ \t]*$|'([^']*)'[ \t]*$|([^\n]+))/m;

// Regex for detecting leading whitespace on lines
const LEADING_WHITESPACE_REGEX = /^[ \t]+/;

// Pages whose compiled body is shorter than this are treated as component-only.
const MIN_CONTENT_LENGTH = 10;

/** Number of leading space/tab characters on `line`. */
function leadingWhitespaceWidth(line: string): number {
  return line.match(LEADING_WHITESPACE_REGEX)?.[0]?.length ?? 0;
}

/** Removes up to `width` leading whitespace characters from every line of `block`. */
function stripIndent(block: string, width: number): string {
  return block
    .split("\n")
    .map((line) => line.slice(Math.min(leadingWhitespaceWidth(line), width)))
    .join("\n");
}

/**
 * Removes the common leading indentation from all lines of `text`.
 * This normalizes content that was indented inside JSX components.
 *
 * Blank lines, code fence markers (lines starting with ```), and lines that
 * consist only of a code placeholder are ignored when computing the common
 * indent, and are emitted without any leading whitespace.
 *
 * @param text - Possibly indented multi-line text.
 * @returns The text with the common indent removed; returned unchanged when
 *   there is no common indent.
 */
function dedent(text: string): string {
  const lines = text.split("\n");
  const isFenceLike = (trimmed: string): boolean =>
    trimmed.startsWith("```") || CODE_PLACEHOLDER_LINE_REGEX.test(trimmed);

  let minIndent = Number.POSITIVE_INFINITY;
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed === "" || isFenceLike(trimmed)) {
      continue;
    }
    minIndent = Math.min(minIndent, leadingWhitespaceWidth(line));
  }

  // Nothing to remove (no indent, or no measurable lines at all).
  if (minIndent === 0 || !Number.isFinite(minIndent)) {
    return text;
  }

  return lines
    .map((line) => {
      const trimmed = line.trim();
      if (trimmed === "") {
        return "";
      }
      if (isFenceLike(trimmed)) {
        return trimmed;
      }
      // Every remaining line took part in the minimum, so it has >= minIndent.
      return line.slice(minIndent);
    })
    .join("\n");
}

/**
 * Trims `value` and strips one pair of matching surrounding quotes, if present.
 * Used for the unquoted fallback capture, which may still carry quotes when the
 * value contains an apostrophe or inner quote.
 */
function stripSurroundingQuotes(value: string): string {
  const trimmed = value.trim();
  const first = trimmed.charAt(0);
  const isQuoted =
    trimmed.length >= 2 &&
    (first === '"' || first === "'") &&
    trimmed.endsWith(first);
  return isQuoted ? trimmed.slice(1, -1) : trimmed;
}

/**
 * Extracts `title` and `description` from a frontmatter block.
 * Handles double-quoted, single-quoted, and unquoted values on top-level keys.
 *
 * @param frontmatter - Raw frontmatter text (may be empty).
 * @returns The title (defaulting to "Arcade Documentation") and the description
 *   (empty string when absent).
 */
function extractFrontmatterMeta(frontmatter: string): {
  title: string;
  description: string;
} {
  const titleMatch = frontmatter.match(TITLE_REGEX);
  const descriptionMatch = frontmatter.match(DESCRIPTION_REGEX);

  // Use whichever capture group participated; the fallback group may still
  // contain surrounding quotes, so strip them.
  const title =
    titleMatch?.[1] ??
    titleMatch?.[2] ??
    stripSurroundingQuotes(titleMatch?.[3] ?? "");
  const description =
    descriptionMatch?.[1] ??
    descriptionMatch?.[2] ??
    stripSurroundingQuotes(descriptionMatch?.[3] ?? "");

  return {
    title: title || "Arcade Documentation",
    description,
  };
}

/**
 * Normalizes indentation in the final output.
 * Removes stray leading whitespace outside code blocks while preserving
 * meaningful markdown indentation (nested list items, blockquotes), rounded
 * down to a multiple of two spaces.
 *
 * @param text - Markdown text.
 * @returns The text with indentation normalized line by line.
 */
function normalizeIndentation(text: string): string {
  let inCodeBlock = false;

  return text
    .split("\n")
    .map((line) => {
      const trimmed = line.trimStart();

      if (line.trim().startsWith("```")) {
        inCodeBlock = !inCodeBlock;
        return trimmed; // Fence markers start at column 0
      }
      if (inCodeBlock) {
        return line; // Preserve indentation inside code blocks
      }

      const keepsIndent =
        UNORDERED_LIST_REGEX.test(trimmed) ||
        ORDERED_LIST_REGEX.test(trimmed) ||
        trimmed.startsWith(">");
      if (keepsIndent && line.startsWith(" ")) {
        // Two spaces per nesting level; an odd leftover space is dropped.
        const levels = Math.floor((line.length - trimmed.length) / 2);
        return " ".repeat(levels * 2) + trimmed;
      }
      return trimmed;
    })
    .join("\n");
}

/**
 * Compiles MDX content to clean markdown by:
 * - preserving frontmatter
 * - removing import/export statements
 * - unwrapping JSX components to their text content
 * - removing leftover `{expression}` fragments
 * - leaving fenced code blocks and inline code spans untouched
 * - providing fallback content for component-only pages
 *
 * @param content - Raw MDX source.
 * @param pagePath - Path appended to https://docs.arcade.dev in the fallback
 *   link (presumably starts with "/"; verify against the caller).
 * @returns Markdown text ending with a newline.
 */
function compileMdxToMarkdown(content: string, pagePath: string): string {
  // The patterns below only understand "\n".
  let result = content.replace(LINE_ENDING_REGEX, "\n");

  // Extract and preserve frontmatter if present
  let frontmatter = "";
  const frontmatterMatch = result.match(FRONTMATTER_REGEX);
  if (frontmatterMatch) {
    frontmatter = frontmatterMatch[0];
    result = result.slice(frontmatter.length);
    // A file that ends right after the closing "---" has no trailing newline;
    // add one so the body is not glued onto the delimiter.
    if (!frontmatter.endsWith("\n")) {
      frontmatter += "\n";
    }
  }

  // Swap code for placeholders BEFORE any stripping, so that import lines,
  // JSX, braces and blank lines inside code samples survive unchanged.
  const protectedCode: string[] = [];
  const protect = (code: string, indent = ""): string => {
    protectedCode.push(code);
    // Keep the original indent in front of the placeholder so surrounding
    // indentation handling sees the line where the code used to be.
    return `${indent}__CODE_BLOCK_${protectedCode.length - 1}__`;
  };
  result = result.replace(
    CODE_BLOCK_REGEX,
    (block: string, indent: string | undefined) =>
      indent ? protect(stripIndent(block, indent.length), indent) : protect(block)
  );
  result = result.replace(INLINE_CODE_REGEX, (code: string) => protect(code));

  // Remove import statements (various formats)
  result = result.replace(IMPORT_FROM_REGEX, "");
  result = result.replace(IMPORT_DIRECT_REGEX, "");
  result = result.replace(IMPORT_DESTRUCTURE_REGEX, "");

  // Remove export statements (like export const metadata)
  result = result.replace(EXPORT_REGEX, "");

  // Drop self-closing JSX components (names may contain dots)
  result = result.replace(SELF_CLOSING_JSX_REGEX, "");

  // Unwrap JSX components with children, outermost first, until stable.
  // Dedent BEFORE trimming: trimming first would zero the first line's indent
  // and make the common indent always 0.
  let previousResult = "";
  while (previousResult !== result) {
    previousResult = result;
    result = result.replace(
      JSX_WITH_CHILDREN_REGEX,
      (_match: string, _tag: string, innerContent: string) =>
        dedent(innerContent).trim()
    );
  }

  // Remove any remaining JSX expressions like {variable} or {expression}
  result = result.replace(JSX_EXPRESSION_REGEX, "");

  // Normalize indentation, collapse runs of blank lines, trim the edges
  result = normalizeIndentation(result);
  result = result.replace(EXCESSIVE_NEWLINES_REGEX, "\n\n");
  result = result.trim();

  // Restore code (leave the placeholder text alone if the index is unknown)
  result = result.replace(
    CODE_BLOCK_PLACEHOLDER_REGEX,
    (match: string, index: string) =>
      protectedCode[Number.parseInt(index, 10)] ?? match
  );

  // If content is essentially empty (component-only page), provide fallback
  if (result.length < MIN_CONTENT_LENGTH) {
    const { title, description } = extractFrontmatterMeta(frontmatter);
    const sections = [`# ${title}`];
    if (description) {
      sections.push(description);
    }
    sections.push(
      `This page contains interactive content. Visit the full page at: https://docs.arcade.dev${pagePath}`
    );
    return `${frontmatter}${sections.join("\n\n")}\n`;
  }

  // Reconstruct with frontmatter
  return `${frontmatter}${result}\n`;
}

// NOTE(review): the rest of this patch is only visible as partial diff context
// and is intentionally not reproduced as code here:
// - GET in this route now reads the file into `rawContent`, returns
//   compileMdxToMarkdown(rawContent, pathWithoutMd), and responds with
//   "Content-Type: text/markdown; charset=utf-8" (previously text/plain).
// - app/layout.tsx gains a block rendered when `pathname !== "/"`; its markup
//   is not visible in this view.