diff --git a/components/ui/Autocomplete.tsx b/components/ui/Autocomplete.tsx
index 36ea233c4..0b75cb1e5 100644
--- a/components/ui/Autocomplete.tsx
+++ b/components/ui/Autocomplete.tsx
@@ -29,7 +29,11 @@ import { MenuItem } from "@telegraph/menu";
import { Tag } from "@telegraph/tag";
import { Code, Text } from "@telegraph/typography";
-import { DocsSearchItem, EndpointSearchItem } from "@/types";
+import {
+ DocsSearchItem,
+ EndpointSearchItem,
+ EnhancedDocsSearchItem,
+} from "@/types";
import { useInkeepModal } from "../AiChatButton";
import { useAskAi } from "../AskAiContext";
@@ -62,7 +66,9 @@ function createAskAiPrompt(query: string): string {
return `Can you tell me about ${query}`;
}
-type ResultItem = (DocsSearchItem & BaseItem) | (EndpointSearchItem & BaseItem);
+type ResultItem =
+ | (EnhancedDocsSearchItem & BaseItem)
+ | (EndpointSearchItem & BaseItem);
const algoliaAppId = process.env.NEXT_PUBLIC_ALGOLIA_APP_ID || "";
const algoliaSearchApiKey =
@@ -175,6 +181,9 @@ const DocsSearchResult = ({
const href = `/${item.path}`;
const isApiRef = isApiReferencePath(item.path);
+ const enhancedItem = item as EnhancedDocsSearchItem;
+ const showPageTitle = !enhancedItem.isPageLevel && enhancedItem.pageTitle;
+
const content = (
@@ -193,6 +202,7 @@ const DocsSearchResult = ({
)}
+ {showPageTitle ? `${enhancedItem.pageTitle} · ` : ""}
{item.section}
@@ -823,7 +833,7 @@ const Autocomplete = () => {
/>
) : (
autocomplete.setQuery("")}
/>
)}
diff --git a/lib/content.server.ts b/lib/content.server.ts
index ad0d45e03..0d1ae2ad6 100644
--- a/lib/content.server.ts
+++ b/lib/content.server.ts
@@ -1,16 +1,9 @@
import fs from "fs";
import path from "path";
-import algoliasearch from "algoliasearch";
-import type { FrontMatter, DocsSearchItem } from "../types";
export const CONTENT_DIR = "content/";
export const DOCS_FILE_EXTENSIONS = [".mdx", ".md"];
-/**
- * This is to index our .md and .mdx file content.
- * API/mAPI reference content is indexed at script/indexApisForSearch.ts.
- */
-
export const getAllFilesInDir = (
directory: string,
files: string[] = [],
@@ -31,48 +24,5 @@ export const getAllFilesInDir = (
};
export function makeIdFromPath(resourcePath) {
- return resourcePath.replace(/\.mdx?$/, "").replace("/index", "");
-}
-
-export async function generateAlgoliaIndex(frontmatter: FrontMatter) {
- const algoliaAppId = process.env.NEXT_PUBLIC_ALGOLIA_APP_ID ?? "";
- const algoliaAdminApiKey = process.env.ALGOLIA_ADMIN_API_KEY ?? "";
- const algoliaIndexName = process.env.NEXT_PUBLIC_ALGOLIA_INDEX_NAME ?? "";
-
- if (algoliaAppId && algoliaAdminApiKey && algoliaIndexName) {
- const client = algoliasearch(algoliaAppId, algoliaAdminApiKey);
- const index = client.initIndex(algoliaIndexName);
-
- try {
- // Notes:
- // Algolia recommends saving objects in batches because of efficiency.
- // Our markdown processor doesn't provide a callback to subscribe to that
- // gets called after finishing with all elements.
- //
- // Given we only have ~40 items to be indexed right now, we are just saving
- // entries one by one.
- const object: DocsSearchItem = {
- // The path to the page will be the identifier in Algolia.
- objectID: frontmatter.id,
- path: frontmatter.id,
- title: frontmatter.title,
- section: frontmatter.section,
- // Once we add tags are added to pages, Algolia records
- // will be updated with them, so we can enhance the search experience
- tags: frontmatter.tags || [],
- // Saving a content page, not an API endpoint
- contentType: "document",
- // Saving to the pages index
- index: "pages",
- };
-
- await index.saveObject(object);
- } catch (e) {
- console.error(e);
- }
- } else {
- console.info(
- "Algolia configuration variables not present. Skipping indexing.",
- );
- }
+ return resourcePath.replace(/\.mdx?$/, "").replace(/\/index$/, "");
}
diff --git a/package.json b/package.json
index 3da0302d4..a271ae75f 100644
--- a/package.json
+++ b/package.json
@@ -16,10 +16,11 @@
"generate-llms": "yarn run open-api-to-md && tsx scripts/generateApiMarkdown.ts && tsx scripts/generateLlmsTxt.ts",
"generate-reference-md": "tsx scripts/generateApiMarkdown.ts",
"index-apis": "tsx scripts/indexApisForSearch.ts",
+ "index-docs": "tsx scripts/indexDocsForSearch.ts",
"open-api-to-md": "bash scripts/openApiToMd.sh",
"split-specs": "tsx scripts/splitOpenApiSpec.ts",
"predev": "yarn split-specs && yarn generate-llms",
- "prebuild": "yarn split-specs && yarn generate-llms && yarn index-apis"
+ "prebuild": "yarn split-specs && yarn generate-llms && yarn index-docs && yarn index-apis"
},
"dependencies": {
"@algolia/autocomplete-js": "^1.6.3",
diff --git a/pages/[...slug].tsx b/pages/[...slug].tsx
index 00d485c78..c66cd4c52 100644
--- a/pages/[...slug].tsx
+++ b/pages/[...slug].tsx
@@ -14,7 +14,6 @@ import {
CONTENT_DIR,
DOCS_FILE_EXTENSIONS,
makeIdFromPath,
- generateAlgoliaIndex,
} from "../lib/content.server";
import eventPayload from "../data/code/sources/eventPayload";
import datadogDashboardJson from "../content/integrations/extensions/datadog_dashboard.json";
@@ -96,9 +95,6 @@ export async function getStaticProps({ params: { slug } }) {
// Extend frontmatter
mdxSource.frontmatter.id = makeIdFromPath(slug.join(sep));
- // Index page in algolia
- await generateAlgoliaIndex(mdxSource.frontmatter);
-
return { props: { source: mdxSource, sourcePath, typedocs } };
}
diff --git a/scripts/indexDocsForSearch.ts b/scripts/indexDocsForSearch.ts
new file mode 100644
index 000000000..e87856c26
--- /dev/null
+++ b/scripts/indexDocsForSearch.ts
@@ -0,0 +1,407 @@
+import fs from "fs";
+import path from "path";
+import { unified } from "unified";
+import remarkParse from "remark-parse";
+import remarkFrontmatter from "remark-frontmatter";
+import yaml from "yaml";
+import algoliasearch from "algoliasearch";
+import { loadEnvConfig } from "@next/env";
+import type { EnhancedDocsSearchItem } from "@/types";
+
+// Load Next.js environment variables
+const projectDir = process.cwd();
+loadEnvConfig(projectDir);
+
+const algoliaAppId = process.env.NEXT_PUBLIC_ALGOLIA_APP_ID ?? "";
+const algoliaAdminApiKey = process.env.ALGOLIA_ADMIN_API_KEY ?? "";
+const algoliaPagesIndexName = process.env.NEXT_PUBLIC_ALGOLIA_INDEX_NAME ?? "";
+
+const CONTENT_DIR = path.join(projectDir, "content");
+const DOCS_FILE_EXTENSIONS = [".mdx", ".md"];
+
+// Maximum content length per record (in characters)
+// Algolia recommends keeping records small for better performance
+const MAX_CONTENT_LENGTH = 2000;
+
+// Keep count of indexed items
+let pageCount = 0;
+let headingCount = 0;
+
+interface Heading {
+ level: number;
+ title: string;
+ slug: string;
+ content: string;
+}
+
+interface Frontmatter {
+ title: string;
+ description?: string;
+ tags?: string[];
+ section: string;
+}
+
+/**
+ * Recursively get all files in a directory with specific extensions
+ */
+function getAllFilesInDir(
+ directory: string,
+ files: string[] = [],
+ extensions?: string[],
+): string[] {
+ fs.readdirSync(directory).forEach((file) => {
+ const subpath = path.join(directory, file);
+ if (fs.lstatSync(subpath).isDirectory()) {
+ getAllFilesInDir(subpath, files, extensions);
+ } else {
+ if (!extensions || extensions.includes(path.extname(subpath))) {
+ files.push(subpath);
+ }
+ }
+ });
+
+ return files;
+}
+
+/**
+ * Parse frontmatter from markdown content using remark
+ */
+async function parseFrontmatter(
+ markdownContent: string,
+): Promise<Frontmatter | null> {
+ const file = await unified()
+ .use(remarkParse)
+ .use(remarkFrontmatter, ["yaml"])
+ .parse(markdownContent);
+
+ const yamlNode = file.children.find(
+ (node): node is { type: "yaml"; value: string } => node.type === "yaml",
+ );
+ if (!yamlNode) return null;
+ return yaml.parse(yamlNode.value);
+}
+
+/**
+ * Create a URL-friendly slug from a heading title
+ */
+function slugify(text: string): string {
+ return text
+ .toLowerCase()
+ .replace(/[^\w\s-]/g, "") // Remove non-word characters except spaces and hyphens
+ .replace(/\s+/g, "-") // Replace spaces with hyphens
+ .replace(/-+/g, "-") // Replace multiple hyphens with single
+ .trim();
+}
+
+/**
+ * Remove frontmatter from markdown content
+ */
+function removeFrontmatter(content: string): string {
+ // Match YAML frontmatter at the start of the file
+ const frontmatterRegex = /^---[\s\S]*?---\n*/;
+ return content.replace(frontmatterRegex, "");
+}
+
+/**
+ * Extract plain text from markdown content
+ * Removes JSX components, imports, code blocks, and other non-text elements
+ */
+function extractTextContent(mdxContent: string): string {
+ let content = mdxContent;
+
+ // Remove import statements
+ content = content.replace(/^import\s+.*$/gm, "");
+
+ // Remove export statements
+ content = content.replace(/^export\s+.*$/gm, "");
+
+ // Remove code blocks (fenced)
+ content = content.replace(/```[\s\S]*?```/g, "");
+
+ // Remove inline code
+ content = content.replace(/`[^`]+`/g, "");
+
+ // Remove JSX components (self-closing and with children)
+ content = content.replace(/<[A-Z][^>]*\/>/g, ""); // Self-closing like <Component />
+ content = content.replace(/<[A-Z][^>]*>[\s\S]*?<\/[A-Z][^>]*>/g, ""); // With children
+
+ // Remove HTML-style components
+ content = content.replace(/<[a-z][^>]*>[\s\S]*?<\/[a-z][^>]*>/g, "");
+
+ // Remove remaining HTML/JSX tags
+ content = content.replace(/<[^>]+>/g, "");
+
+ // Remove markdown images (must come before links since images contain link syntax)
+ content = content.replace(/!\[([^\]]*)\]\([^)]+\)/g, "");
+
+ // Remove markdown links but keep the text
+ content = content.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
+
+ // Remove markdown emphasis markers
+ content = content.replace(/\*\*([^*]+)\*\*/g, "$1"); // Bold
+ content = content.replace(/\*([^*]+)\*/g, "$1"); // Italic
+ content = content.replace(/__([^_]+)__/g, "$1"); // Bold
+ content = content.replace(/_([^_]+)_/g, "$1"); // Italic
+
+ // Remove heading markers
+ content = content.replace(/^#{1,6}\s+/gm, "");
+
+ // Remove horizontal rules
+ content = content.replace(/^[-*_]{3,}$/gm, "");
+
+ // Remove list markers
+ content = content.replace(/^\s*[-*+]\s+/gm, "");
+ content = content.replace(/^\s*\d+\.\s+/gm, "");
+
+ // Remove blockquote markers
+ content = content.replace(/^\s*>\s*/gm, "");
+
+ // Normalize whitespace
+ content = content.replace(/\n{3,}/g, "\n\n"); // Multiple newlines to double
+ content = content.replace(/[ \t]+/g, " "); // Multiple spaces to single
+
+ return content.trim();
+}
+
+/**
+ * Extract headings with their content from markdown
+ */
+function extractHeadings(mdxContent: string): Heading[] {
+ // Remove frontmatter first
+ const contentWithoutFrontmatter = removeFrontmatter(mdxContent);
+
+ const headingRegex = /^(#{2,3})\s+(.+)$/gm;
+ const headings: Heading[] = [];
+ let match;
+
+ const matches: Array<{ index: number; level: number; title: string }> = [];
+
+ while ((match = headingRegex.exec(contentWithoutFrontmatter)) !== null) {
+ matches.push({
+ index: match.index,
+ level: match[1].length,
+ title: match[2].trim(),
+ });
+ }
+
+ // Extract content for each heading
+ for (let i = 0; i < matches.length; i++) {
+ const current = matches[i];
+ const next = matches[i + 1];
+
+ const contentStart =
+ current.index + `${"#".repeat(current.level)} ${current.title}`.length;
+ const contentEnd = next ? next.index : contentWithoutFrontmatter.length;
+ const rawContent = contentWithoutFrontmatter.slice(
+ contentStart,
+ contentEnd,
+ );
+
+ const cleanContent = extractTextContent(rawContent);
+
+ // Only include headings with meaningful content
+ if (cleanContent.length > 20) {
+ headings.push({
+ level: current.level,
+ title: current.title,
+ slug: slugify(current.title),
+ content: cleanContent.slice(0, MAX_CONTENT_LENGTH),
+ });
+ }
+ }
+
+ return headings;
+}
+
+/**
+ * Get the intro content (content before the first heading)
+ */
+function getIntroContent(mdxContent: string): string {
+ const contentWithoutFrontmatter = removeFrontmatter(mdxContent);
+
+ // Find the first H2 or H3 heading
+ const firstHeadingMatch = contentWithoutFrontmatter.match(/^#{2,3}\s+/m);
+
+ if (firstHeadingMatch && firstHeadingMatch.index !== undefined) {
+ const introRaw = contentWithoutFrontmatter.slice(
+ 0,
+ firstHeadingMatch.index,
+ );
+ return extractTextContent(introRaw).slice(0, MAX_CONTENT_LENGTH);
+ }
+
+ // No headings found, use all content
+ return extractTextContent(contentWithoutFrontmatter).slice(
+ 0,
+ MAX_CONTENT_LENGTH,
+ );
+}
+
+/**
+ * Convert file path to URL path
+ */
+function filePathToUrlPath(filePath: string): string {
+ return filePath
+ .replace(CONTENT_DIR, "")
+ .replace(/\.mdx?$/, "")
+ .replace(/\/index$/, "") // Only remove /index at end of path
+ .replace(/^\//, ""); // Remove leading slash for objectID
+}
+
+/**
+ * Queue of items to save to Algolia
+ */
+const itemsToSave: EnhancedDocsSearchItem[] = [];
+
+async function queueItem(item: EnhancedDocsSearchItem) {
+ // Validate path doesn't start with /
+ if (item.path.startsWith("/")) {
+ console.error(`Path may not start with "/". Violating path: ${item.path}`);
+ return;
+ }
+
+ console.log(
+ `Indexing ${item.isPageLevel ? "page" : "heading"}: ${item.title} -> ${
+ item.path
+ }`,
+ );
+ itemsToSave.push(item);
+}
+
+/**
+ * Process a single MDX file and create search records
+ */
+async function processFile(filePath: string): Promise<void> {
+ // Skip special directories
+ if (
+ filePath.includes("/__mapi-reference/") ||
+ filePath.includes("/__api-reference/") ||
+ filePath.includes("/__cli/")
+ ) {
+ return;
+ }
+
+ const content = fs.readFileSync(filePath, "utf-8");
+ const frontmatter = await parseFrontmatter(content);
+
+ if (!frontmatter || !frontmatter.title || !frontmatter.section) {
+ console.warn(`Skipping ${filePath}: missing required frontmatter`);
+ return;
+ }
+
+ const urlPath = filePathToUrlPath(filePath);
+
+ // Create page-level record
+ const introContent = getIntroContent(content);
+ const pageRecord: EnhancedDocsSearchItem = {
+ objectID: `page-${urlPath}`,
+ path: urlPath,
+ title: frontmatter.title,
+ pageTitle: frontmatter.title,
+ description: frontmatter.description,
+ content: introContent,
+ section: frontmatter.section,
+ tags: frontmatter.tags || [],
+ headingLevel: 0,
+ contentType: "document",
+ index: "pages",
+ isPageLevel: true,
+ };
+ await queueItem(pageRecord);
+ pageCount++;
+
+ // Extract and create heading-level records
+ const headings = extractHeadings(content);
+ for (const heading of headings) {
+ const headingPath = `${urlPath}#${heading.slug}`;
+ const headingRecord: EnhancedDocsSearchItem = {
+ objectID: `heading-${headingPath}`,
+ path: headingPath,
+ title: heading.title,
+ pageTitle: frontmatter.title,
+ content: heading.content,
+ section: frontmatter.section,
+ tags: frontmatter.tags || [],
+ headingLevel: heading.level,
+ contentType: "document",
+ index: "pages",
+ isPageLevel: false,
+ };
+ await queueItem(headingRecord);
+ headingCount++;
+ }
+}
+
+/**
+ * Main entry point
+ */
+async function main() {
+ console.log("🔍 Starting docs search indexing...\n");
+
+ let skipIndexing = false;
+
+ // Check for required environment variables
+ if (!algoliaAppId || !algoliaAdminApiKey || !algoliaPagesIndexName) {
+ const missing: string[] = [];
+ if (!algoliaAppId) missing.push("NEXT_PUBLIC_ALGOLIA_APP_ID");
+ if (!algoliaAdminApiKey) missing.push("ALGOLIA_ADMIN_API_KEY");
+ if (!algoliaPagesIndexName) missing.push("NEXT_PUBLIC_ALGOLIA_INDEX_NAME");
+
+ console.warn(
+ "Missing Algolia environment variables. Continuing with script but skipping actual indexing.\n\nMissing: " +
+ missing.join(", "),
+ );
+ skipIndexing = true;
+ }
+
+ // Get all MDX/MD files
+ const files = getAllFilesInDir(CONTENT_DIR, [], DOCS_FILE_EXTENSIONS);
+ console.log(`Found ${files.length} content files to process\n`);
+
+ // Process each file
+ for (const file of files) {
+ try {
+ await processFile(file);
+ } catch (error) {
+ console.error(`Error processing ${file}:`, error);
+ }
+ }
+
+ console.log("\n📊 Indexing summary:");
+ console.log(` Pages indexed: ${pageCount}`);
+ console.log(` Headings indexed: ${headingCount}`);
+ console.log(` Total records: ${itemsToSave.length}`);
+
+ // Save to Algolia
+ if (!skipIndexing && itemsToSave.length > 0) {
+ console.log("\n📤 Uploading to Algolia...");
+
+ const client = algoliasearch(algoliaAppId, algoliaAdminApiKey);
+ const index = client.initIndex(algoliaPagesIndexName);
+
+ // Save objects in batches (Algolia recommends batches of 1000)
+ const BATCH_SIZE = 1000;
+ for (let i = 0; i < itemsToSave.length; i += BATCH_SIZE) {
+ const batch = itemsToSave.slice(i, i + BATCH_SIZE);
+ await index.saveObjects(batch);
+ console.log(
+ ` Saved batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(
+ itemsToSave.length / BATCH_SIZE,
+ )}`,
+ );
+ }
+
+ console.log("\n✅ Successfully indexed docs for search!");
+ } else if (skipIndexing) {
+ console.log(
+ "\n⚠️ Completed processing, but skipped Algolia upload due to missing environment variables.",
+ );
+ }
+
+ process.exit(0);
+}
+
+main().catch((error) => {
+ console.error("Fatal error:", error);
+ process.exit(1);
+});
diff --git a/types.ts b/types.ts
index f3cb89ee6..71bc5ed0f 100644
--- a/types.ts
+++ b/types.ts
@@ -47,6 +47,24 @@ export type DocsSearchItem = {
index: "pages" | "endpoints";
};
+// Enhanced search item type for improved Algolia indexing
+// This extends the basic DocsSearchItem with content and heading information
+export type EnhancedDocsSearchItem = {
+ objectID: string; // Unique ID (page-path or page-path#heading-slug)
+ path: string; // URL path (with optional anchor)
+ title: string; // Page title OR heading title
+ pageTitle: string; // Always the parent page title
+ description?: string; // From frontmatter (page-level only)
+ content: string; // Text content (truncated ~300-500 words)
+ section: string; // Top-level section (Concepts, Getting Started, etc.)
+ tags: string[]; // Tags from frontmatter
+ headingLevel: number; // 0 for page, 2 for H2, 3 for H3
+ contentType: "document" | "api-reference";
+ index: "pages" | "endpoints";
+ // Ranking fields
+ isPageLevel: boolean; // True if this is a page-level record (not a heading)
+};
+
export type EndpointSearchItem = DocsSearchItem & {
method: string;
endpoint: string;