diff --git a/README.md b/README.md index 9730e69..ff13b47 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,8 @@ console.log(`✅ Sitemap saved to ${outputPath}`); | `lastmod` | `string` | Current date | Custom lastmod date for all routes | | `prettyPrint` | `boolean` | `true` | Pretty print the XML output | | `manualRoutes` | `() => Promise \| ManualSitemapEntry[]` | `undefined` | Function to generate manual/dynamic routes | +| `generateRobotsTxt` | `boolean` | `false` | Generate a robots.txt file alongside the sitemap | +| `robotsTxtOptions` | `IRobotsTxt` | `undefined` | Options used when generating robots.txt | ### SitemapPluginOptions (extends SitemapOptions) @@ -162,6 +164,129 @@ The plugin automatically: - ❌ **Excludes** routes in your `excludeRoutes` configuration - ✅ **Processes** nested route structures recursively +## Robots.txt + +The plugin can generate a `robots.txt` file when `generateRobotsTxt` is enabled. It will always include the sitemap URL for the configured `outputPath` and can append additional sitemaps or all non-index sitemaps based on options. + +### Robots Options + +`robotsTxtOptions` accepts the following shape: + +- `policies`: List of `IRobotPolicy` entries. Each policy renders as a block with `User-agent`, optional `Allow`, `Disallow`, and `Crawl-delay`. +- `additionalSitemaps`: Extra sitemap URLs to list in `robots.txt`. +- `includeNonIndexSitemaps`: When `true`, all generated sitemap URLs are listed in `robots.txt`, not only the index sitemap. + +`IRobotPolicy` fields: + +- `userAgent`: User agent for the policy (e.g., `*`, `Googlebot`). +- `disallow`: One or more disallowed paths. +- `allow`: One or more allowed paths. +- `crawlDelay`: Crawl delay in seconds. 
+ +### Minimal Example + +```typescript +sitemapPlugin({ + baseUrl: 'https://your-domain.com', + outputPath: 'public/sitemap.xml', + generateRobotsTxt: true, +}); +``` + +Result: + +```txt +User-agent: * +Disallow: + +Sitemap: https://your-domain.com/sitemap.xml +``` + +### Custom Policies + +```typescript +sitemapPlugin({ + baseUrl: 'https://your-domain.com', + outputPath: 'public/sitemap.xml', + generateRobotsTxt: true, + robotsTxtOptions: { + policies: [ + { + userAgent: '*', + disallow: ['/admin', '/private'], + allow: ['/public'], + crawlDelay: 10, + }, + ], + }, +}); +``` + +Result: + +```txt +User-agent: * +Allow: /public +Disallow: /admin +Disallow: /private +Crawl-delay: 10 + +Sitemap: https://your-domain.com/sitemap.xml +``` + +### Additional Sitemaps + +```typescript +sitemapPlugin({ + baseUrl: 'https://your-domain.com', + outputPath: 'public/sitemap.xml', + generateRobotsTxt: true, + robotsTxtOptions: { + additionalSitemaps: [ + 'https://your-domain.com/sitemap-blog.xml', + 'https://your-domain.com/sitemap-products.xml', + ], + }, +}); +``` + +Result: + +```txt +User-agent: * +Disallow: + +Sitemap: https://your-domain.com/sitemap.xml +Sitemap: https://your-domain.com/sitemap-blog.xml +Sitemap: https://your-domain.com/sitemap-products.xml +``` + +### includeNonIndexSitemaps + +```typescript +sitemapPlugin({ + baseUrl: 'https://your-domain.com', + outputPath: 'public/sitemap-index.xml', + generateRobotsTxt: true, + robotsTxtOptions: { + includeNonIndexSitemaps: true, + }, +}); +``` + +Result: + +```txt +User-agent: * +Disallow: + +Sitemap: https://your-domain.com/sitemap-index.xml +Sitemap: https://your-domain.com/sitemap.xml +Sitemap: https://your-domain.com/sitemap-posts.xml +``` + +If you need manual generation, you can also call `generateRobotsTxt` and write the file yourself. 
+ ## Example Output ```xml diff --git a/src/__tests__/robots-generator.test.ts b/src/__tests__/robots-generator.test.ts new file mode 100644 index 0000000..83d153d --- /dev/null +++ b/src/__tests__/robots-generator.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect } from 'vitest'; +import { generateRobotsTxt } from '../robots-generator'; +import { SitemapOptions } from '../types'; +import { TanStackRouterRobotGenerator } from '../generator'; + +describe('TanStackRouterRobotGenerator', () => { + it('should generate default policy with no sitemaps', () => { + const options: SitemapOptions = { baseUrl: 'https://example.com' }; + const generator = new TanStackRouterRobotGenerator(options); + + const robotsTxt = generator.generateRobotsTxt(); + + expect(robotsTxt).toBe('User-agent: *\nDisallow:\n'); + }); + + it('should generate policies and sitemap entries', () => { + const options: SitemapOptions = { + baseUrl: 'https://example.com', + robotsTxtOptions: { + policies: [ + { + userAgent: '*', + allow: '/public', + disallow: ['/admin', '/private'], + crawlDelay: 10, + }, + { + userAgent: 'Googlebot', + disallow: '', + }, + ], + additionalSitemaps: ['https://example.com/extra.xml'], + }, + }; + const generator = new TanStackRouterRobotGenerator(options); + + const robotsTxt = generator.generateRobotsTxt(['public/sitemap.xml']); + + expect(robotsTxt).toContain('User-agent: *'); + expect(robotsTxt).toContain('Allow: /public'); + expect(robotsTxt).toContain('Disallow: /admin'); + expect(robotsTxt).toContain('Disallow: /private'); + expect(robotsTxt).toContain('Crawl-delay: 10'); + expect(robotsTxt).toContain('User-agent: Googlebot'); + expect(robotsTxt).toContain('Disallow:'); + expect(robotsTxt).toContain('Sitemap: https://example.com/sitemap.xml'); + expect(robotsTxt).toContain('Sitemap: https://example.com/extra.xml'); + }); + + it('should include only index sitemap by default', () => { + const options: SitemapOptions = { + baseUrl: 'https://example.com', + 
/**
 * Renders the text of a `robots.txt` file from `SitemapOptions`.
 *
 * Only `baseUrl` and `robotsTxtOptions` are read from the options. Policies
 * are rendered as `User-agent` groups separated by a blank line, followed by
 * one `Sitemap:` line per resolved sitemap URL.
 */
export class TanStackRouterRobotGenerator {
  private readonly baseUrl: string;
  private readonly robotsTxtOptions: IRobotsTxt;

  /**
   * @param options Sitemap options; `baseUrl` must be a non-blank string.
   * @throws Error when `options` is missing or `baseUrl` is empty/whitespace.
   */
  constructor(options: SitemapOptions) {
    // Validate and store the SAME (trimmed) value, so stray whitespace cannot
    // end up inside every generated sitemap URL.
    const trimmedBaseUrl = options?.baseUrl?.trim();
    if (!trimmedBaseUrl) {
      throw new Error('baseUrl is required and cannot be empty');
    }

    // Drop every trailing slash; resolved paths always start with one.
    this.baseUrl = trimmedBaseUrl.replace(/\/+$/, '');
    this.robotsTxtOptions = options.robotsTxtOptions || {};
  }

  /**
   * Build the robots.txt content.
   *
   * @param sitemapPathsOrUrls Generated sitemap locations, as absolute
   *   `http(s)` URLs or as file paths (a leading `./` and a leading `public/`
   *   segment are stripped before the path is appended to `baseUrl`). Unless
   *   `includeNonIndexSitemaps` is set, only the first non-blank entry is listed.
   * @returns The file content, always terminated by a single newline.
   */
  generateRobotsTxt(sitemapPathsOrUrls: string[] = []): string {
    const lines: string[] = [];

    this.resolvePolicies().forEach((policy, index) => {
      if (index > 0) lines.push(''); // blank line between groups
      lines.push(...this.renderPolicy(policy));
    });

    const sitemapUrls = this.buildSitemapUrls(sitemapPathsOrUrls);
    if (sitemapUrls.length > 0) {
      if (lines.length > 0) lines.push('');
      sitemapUrls.forEach((url) => lines.push(`Sitemap: ${url}`));
    }

    return `${lines.join('\n')}\n`;
  }

  /** Configured policies, or the allow-everything default when none are given. */
  private resolvePolicies(): IRobotPolicy[] {
    if (this.robotsTxtOptions.policies?.length) {
      return this.robotsTxtOptions.policies;
    }

    return [{ userAgent: '*', disallow: '' }];
  }

  /** Render one policy as its `User-agent` / `Allow` / `Disallow` / `Crawl-delay` lines. */
  private renderPolicy(policy: IRobotPolicy): string[] {
    const lines = [`User-agent: ${policy.userAgent}`];

    for (const path of this.normalizeToArray(policy.allow)) {
      lines.push(this.directive('Allow', path));
    }

    if (policy.disallow === '') {
      // An explicit empty string means "nothing is disallowed".
      lines.push('Disallow:');
    } else {
      for (const path of this.normalizeToArray(policy.disallow)) {
        lines.push(this.directive('Disallow', path));
      }
    }

    // Skip null/NaN/Infinity instead of printing them verbatim.
    if (typeof policy.crawlDelay === 'number' && Number.isFinite(policy.crawlDelay)) {
      lines.push(`Crawl-delay: ${policy.crawlDelay}`);
    }

    return lines;
  }

  /** Format a single directive without leaving a trailing space for empty values. */
  private directive(name: string, value: string): string {
    return value === '' ? `${name}:` : `${name}: ${value}`;
  }

  /** Resolve, order and de-duplicate the sitemap URLs to list. */
  private buildSitemapUrls(sitemapPathsOrUrls: string[]): string[] {
    // Blank entries are removed BEFORE resolution; once resolved they would
    // turn into "<baseUrl>/" and could no longer be told apart from real URLs.
    const generated = this.dropBlank(sitemapPathsOrUrls);
    const listed = this.robotsTxtOptions.includeNonIndexSitemaps
      ? generated
      : generated.slice(0, 1);
    const additional = this.dropBlank(this.robotsTxtOptions.additionalSitemaps || []);

    const resolved = [...listed, ...additional].map((value) =>
      this.resolveSitemapUrl(value)
    );

    return Array.from(new Set(resolved));
  }

  /** Trim entries and discard non-string or empty ones. */
  private dropBlank(values: string[]): string[] {
    return values
      .filter((value): value is string => typeof value === 'string')
      .map((value) => value.trim())
      .filter((value) => value !== '');
  }

  /** Absolute `http(s)` URLs pass through; anything else is joined onto `baseUrl`. */
  private resolveSitemapUrl(pathOrUrl: string): string {
    if (/^https?:\/\//i.test(pathOrUrl)) {
      return pathOrUrl;
    }

    return `${this.baseUrl}${this.normalizeSitemapPath(pathOrUrl)}`;
  }

  /** Turn an output file path into a URL path that starts with `/`. */
  private normalizeSitemapPath(pathValue: string): string {
    let normalized = pathValue.replace(/\\/g, '/'); // Windows separators

    if (normalized.startsWith('./')) {
      normalized = normalized.slice(2);
    }

    // Strip a leading "public" directory segment from the output path.
    if (normalized.startsWith('/public/')) {
      normalized = normalized.slice('/public'.length);
    } else if (normalized.startsWith('public/')) {
      normalized = normalized.slice('public'.length);
    }

    return normalized.startsWith('/') ? normalized : `/${normalized}`;
  }

  /** Accept a single value or a list; empty/missing input yields an empty list. */
  private normalizeToArray(value?: string | string[]): string[] {
    if (!value) return [];
    return Array.isArray(value) ? value : [value];
  }
}
/**
 * Produce the contents of a robots.txt file as a string.
 *
 * Thin convenience wrapper: it constructs a `TanStackRouterRobotGenerator`
 * from `options` and returns whatever its `generateRobotsTxt` method yields.
 * Nothing is written to disk here.
 *
 * @param options Sitemap generation options passed to the generator constructor
 * @param sitemapPathsOrUrls Sitemap paths or URLs to list in robots.txt (defaults to none)
 * @returns Generated robots.txt content
 */
export function generateRobotsTxt(
  options: SitemapOptions,
  sitemapPathsOrUrls: string[] = []
): string {
  return new TanStackRouterRobotGenerator(options).generateRobotsTxt(
    sitemapPathsOrUrls
  );
}
/** One `User-agent` group of the generated robots.txt file. */
export interface IRobotPolicy {
  /** User-agent this policy applies to (e.g., '*', 'Googlebot') */
  userAgent: string;
  /**
   * Disallowed path(s). An empty string renders a bare `Disallow:` line;
   * omitting the field renders no `Disallow` line at all.
   */
  disallow?: string | string[];
  /** Allowed path(s); rendered before the `Disallow` lines of the same group */
  allow?: string | string[];
  /** Crawl delay in seconds */
  crawlDelay?: number;
}

/** Options controlling the content of the generated robots.txt file. */
export interface IRobotsTxt {
  /**
   * Policies to include in the generated robots.txt file. When omitted or
   * empty, a single `User-agent: *` group with an empty `Disallow:` is used.
   * @example
   * [
   *   {
   *     userAgent: '*',
   *     disallow: ['/admin', '/private'],
   *     allow: ['/public'],
   *     crawlDelay: 10,
   *   },
   * ]
   */
  policies?: IRobotPolicy[];

  /**
   * Additional sitemap URLs to include in robots.txt
   * (e.g., ['https://example.com/sitemap1.xml', 'https://example.com/sitemap2.xml']).
   * These are listed regardless of `includeNonIndexSitemaps`.
   */
  additionalSitemaps?: string[];

  /**
   * By default robots.txt lists only the first sitemap handed to the
   * generator (treated as the index sitemap) plus `additionalSitemaps`.
   *
   * Set this option to `true` to list every sitemap path handed to the
   * generator instead of only the first one.
   * @default false
   */
  includeNonIndexSitemaps?: boolean;
}