Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ console.log(`✅ Sitemap saved to ${outputPath}`);
| `lastmod` | `string` | Current date | Custom lastmod date for all routes |
| `prettyPrint` | `boolean` | `true` | Pretty print the XML output |
| `manualRoutes` | `() => Promise<ManualSitemapEntry[]> \| ManualSitemapEntry[]` | `undefined` | Function to generate manual/dynamic routes |
| `generateRobotsTxt` | `boolean` | `false` | Generate a robots.txt file alongside the sitemap |
| `robotsTxtOptions` | `IRobotsTxt` | `undefined` | Options used when generating robots.txt |

### SitemapPluginOptions (extends SitemapOptions)

Expand Down Expand Up @@ -162,6 +164,129 @@ The plugin automatically:
- ❌ **Excludes** routes in your `excludeRoutes` configuration
- ✅ **Processes** nested route structures recursively

## Robots.txt

The plugin can generate a `robots.txt` file when `generateRobotsTxt` is enabled. It always includes the sitemap URL derived from the configured `outputPath`, and — depending on the options — can also list additional sitemap URLs or every generated sitemap rather than only the index sitemap.

### Robots Options

`robotsTxtOptions` accepts the following shape:

- `policies`: List of `IRobotPolicy` entries. Each policy renders as a block with `User-agent`, optional `Allow`, `Disallow`, and `Crawl-delay`.
- `additionalSitemaps`: Extra sitemap URLs to list in `robots.txt`.
- `includeNonIndexSitemaps`: When `true`, all generated sitemap URLs are listed in `robots.txt`, not only the index sitemap.

`IRobotPolicy` fields:

- `userAgent`: User agent for the policy (e.g., `*`, `Googlebot`).
- `disallow`: One or more disallowed paths.
- `allow`: One or more allowed paths.
- `crawlDelay`: Crawl delay in seconds.

### Minimal Example

```typescript
sitemapPlugin({
baseUrl: 'https://your-domain.com',
outputPath: 'public/sitemap.xml',
generateRobotsTxt: true,
});
```

Result:

```txt
User-agent: *
Disallow:

Sitemap: https://your-domain.com/sitemap.xml
```

### Custom Policies

```typescript
sitemapPlugin({
baseUrl: 'https://your-domain.com',
outputPath: 'public/sitemap.xml',
generateRobotsTxt: true,
robotsTxtOptions: {
policies: [
{
userAgent: '*',
disallow: ['/admin', '/private'],
allow: ['/public'],
crawlDelay: 10,
},
],
},
});
```

Result:

```txt
User-agent: *
Allow: /public
Disallow: /admin
Disallow: /private
Crawl-delay: 10

Sitemap: https://your-domain.com/sitemap.xml
```

### Additional Sitemaps

```typescript
sitemapPlugin({
baseUrl: 'https://your-domain.com',
outputPath: 'public/sitemap.xml',
generateRobotsTxt: true,
robotsTxtOptions: {
additionalSitemaps: [
'https://your-domain.com/sitemap-blog.xml',
'https://your-domain.com/sitemap-products.xml',
],
},
});
```

Result:

```txt
User-agent: *
Disallow:

Sitemap: https://your-domain.com/sitemap.xml
Sitemap: https://your-domain.com/sitemap-blog.xml
Sitemap: https://your-domain.com/sitemap-products.xml
```

### includeNonIndexSitemaps

```typescript
sitemapPlugin({
baseUrl: 'https://your-domain.com',
outputPath: 'public/sitemap-index.xml',
generateRobotsTxt: true,
robotsTxtOptions: {
includeNonIndexSitemaps: true,
},
});
```

Result:

```txt
User-agent: *
Disallow:

Sitemap: https://your-domain.com/sitemap-index.xml
Sitemap: https://your-domain.com/sitemap.xml
Sitemap: https://your-domain.com/sitemap-posts.xml
```

If you need manual generation, you can also call the exported `generateRobotsTxt(options, sitemapPaths)` helper directly to obtain the robots.txt content as a string and write the file yourself.

## Example Output

```xml
Expand Down
96 changes: 96 additions & 0 deletions src/__tests__/robots-generator.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { describe, it, expect } from 'vitest';
import { generateRobotsTxt } from '../robots-generator';
import { SitemapOptions } from '../types';
import { TanStackRouterRobotGenerator } from '../generator';

describe('TanStackRouterRobotGenerator', () => {
it('should generate default policy with no sitemaps', () => {
const options: SitemapOptions = { baseUrl: 'https://example.com' };
const generator = new TanStackRouterRobotGenerator(options);

const robotsTxt = generator.generateRobotsTxt();

expect(robotsTxt).toBe('User-agent: *\nDisallow:\n');
});

it('should generate policies and sitemap entries', () => {
const options: SitemapOptions = {
baseUrl: 'https://example.com',
robotsTxtOptions: {
policies: [
{
userAgent: '*',
allow: '/public',
disallow: ['/admin', '/private'],
crawlDelay: 10,
},
{
userAgent: 'Googlebot',
disallow: '',
},
],
additionalSitemaps: ['https://example.com/extra.xml'],
},
};
const generator = new TanStackRouterRobotGenerator(options);

const robotsTxt = generator.generateRobotsTxt(['public/sitemap.xml']);

expect(robotsTxt).toContain('User-agent: *');
expect(robotsTxt).toContain('Allow: /public');
expect(robotsTxt).toContain('Disallow: /admin');
expect(robotsTxt).toContain('Disallow: /private');
expect(robotsTxt).toContain('Crawl-delay: 10');
expect(robotsTxt).toContain('User-agent: Googlebot');
expect(robotsTxt).toContain('Disallow:');
expect(robotsTxt).toContain('Sitemap: https://example.com/sitemap.xml');
expect(robotsTxt).toContain('Sitemap: https://example.com/extra.xml');
});

it('should include only index sitemap by default', () => {
const options: SitemapOptions = {
baseUrl: 'https://example.com',
robotsTxtOptions: {
additionalSitemaps: ['https://example.com/extra.xml'],
},
};
const generator = new TanStackRouterRobotGenerator(options);

const robotsTxt = generator.generateRobotsTxt([
'public/sitemap.xml',
'public/sitemap-2.xml',
]);

expect(robotsTxt).toContain('Sitemap: https://example.com/sitemap.xml');
expect(robotsTxt).toContain('Sitemap: https://example.com/extra.xml');
expect(robotsTxt).not.toContain('sitemap-2.xml');
});

it('should include all sitemaps when includeNonIndexSitemaps is true', () => {
const options: SitemapOptions = {
baseUrl: 'https://example.com',
robotsTxtOptions: {
includeNonIndexSitemaps: true,
},
};
const generator = new TanStackRouterRobotGenerator(options);

const robotsTxt = generator.generateRobotsTxt([
'public/sitemap.xml',
'public/sitemap-2.xml',
]);

expect(robotsTxt).toContain('Sitemap: https://example.com/sitemap.xml');
expect(robotsTxt).toContain('Sitemap: https://example.com/sitemap-2.xml');
});
});

// Tests for the standalone generateRobotsTxt convenience helper, which wraps
// the generator class for one-shot use.
describe('generateRobotsTxt', () => {
  it('should generate robots.txt string using helper', () => {
    const opts: SitemapOptions = { baseUrl: 'https://example.com' };
    const output = generateRobotsTxt(opts, ['public/sitemap.xml']);
    expect(output).toContain('Sitemap: https://example.com/sitemap.xml');
  });
});
3 changes: 3 additions & 0 deletions src/generator/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Barrel module for the generator implementations.
// Consistency fix: terminate both export statements with semicolons,
// matching the file's (and project's) prevailing style.
export { TanStackRouterRobotGenerator } from './robots';

export { TanStackRouterSitemapGenerator } from './sitemap';
103 changes: 103 additions & 0 deletions src/generator/robots.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import type { IRobotPolicy, IRobotsTxt, SitemapOptions } from "../types";

/**
 * Generates robots.txt content from sitemap plugin options.
 *
 * Output layout: one block per policy (User-agent / Allow / Disallow /
 * Crawl-delay), blank-line separated, followed by a `Sitemap:` line per
 * resolved sitemap URL.
 */
export class TanStackRouterRobotGenerator {
  /** Base URL with surrounding whitespace and any trailing slash removed. */
  private baseUrl: string;
  /** robots.txt options; defaults to `{}` when not provided. */
  private robotsTxtOptions: IRobotsTxt;

  /**
   * @param options - Sitemap options; `baseUrl` is required and non-blank.
   * @throws Error when `baseUrl` is missing or empty/whitespace-only.
   */
  constructor(options: SitemapOptions) {
    if (!options || !options.baseUrl || options.baseUrl.trim() === '') {
      throw new Error('baseUrl is required and cannot be empty');
    }

    // Fix: trim before stripping the trailing slash — the validation above
    // accepts a baseUrl with surrounding whitespace, which previously leaked
    // into every generated sitemap URL.
    this.baseUrl = options.baseUrl.trim().replace(/\/$/, '');
    this.robotsTxtOptions = options.robotsTxtOptions || {};
  }

  /**
   * Render the full robots.txt content.
   *
   * @param sitemapPathsOrUrls - Generated sitemap file paths or absolute
   *   URLs; the first entry is treated as the index sitemap. Relative paths
   *   are resolved against `baseUrl` (a leading `public/` segment is
   *   stripped, since that directory is served as the web root).
   * @returns robots.txt content terminated by a trailing newline.
   */
  generateRobotsTxt(sitemapPathsOrUrls: string[] = []): string {
    const policies = this.resolvePolicies();
    const lines: string[] = [];

    policies.forEach((policy, index) => {
      if (index > 0) lines.push(''); // blank line between policy blocks
      lines.push(`User-agent: ${policy.userAgent}`);

      const allows = this.normalizeToArray(policy.allow);
      allows.forEach((allow) => lines.push(`Allow: ${allow}`));

      let disallowCount: number;
      if (policy.disallow === '') {
        // Explicit empty string means "allow everything": bare directive.
        lines.push('Disallow:');
        disallowCount = 1;
      } else {
        const disallows = this.normalizeToArray(policy.disallow);
        disallows.forEach((disallow) => lines.push(`Disallow: ${disallow}`));
        disallowCount = disallows.length;
      }

      // Fix: a group with no Allow/Disallow rule at all is not a valid
      // robots.txt group (RFC 9309 requires at least one rule line per
      // group); emit a permissive bare "Disallow:" as a safe fallback.
      if (allows.length === 0 && disallowCount === 0) {
        lines.push('Disallow:');
      }

      if (policy.crawlDelay !== undefined) {
        lines.push(`Crawl-delay: ${policy.crawlDelay}`);
      }
    });

    const sitemapUrls = this.buildSitemapUrls(sitemapPathsOrUrls);
    if (sitemapUrls.length > 0) {
      if (lines.length > 0) lines.push(''); // blank line before sitemap section
      sitemapUrls.forEach((url) => lines.push(`Sitemap: ${url}`));
    }

    return `${lines.join('\n')}\n`;
  }

  /** Configured policies, or the permissive `*` default when none given. */
  private resolvePolicies(): IRobotPolicy[] {
    if (this.robotsTxtOptions.policies?.length) {
      return this.robotsTxtOptions.policies;
    }

    return [{ userAgent: '*', disallow: '' }];
  }

  /**
   * Build the deduplicated list of absolute sitemap URLs: either only the
   * index sitemap (first entry) or all of them when
   * `includeNonIndexSitemaps` is set, plus any `additionalSitemaps`.
   */
  private buildSitemapUrls(sitemapPathsOrUrls: string[]): string[] {
    const includeAll = Boolean(this.robotsTxtOptions.includeNonIndexSitemaps);
    const baseList = includeAll
      ? sitemapPathsOrUrls
      : sitemapPathsOrUrls.slice(0, 1);
    const combined = [...baseList, ...(this.robotsTxtOptions.additionalSitemaps || [])];

    const resolved = combined
      .map((value) => this.resolveSitemapUrl(value))
      .filter((value): value is string => Boolean(value));

    // Set preserves insertion order, so dedupe keeps first occurrence.
    return Array.from(new Set(resolved));
  }

  /** Pass absolute http(s) URLs through; resolve anything else against baseUrl. */
  private resolveSitemapUrl(pathOrUrl: string): string {
    if (/^https?:\/\//i.test(pathOrUrl)) {
      return pathOrUrl;
    }

    const normalizedPath = this.normalizeSitemapPath(pathOrUrl);
    return `${this.baseUrl}${normalizedPath}`;
  }

  /**
   * Normalize a filesystem-ish sitemap path to a URL path: forward slashes,
   * no `./` prefix, no `public/` web-root segment, exactly one leading `/`.
   */
  private normalizeSitemapPath(pathValue: string): string {
    let normalized = pathValue.replace(/\\/g, '/'); // Windows separators

    if (normalized.startsWith('./')) {
      normalized = normalized.slice(2);
    }

    // `public/` is the served web root, so it must not appear in the URL.
    if (normalized.startsWith('/public/')) {
      normalized = normalized.slice('/public'.length);
    } else if (normalized.startsWith('public/')) {
      normalized = normalized.slice('public'.length);
    }

    if (!normalized.startsWith('/')) {
      normalized = `/${normalized}`;
    }

    return normalized;
  }

  /** Coerce an optional scalar-or-array value into an array (empty when unset). */
  private normalizeToArray(value?: string | string[]): string[] {
    if (!value) return [];
    return Array.isArray(value) ? value : [value];
  }
}
4 changes: 3 additions & 1 deletion src/generator.ts → src/generator/sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import {
RouteInfo,
TanStackRoute,
ManualSitemapEntry,
} from './types';
} from '../types';

export class TanStackRouterSitemapGenerator {
private options: Required<Omit<SitemapOptions, 'manualRoutes'>> &
Expand All @@ -23,6 +23,8 @@ export class TanStackRouterSitemapGenerator {
trailingSlash: false,
lastmod: new Date().toISOString(),
prettyPrint: true,
generateRobotsTxt: false,
robotsTxtOptions: {},
...options,
};
}
Expand Down
5 changes: 4 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export { generateSitemap, generateSitemapEntries } from './sitemap-generator';
export { generateRobotsTxt } from './robots-generator';
export type {
SitemapOptions,
RouteInfo,
Expand All @@ -7,7 +8,9 @@ export type {
RouterTree,
AnyRoute,
ManualSitemapEntry,
IRobotPolicy,
IRobotsTxt,
} from './types';
export { TanStackRouterSitemapGenerator } from './generator';
export { TanStackRouterSitemapGenerator, TanStackRouterRobotGenerator } from './generator';
export { sitemapPlugin, createSitemapPlugin } from './plugin';
export type { SitemapPluginOptions } from './plugin';
Loading