From ef5d7227ef2c340b5451939cb05534e54a859d08 Mon Sep 17 00:00:00 2001 From: Alexander Makarov Date: Sat, 13 Jun 2026 12:07:46 +0300 Subject: [PATCH] Generate llms.txt --- docs/commands.md | 3 +- docs/configuration.md | 2 + roadmap.md | 1 + src/Build/LlmsTxtGenerator.php | 102 ++++++++++ src/Console/BuildCommand.php | 11 ++ src/Content/Model/SiteConfig.php | 1 + src/Content/Parser/SiteConfigParser.php | 1 + tests/Unit/Build/LlmsTxtGeneratorTest.php | 177 ++++++++++++++++++ .../Content/Parser/SiteConfigParserTest.php | 13 ++ 9 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 src/Build/LlmsTxtGenerator.php create mode 100644 tests/Unit/Build/LlmsTxtGeneratorTest.php diff --git a/docs/commands.md b/docs/commands.md index ed8768d..d6183f6 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -54,7 +54,8 @@ The command: 7. Generates Atom (`feed.xml`) and RSS 2.0 (`rss.xml`) feeds for each collection with `feed: true`, capped by collection `feed_limit` (`20` by default, `0` for unlimited). 8. Generates paginated collection listing pages (e.g., `/blog/`, `/blog/page/2/`) for collections with `listing: true`. 9. Generates `sitemap.xml` containing all entry URLs, standalone page URLs, collection listing URLs, and the home page. -10. Generates taxonomy pages for each taxonomy defined in `config.yaml` (e.g., `/tags/`, `/tags/php/`, `/categories/`). +10. Generates `llms.txt` when `llms_txt` is enabled. +11. Generates taxonomy pages for each taxonomy defined in `config.yaml` (e.g., `/tags/`, `/tags/php/`, `/categories/`). With `--workers=N` (N > 1), entry rendering and writing is parallelized across N forked processes. With `--workers=auto`, YiiPress uses up to the detected worker count and lets page writers clamp back to sequential mode for smaller workloads. Feeds are generated after entry writing and can be split per collection across workers. Sitemap generation remains serial. diff --git a/docs/configuration.md b/docs/configuration.md index bdce7f4..ac70df0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -45,6 +45,7 @@ assets: fingerprint: true minify: true +llms_txt: true last_updated: true edit_page: https://github.com/example/mysite/edit/main/content/{path} @@ -75,6 +76,7 @@ editor: code - **search** — opt-in client-side search (see below) - **related** — opt-in related content suggestions (see below) - **minify** — minify generated HTML output (default: `true`); set to `false` to keep rendered template whitespace +- **llms_txt** — generate `llms.txt`, an LLM-friendly Markdown index of site content (default: `true`) - **last_updated** — set to `true` to show each entry source file's last modification time below its content (default: `false`) - **edit_page** — URL template for an optional "Edit this page" link below entry content (see below) - **report_issue** — URL template for an optional "Report an issue" link below entry content (see below) diff --git a/roadmap.md b/roadmap.md index f36c641..8fba85d 100644 --- a/roadmap.md +++ b/roadmap.md @@ -83,6 +83,7 @@ - [x] Open Graph / meta tag helpers - [x] Canonical URL support - [x] Configurable `robots.txt` generation +- [x] Generate `llms.txt` for LLM-friendly site indexes - [x] Redirect support (e.g., when changing permalinks, output redirect HTML or config) - [x] Root-relative redirects resolve against deployment paths from `base_url` - [x] 404 page in static build output for static hosting providers (Netlify, GitHub Pages, etc.) diff --git a/src/Build/LlmsTxtGenerator.php b/src/Build/LlmsTxtGenerator.php new file mode 100644 index 0000000..6530c39 --- /dev/null +++ b/src/Build/LlmsTxtGenerator.php @@ -0,0 +1,102 @@ + $collections + * @param array> $entriesByCollection + * @param list $standalonePages + */ + public function generate( + SiteConfig $siteConfig, + array $collections, + array $entriesByCollection, + string $outputDir, + array $standalonePages = [], + bool $noWrite = false, + ): string { + if (!$siteConfig->llmsTxt) { + return ''; + } + + $lines = ['# ' . self::plainText($siteConfig->title)]; + $description = self::plainText($siteConfig->description); + if ($description !== '') { + $lines[] = ''; + $lines[] = $description; + } + + foreach ($collections as $collectionName => $collection) { + $items = $entriesByCollection[$collectionName] ?? []; + if ($items === []) { + continue; + } + + $lines[] = ''; + $lines[] = '## ' . self::plainText($collection->title !== '' ? $collection->title : $collectionName); + foreach ($items as $entry) { + $lines[] = $this->entryLine( + $siteConfig, + $entry, + PermalinkResolver::resolve($entry, $collection, $siteConfig->i18n), + ); + } + } + + if ($standalonePages !== []) { + $lines[] = ''; + $lines[] = '## Pages'; + foreach ($standalonePages as $page) { + $basePermalink = $page->permalink !== '' ? $page->permalink : '/' . $page->slug . '/'; + $lines[] = $this->entryLine( + $siteConfig, + $page, + PermalinkResolver::applyLanguagePrefix($basePermalink, $page->language, $siteConfig->i18n), + ); + } + } + + $content = implode("\n", $lines) . "\n"; + if (!$noWrite) { + file_put_contents($outputDir . '/llms.txt', $content); + } + + return $content; + } + + private function entryLine(SiteConfig $siteConfig, Entry $entry, string $permalink): string + { + $line = '- [' . self::linkText($entry->title) . '](' . UrlResolver::absoluteUrl($siteConfig, $permalink) . ')'; + $summary = self::plainText($entry->summary()); + if ($summary !== '') { + $line .= ': ' . $summary; + } + + return $line; + } + + private static function plainText(string $text): string + { + return trim((string) preg_replace('/\s+/', ' ', $text)); + } + + private static function linkText(string $text): string + { + return str_replace(['\\', '[', ']'], ['\\\\', '\[', '\]'], self::plainText($text)); + } +} diff --git a/src/Console/BuildCommand.php b/src/Console/BuildCommand.php index 4e1e959..7230e6b 100644 --- a/src/Console/BuildCommand.php +++ b/src/Console/BuildCommand.php @@ -13,6 +13,7 @@ use YiiPress\Build\CollectionListingWriter; use YiiPress\Build\ContentAssetCopier; use YiiPress\Build\DateArchiveWriter; +use YiiPress\Build\LlmsTxtGenerator; use YiiPress\Build\NotFoundPageWriter; use YiiPress\Build\NavigationPager; use YiiPress\Build\RedirectPageWriter; @@ -790,6 +791,12 @@ function (array $feedTask) use ($siteConfig, $outputDir, $authors, $noWrite): in $output->writeln(' Search index generated.'); } + if ($siteConfig->llmsTxt) { + $llmsTxtGenerator = new LlmsTxtGenerator(); + $llmsTxtGenerator->generate($siteConfig, $collections, $entriesByCollection, $outputDir, $standalonePages, $noWrite); + $output->writeln(' llms.txt generated.'); + } + if ($siteConfig->taxonomies !== []) { $profile->switchTo('write taxonomy pages'); $allEntries = array_merge(...array_values($entriesByCollection)); @@ -1163,6 +1170,10 @@ private function dryRun( $files[] = $outputDir . '/search-index.json'; } + if ($siteConfig->llmsTxt) { + $files[] = $outputDir . '/llms.txt'; + } + if ($siteConfig->taxonomies !== []) { $allEntries = []; foreach ($collections as $collectionName => $collection) { diff --git a/src/Content/Model/SiteConfig.php b/src/Content/Model/SiteConfig.php index 904fec6..a6ea281 100644 --- a/src/Content/Model/SiteConfig.php +++ b/src/Content/Model/SiteConfig.php @@ -38,5 +38,6 @@ public function __construct( public ?string $reportIssueUrl = null, public bool $authorPages = false, public bool $minify = true, + public bool $llmsTxt = true, ) {} } diff --git a/src/Content/Parser/SiteConfigParser.php b/src/Content/Parser/SiteConfigParser.php index 604ae52..8731f36 100644 --- a/src/Content/Parser/SiteConfigParser.php +++ b/src/Content/Parser/SiteConfigParser.php @@ -89,6 +89,7 @@ public function parse(string $filePath): SiteConfig reportIssueUrl: self::parseOptionalString($data['report_issue'] ?? null), authorPages: (bool) ($data['author_pages'] ?? false), minify: (bool) ($data['minify'] ?? true), + llmsTxt: (bool) ($data['llms_txt'] ?? true), ); } diff --git a/tests/Unit/Build/LlmsTxtGeneratorTest.php b/tests/Unit/Build/LlmsTxtGeneratorTest.php new file mode 100644 index 0000000..38ceb59 --- /dev/null +++ b/tests/Unit/Build/LlmsTxtGeneratorTest.php @@ -0,0 +1,177 @@ +outputDir = sys_get_temp_dir() . '/yiipress-llms-test-' . uniqid(); + mkdir($this->outputDir, 0o755, true); + + $this->tempFile = sys_get_temp_dir() . '/yiipress-llms-body-' . uniqid() . '.md'; + file_put_contents($this->tempFile, "Body content.\n"); + } + + protected function tearDown(): void + { + if (is_file($this->tempFile)) { + unlink($this->tempFile); + } + + if (is_dir($this->outputDir)) { + $iterator = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator($this->outputDir, FilesystemIterator::SKIP_DOTS), + RecursiveIteratorIterator::CHILD_FIRST, + ); + foreach ($iterator as $item) { + /** @var SplFileInfo $item */ + if ($item->isDir()) { + rmdir($item->getPathname()); + } else { + unlink($item->getPathname()); + } + } + rmdir($this->outputDir); + } + } + + public function testGeneratesLlmsTxtFile(): void + { + $generator = new LlmsTxtGenerator(); + $content = $generator->generate($this->createSiteConfig(), [], [], $this->outputDir); + + assertFileExists($this->outputDir . '/llms.txt'); + assertSame($content, file_get_contents($this->outputDir . '/llms.txt')); + assertStringContainsString("# Test Site\n\nA test site\n", $content); + } + + public function testDoesNotGenerateWhenDisabled(): void + { + $generator = new LlmsTxtGenerator(); + $content = $generator->generate($this->createSiteConfig(llmsTxt: false), [], [], $this->outputDir); + + assertSame('', $content); + assertFileDoesNotExist($this->outputDir . '/llms.txt'); + } + + public function testIncludesCollectionEntriesAndStandalonePages(): void + { + $generator = new LlmsTxtGenerator(); + $collection = $this->createCollection(); + $entry = $this->createEntry(title: 'Hello World', slug: 'hello-world', summary: 'Intro text.'); + $page = $this->createEntry(title: 'About Us', slug: 'about', permalink: '/about/', summary: 'About the project.'); + + $content = $generator->generate( + $this->createSiteConfig(), + ['blog' => $collection], + ['blog' => [$entry]], + $this->outputDir, + [$page], + noWrite: true, + ); + + assertStringContainsString("## Blog\n- [Hello World](https://example.com/blog/hello-world/): Intro text.", $content); + assertStringContainsString("## Pages\n- [About Us](https://example.com/about/): About the project.", $content); + assertFileDoesNotExist($this->outputDir . '/llms.txt'); + } + + public function testEscapesMarkdownLinkText(): void + { + $generator = new LlmsTxtGenerator(); + $collection = $this->createCollection(); + $entry = $this->createEntry(title: 'A [bracketed] \\ title', slug: 'bracketed'); + + $content = $generator->generate( + $this->createSiteConfig(), + ['blog' => $collection], + ['blog' => [$entry]], + $this->outputDir, + noWrite: true, + ); + + assertStringContainsString('- [A \[bracketed\] \\\\ title](https://example.com/blog/bracketed/)', $content); + } + + private function createSiteConfig(bool $llmsTxt = true): SiteConfig + { + return new SiteConfig( + title: 'Test Site', + description: 'A test site', + baseUrl: 'https://example.com', + defaultLanguage: 'en', + charset: 'UTF-8', + defaultAuthor: '', + dateFormat: 'Y-m-d', + entriesPerPage: 10, + permalink: '/:collection/:slug/', + taxonomies: [], + params: [], + llmsTxt: $llmsTxt, + ); + } + + private function createCollection(): Collection + { + return new Collection( + name: 'blog', + title: 'Blog', + description: '', + permalink: '/:collection/:slug/', + sortBy: 'date', + sortOrder: 'desc', + entriesPerPage: 10, + feed: true, + listing: true, + ); + } + + private function createEntry( + string $title, + string $slug, + string $summary = '', + string $permalink = '', + ): Entry { + return new Entry( + filePath: $this->tempFile, + collection: 'blog', + slug: $slug, + title: $title, + date: null, + draft: false, + tags: [], + categories: [], + authors: [], + summary: $summary, + permalink: $permalink, + layout: '', + theme: '', + weight: 0, + language: '', + redirectTo: '', + extra: [], + bodyOffset: 0, + bodyLength: (int) filesize($this->tempFile), + ); + } +} diff --git a/tests/Unit/Content/Parser/SiteConfigParserTest.php b/tests/Unit/Content/Parser/SiteConfigParserTest.php index 9f8edb6..f7da856 100644 --- a/tests/Unit/Content/Parser/SiteConfigParserTest.php +++ b/tests/Unit/Content/Parser/SiteConfigParserTest.php @@ -54,6 +54,19 @@ public function testParseMinifyConfigCanDisableOutputMinification(): void unlink($filePath); } + public function testParseLlmsTxtConfigCanDisableGeneration(): void + { + $filePath = sys_get_temp_dir() . '/yiipress-site-config-' . uniqid() . '.yaml'; + file_put_contents($filePath, "title: Test\nlanguages: [en]\nllms_txt: false\n"); + + $parser = new SiteConfigParser(); + $config = $parser->parse($filePath); + + assertFalse($config->llmsTxt); + + unlink($filePath); + } + public function testParseAssetConfigCanDisableFingerprinting(): void { $filePath = sys_get_temp_dir() . '/yiipress-site-config-' . uniqid() . '.yaml';