From 568961c25ebabbe7d8a030c73bd2147ab958d73d Mon Sep 17 00:00:00 2001 From: Alexander Makarov Date: Sat, 13 Jun 2026 13:10:42 +0300 Subject: [PATCH] Add Jekyll content importer --- benchmarks/JekyllImporterBench.php | 76 +++++ config/common/di/importer.php | 2 + docs/commands.md | 27 +- docs/importing-content.md | 12 + roadmap.md | 2 +- src/Import/Jekyll/JekyllContentImporter.php | 297 ++++++++++++++++++ tests/Unit/Console/ImportCommandTest.php | 16 + .../Unit/Import/JekyllContentImporterTest.php | 132 ++++++++ 8 files changed, 562 insertions(+), 2 deletions(-) create mode 100644 benchmarks/JekyllImporterBench.php create mode 100644 src/Import/Jekyll/JekyllContentImporter.php create mode 100644 tests/Unit/Import/JekyllContentImporterTest.php diff --git a/benchmarks/JekyllImporterBench.php b/benchmarks/JekyllImporterBench.php new file mode 100644 index 0000000..f2bcded --- /dev/null +++ b/benchmarks/JekyllImporterBench.php @@ -0,0 +1,76 @@ +sourceDir = sys_get_temp_dir() . '/yiipress-jekyll-bench-source-' . uniqid(); + $this->targetDir = sys_get_temp_dir() . '/yiipress-jekyll-bench-target-' . uniqid(); + mkdir($this->sourceDir . '/_posts', 0o755, true); + mkdir($this->targetDir, 0o755, true); + + for ($i = 1; $i <= 100; $i++) { + file_put_contents( + $this->sourceDir . '/_posts/2024-03-' . str_pad((string) (($i % 28) + 1), 2, '0', STR_PAD_LEFT) . '-post-' . $i . 
/**
 * Removes both temporary trees (Jekyll source fixtures and import target)
 * once the benchmark subject has finished.
 */
public function tearDown(): void
{
    foreach ([$this->sourceDir, $this->targetDir] as $directory) {
        $this->removeDir($directory);
    }
}

/**
 * Benchmarks a full import of the generated posts into a fresh target directory.
 *
 * The target tree is wiped and recreated on every revolution, so that work is
 * part of the measured time.
 */
#[Revs(10)]
#[Iterations(3)]
#[Warmup(1)]
public function benchImportPosts(): void
{
    $target = $this->targetDir;

    $this->removeDir($target);
    mkdir($target, 0o755, true);

    $options = ['directory' => $this->sourceDir];
    $this->importer->import($options, $target, 'blog');
}

/**
 * Recursively deletes a directory; does nothing when the path is not a directory.
 */
private function removeDir(string $path): void
{
    if (is_dir($path)) {
        // CHILD_FIRST yields directory contents before the directory itself,
        // so every rmdir() below operates on an already emptied directory.
        $entries = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($path, \FilesystemIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST,
        );

        foreach ($entries as $entry) {
            $target = $entry->getPathname();

            if ($entry->isDir()) {
                rmdir($target);
                continue;
            }

            unlink($target);
        }

        rmdir($path);
    }
}
**Common options:** @@ -191,6 +191,31 @@ Supports both single-chat exports (`result.json` with `messages` array) and full ./yiipress import telegram --directory=./telegram-data --content-dir=content ``` +### Jekyll import + +Imports Markdown posts from a Jekyll site directory. The importer reads `_posts/YYYY-MM-DD-slug.md` and `_posts/YYYY-MM-DD-slug.markdown` files, converts common front matter fields, and writes YiiPress markdown files into the target collection. + +**Importer options:** + +- `--directory` — path to the Jekyll site directory containing `_posts` (required). Absolute or relative to project root. + +The importer preserves: + +- `title` +- `date` +- `permalink` +- `tags` +- `categories` + +If `title` is missing, it is inferred from the first `# Heading` in the post body and then from the filename slug. + +**Examples:** + +```bash +./yiipress import jekyll --directory=/path/to/jekyll-site +./yiipress import jekyll --directory=../old-blog --collection=blog +``` + ### Adding custom importers Importers implement `YiiPress\Import\ContentImporterInterface` and are registered via [Yii3 DI](https://yiisoft.github.io/docs/guide/concept/di-container.html) in `config/common/di/importer.php`. Each importer declares its own options via the `options()` method. See [Importing content](importing-content.md) for details. diff --git a/docs/importing-content.md b/docs/importing-content.md index ae12c12..810dc5e 100644 --- a/docs/importing-content.md +++ b/docs/importing-content.md @@ -60,6 +60,18 @@ Imports messages from a Telegram Desktop channel export (JSON format). See [commands.md](commands.md#yii-import) for usage details. +### JekyllContentImporter + +Imports Markdown posts from a Jekyll site `_posts/` directory. 
+ +**Options:** + +- `--directory` — Path to the Jekyll site directory containing `_posts` (required) + +The importer accepts `.md` and `.markdown` posts named `YYYY-MM-DD-slug`, preserves common front matter (`title`, `date`, `permalink`, `tags`, `categories`), and creates a default collection config when one does not exist. + +See [commands.md](commands.md#jekyll-import) for usage details. + ## Writing a custom importer Create a class implementing `ContentImporterInterface`. Each importer declares its own options — a file-based importer might need a `directory`, while an API-based importer might need `url` and `api-key`. diff --git a/roadmap.md b/roadmap.md index f36c641..28b5903 100644 --- a/roadmap.md +++ b/roadmap.md @@ -107,7 +107,7 @@ ## Priority 9: Data importers - [ ] WordPress -- [ ] Jekyll +- [x] Jekyll - [ ] Hugo - [ ] Medium exported Markdown - [ ] Ghost diff --git a/src/Import/Jekyll/JekyllContentImporter.php b/src/Import/Jekyll/JekyllContentImporter.php new file mode 100644 index 0000000..48f1034 --- /dev/null +++ b/src/Import/Jekyll/JekyllContentImporter.php @@ -0,0 +1,297 @@ +postFiles($postsDirectory); + $importedFiles = []; + $skippedFiles = []; + $warnings = []; + + foreach ($postFiles as $postFile) { + $post = $this->readPost($postFile); + if ($post === null) { + $skippedFiles[] = $postFile; + $warnings[] = 'Skipped unsupported Jekyll post filename: ' . basename($postFile); + continue; + } + + [$date, $slug, $fields, $body] = $post; + $targetPath = $collectionDir . '/' . $date . '-' . $slug . 
/**
 * Returns the identifier this importer is selected by.
 */
public function name(): string
{
    return 'jekyll';
}

/**
 * Collects every Markdown file below the given `_posts` directory.
 *
 * Sub-directories are traversed too; only files with a `.md` or `.markdown`
 * extension (compared case-insensitively) are returned.
 *
 * @return list<string> Path names in ascending order, so runs are deterministic.
 */
private function postFiles(string $postsDirectory): array
{
    $files = [];
    $iterator = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator($postsDirectory, FilesystemIterator::SKIP_DOTS),
    );

    foreach ($iterator as $item) {
        /** @var SplFileInfo $item */
        if (!$item->isFile()) {
            continue;
        }

        if (in_array(strtolower($item->getExtension()), ['md', 'markdown'], true)) {
            $files[] = $item->getPathname();
        }
    }

    sort($files);

    return $files;
}

/**
 * Reads one post file and splits it into its importable parts.
 *
 * The file name (without extension) must look like `YYYY-MM-DD-slug`.
 *
 * @return array{0: string, 1: string, 2: array<string, mixed>, 3: string}|null
 *         `[date, slug, front matter fields, body]`, or `null` when the file
 *         name does not match or the file cannot be read. The body is trimmed
 *         and always ends with exactly one newline.
 */
private function readPost(string $postFile): ?array
{
    $filename = pathinfo($postFile, PATHINFO_FILENAME);
    if (preg_match('/^(\d{4}-\d{2}-\d{2})-(.+)$/', $filename, $matches) !== 1) {
        return null;
    }

    $content = file_get_contents($postFile);
    if ($content === false) {
        return null;
    }

    [$fields, $body] = $this->splitFrontMatter($content);

    return [$matches[1], $matches[2], $fields, trim($body) . "\n"];
}

/**
 * Separates a leading YAML front matter block from the Markdown body.
 *
 * The block must start on the first line with `---` and end with a line that
 * contains only `---` or `...` (trailing blanks allowed), which is the same
 * shape Jekyll itself recognises. Empty front matter (`---` directly followed
 * by `---`) and a closing delimiter on the very last line are accepted.
 *
 * @return array{0: array<string, mixed>, 1: string} `[fields, body]`. Fields
 *         are empty when there is no front matter or it does not parse to a
 *         mapping; in the "no front matter" case the whole content is the body.
 */
private function splitFrontMatter(string $content): array
{
    // A UTF-8 byte order mark would otherwise hide the opening delimiter.
    if (str_starts_with($content, "\xEF\xBB\xBF")) {
        $content = substr($content, 3);
    }

    $content = str_replace("\r\n", "\n", $content);

    // `m` lets ^/$ anchor the closing delimiter to a full line, `s` lets the
    // lazy group span lines. The group may be empty, which covers `---\n---`.
    $pattern = '/\A---[ \t]*\n(.*?)^(?:---|\.\.\.)[ \t]*$\n?/ms';
    if (preg_match($pattern, $content, $matches) !== 1) {
        return [[], $content];
    }

    $body = substr($content, strlen($matches[0]));
    $yaml = $matches[1];

    // Nothing to parse; avoid handing an empty document to the YAML parser.
    if (trim($yaml) === '') {
        return [[], $body];
    }

    // NOTE(review): the posts are external input. ext-yaml can instantiate PHP
    // objects from `!php/object` tags when the `yaml.decode_php` ini setting is
    // enabled; keep it disabled wherever untrusted sites are imported.
    $data = yaml_parse($yaml);
    if (!is_array($data)) {
        return [[], $body];
    }

    // Only named fields are meaningful; a top-level YAML sequence is dropped.
    $fields = [];
    foreach ($data as $key => $value) {
        if (is_string($key)) {
            $fields[$key] = $value;
        }
    }

    return [$fields, $body];
}

/**
 * Renders the target Markdown file: generated front matter followed by the body.
 *
 * The title comes from the `title` field, then the first level-one heading of
 * the body, then a humanised form of the slug. The date comes from the `date`
 * field and falls back to the date taken from the file name. `permalink`,
 * `tags` and `categories` are written only when they have a value.
 *
 * @param array<string, mixed> $fields Front matter of the Jekyll post.
 */
private function buildMarkdownFile(string $date, string $slug, array $fields, string $body): string
{
    $title = $this->stringField($fields['title'] ?? null);
    if ($title === '') {
        $title = $this->titleFromBody($body);
    }
    // Explicit comparison instead of `?:` so that a heading such as "0" is kept.
    if ($title === '') {
        $title = ucwords(str_replace(['-', '_'], ' ', $slug));
    }

    $frontMatter = "---\n";
    $frontMatter .= 'title: ' . $this->yamlEscape($title) . "\n";
    $frontMatter .= 'date: ' . $this->stringField($fields['date'] ?? null, $date) . "\n";

    $permalink = $this->stringField($fields['permalink'] ?? null);
    if ($permalink !== '') {
        $frontMatter .= 'permalink: ' . $this->yamlEscape($permalink) . "\n";
    }

    foreach (['tags', 'categories'] as $listName) {
        $items = $this->listField($fields[$listName] ?? []);
        if ($items === []) {
            continue;
        }

        $frontMatter .= $listName . ":\n";
        foreach ($items as $item) {
            $frontMatter .= '  - ' . $this->yamlEscape($item) . "\n";
        }
    }

    return $frontMatter . "---\n\n" . $body;
}

/**
 * Returns the text of the first level-one ATX heading (`# Heading`) in the body.
 *
 * Lines inside fenced code blocks are ignored, so a shell or YAML comment in a
 * code sample is not mistaken for the title. An optional closing hash sequence
 * (`# Heading #`) is removed.
 *
 * @return string The heading text, or an empty string when there is none.
 */
private function titleFromBody(string $body): string
{
    $insideFence = false;

    foreach (explode("\n", $body) as $line) {
        if (preg_match('/^ {0,3}(?:```|~~~)/', $line) === 1) {
            $insideFence = !$insideFence;
            continue;
        }

        if ($insideFence) {
            continue;
        }

        // Matching line by line keeps the blank after `#` from running into
        // the next line, which a multi-line `\s+` would allow.
        if (preg_match('/^#[ \t]+(.+?)(?:[ \t]+#+)?[ \t]*$/', $line, $matches) === 1) {
            $heading = trim($matches[1]);
            if ($heading !== '') {
                return $heading;
            }
        }
    }

    return '';
}

/**
 * Normalises a front matter value to a trimmed string.
 *
 * @param mixed $value Raw value as produced by the YAML parser.
 * @param string $default Returned when the value is missing, empty after
 *        trimming, or not a string/number (array, boolean, object).
 */
private function stringField(mixed $value, string $default = ''): string
{
    // Arrays cannot be cast to string, and a boolean would turn into "1" or "".
    if (!is_string($value) && !is_int($value) && !is_float($value)) {
        return $default;
    }

    $string = trim((string) $value);

    return $string === '' ? $default : $string;
}

/**
 * Normalises a `tags`/`categories` style value to a list of non-empty strings.
 *
 * Accepts a YAML sequence, a comma separated string, or a whitespace separated
 * string. Items that are not strings or numbers are skipped.
 *
 * @return list<string>
 */
private function listField(mixed $value): array
{
    if (is_array($value)) {
        $candidates = $value;
    } elseif (is_string($value) || is_int($value) || is_float($value)) {
        $text = trim((string) $value);
        $candidates = str_contains($text, ',')
            ? explode(',', $text)
            : (preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: []);
    } else {
        return [];
    }

    $items = [];
    foreach ($candidates as $candidate) {
        if (!is_string($candidate) && !is_int($candidate) && !is_float($candidate)) {
            continue;
        }

        $item = trim((string) $candidate);
        // Compare against '' explicitly: a truthiness filter would drop "0".
        if ($item !== '') {
            $items[] = $item;
        }
    }

    return $items;
}

/**
 * Writes a default `_collection.yaml` into the collection directory unless one
 * is already there.
 */
private function ensureCollectionConfig(string $collectionDir, string $collection): void
{
    $configPath = $collectionDir . '/_collection.yaml';
    if (is_file($configPath)) {
        return;
    }

    $config = 'title: ' . $this->yamlEscape(ucfirst($collection)) . "\n";
    $config .= "sort_by: date\n";
    $config .= "sort_order: desc\n";
    $config .= "entries_per_page: 10\n";
    $config .= "feed: true\n";

    file_put_contents($configPath, $config);
}

/**
 * Returns the value as a YAML scalar that parses back to the same string.
 *
 * Plain (unquoted) output is kept for ordinary text. The value is wrapped in
 * double quotes when a YAML parser could read it as something else: indicator
 * or control characters, a leading `-` or `?`, surrounding whitespace, the
 * empty string, boolean/null keywords, or anything that looks like a number
 * or date.
 */
private function yamlEscape(string $value): string
{
    $isPlainSafe = $value !== ''
        && $value === trim($value)
        && preg_match('/[:#\[\]{}|>&*!,\'"%@`\x00-\x1F\x7F]/', $value) !== 1
        && preg_match('/^[-?]/', $value) !== 1
        && preg_match('/^(?:~|null|true|false|yes|no|on|off|y|n)$/i', $value) !== 1
        && preg_match('/^[-+]?(?:\.\d|\d)[\w.+-]*$/', $value) !== 1
        && preg_match('/^[-+]?\.(?:inf|nan)$/i', $value) !== 1;

    if ($isPlainSafe) {
        return $value;
    }

    $escaped = addcslashes($value, '"\\');

    // Raw control characters are not allowed inside a double-quoted scalar.
    $escaped = preg_replace_callback(
        '/[\x00-\x1F\x7F]/',
        static fn (array $match): string => match ($match[0]) {
            "\n" => '\n',
            "\r" => '\r',
            "\t" => '\t',
            default => sprintf('\x%02X', ord($match[0])),
        },
        $escaped,
    ) ?? $escaped;

    return '"' . $escaped . '"';
}
/**
 * Removes the temporary Jekyll source tree and the import target tree.
 */
protected function tearDown(): void
{
    foreach ([$this->sourceDir, $this->targetDir] as $directory) {
        $this->removeDir($directory);
    }
}

/**
 * A post with full front matter is converted and a collection config is created.
 */
public function testImportsJekyllPosts(): void
{
    $post = <<<'MARKDOWN'
        ---
        layout: post
        title: "Hello: Jekyll"
        date: "2024-03-15 10:30:00"
        tags: [php, yii]
        categories: docs guides
        permalink: /custom/hello/
        ---

        Body text.
        MARKDOWN . "\n";
    file_put_contents($this->sourceDir . '/_posts/2024-03-15-hello-jekyll.md', $post);

    $importer = new JekyllContentImporter();
    $result = $importer->import(['directory' => $this->sourceDir], $this->targetDir, 'blog');

    assertSame(1, $result->importedCount());
    assertSame(1, $result->totalMessages());
    assertSame([], $result->warnings());

    $content = file_get_contents($this->targetDir . '/blog/2024-03-15-hello-jekyll.md');
    $this->assertNotFalse($content);

    $expectedFragments = [
        'title: "Hello: Jekyll"',
        'date: 2024-03-15 10:30:00',
        'permalink: /custom/hello/',
        "tags:\n  - php\n  - yii\n",
        "categories:\n  - docs\n  - guides\n",
        "Body text.\n",
    ];
    foreach ($expectedFragments as $fragment) {
        assertStringContainsString($fragment, $content);
    }

    $this->assertFileExists($this->targetDir . '/blog/_collection.yaml');
}

/**
 * Without a `title` field the first level-one heading of the body is used.
 */
public function testDerivesTitleFromHeadingWhenFrontMatterTitleIsMissing(): void
{
    $post = <<<'MARKDOWN'
        ---
        tags: php yii
        ---

        # Heading Title

        Body.
        MARKDOWN . "\n";
    file_put_contents($this->sourceDir . '/_posts/2024-04-01-heading-title.markdown', $post);

    $importer = new JekyllContentImporter();
    $result = $importer->import(['directory' => $this->sourceDir], $this->targetDir, 'blog');

    assertSame(1, $result->importedCount());

    // The `.markdown` source is written with a `.md` extension.
    $content = file_get_contents($this->targetDir . '/blog/2024-04-01-heading-title.md');
    $this->assertNotFalse($content);
    assertStringContainsString('title: Heading Title', $content);
    assertStringContainsString("tags:\n  - php\n  - yii\n", $content);
}

/**
 * A file whose name lacks the date prefix is reported as skipped, with a warning.
 */
public function testSkipsUnsupportedPostFilenames(): void
{
    $undatedPost = $this->sourceDir . '/_posts/not-dated.md';
    file_put_contents($undatedPost, "---\ntitle: Bad\n---\n\nBody.\n");

    $importer = new JekyllContentImporter();
    $result = $importer->import(['directory' => $this->sourceDir], $this->targetDir, 'blog');

    assertSame(0, $result->importedCount());
    assertCount(1, $result->skippedFiles());
    assertCount(1, $result->warnings());
}

/**
 * A site directory without `_posts` imports nothing and yields one warning.
 */
public function testWarnsWhenPostsDirectoryIsMissing(): void
{
    $this->removeDir($this->sourceDir . '/_posts');

    $importer = new JekyllContentImporter();
    $result = $importer->import(['directory' => $this->sourceDir], $this->targetDir, 'blog');

    assertSame(0, $result->importedCount());

    $warnings = $result->warnings();
    assertCount(1, $warnings);
    assertStringContainsString('_posts directory not found', $warnings[0]);
}

/**
 * Recursively deletes a directory; does nothing when the path is not a directory.
 */
private function removeDir(string $path): void
{
    if (is_dir($path)) {
        // Children are visited before their parent so each rmdir() sees an empty directory.
        $entries = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($path, FilesystemIterator::SKIP_DOTS),
            RecursiveIteratorIterator::CHILD_FIRST,
        );

        foreach ($entries as $entry) {
            /** @var SplFileInfo $entry */
            $target = $entry->getPathname();

            if ($entry->isDir()) {
                rmdir($target);
                continue;
            }

            unlink($target);
        }

        rmdir($path);
    }
}