diff --git a/benchmarks/GhostImporterBench.php b/benchmarks/GhostImporterBench.php new file mode 100644 index 0000000..4eef551 --- /dev/null +++ b/benchmarks/GhostImporterBench.php @@ -0,0 +1,102 @@ +sourceDir = sys_get_temp_dir() . '/yiipress-ghost-bench-source-' . uniqid(); + $this->targetDir = sys_get_temp_dir() . '/yiipress-ghost-bench-target-' . uniqid(); + mkdir($this->sourceDir, 0o755, true); + mkdir($this->targetDir, 0o755, true); + $this->sourceFile = $this->sourceDir . '/ghost.json'; + + $posts = []; + $postsTags = []; + for ($i = 1; $i <= 100; $i++) { + $posts[] = [ + 'id' => 'post-' . $i, + 'title' => 'Post ' . $i, + 'slug' => 'post-' . $i, + 'status' => 'published', + 'type' => 'post', + 'published_at' => '2024-03-15 10:30:00', + 'custom_excerpt' => 'Summary ' . $i . '.', + 'html' => '
Body ' . $i . '.
', + ]; + $postsTags[] = ['post_id' => 'post-' . $i, 'tag_id' => 'tag-php']; + } + + file_put_contents( + $this->sourceFile, + json_encode([ + 'db' => [[ + 'data' => [ + 'posts' => $posts, + 'tags' => [ + ['id' => 'tag-php', 'slug' => 'php', 'name' => 'PHP'], + ], + 'posts_tags' => $postsTags, + ], + ]], + ], JSON_THROW_ON_ERROR), + ); + + $this->importer = new GhostContentImporter(); + } + + public function tearDown(): void + { + $this->removeDir($this->sourceDir); + $this->removeDir($this->targetDir); + } + + #[Revs(10)] + #[Iterations(3)] + #[Warmup(1)] + public function benchImportPosts(): void + { + $this->removeDir($this->targetDir); + mkdir($this->targetDir, 0o755, true); + + $this->importer->import(['file' => $this->sourceFile], $this->targetDir, 'blog'); + } + + private function removeDir(string $path): void + { + if (!is_dir($path)) { + return; + } + + $iterator = new \RecursiveIteratorIterator( + new \RecursiveDirectoryIterator($path, \FilesystemIterator::SKIP_DOTS), + \RecursiveIteratorIterator::CHILD_FIRST, + ); + foreach ($iterator as $item) { + if ($item->isDir()) { + rmdir($item->getPathname()); + } else { + unlink($item->getPathname()); + } + } + + rmdir($path); + } +} diff --git a/config/common/di/importer.php b/config/common/di/importer.php index 477b2f3..51b6fd5 100644 --- a/config/common/di/importer.php +++ b/config/common/di/importer.php @@ -3,6 +3,7 @@ declare(strict_types=1); use YiiPress\Console\ImportCommand; +use YiiPress\Import\Ghost\GhostContentImporter; use YiiPress\Import\Telegram\TelegramContentImporter; $workingDirectory = getcwd() ?: dirname(__DIR__, 3); @@ -12,6 +13,7 @@ '__construct()' => [ 'rootPath' => $workingDirectory, 'importers' => [ + 'ghost' => new GhostContentImporter(), 'telegram' => new TelegramContentImporter(), ], ], diff --git a/docs/commands.md b/docs/commands.md index ed8768d..6d3122e 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -144,7 +144,7 @@ Imports content from external sources into a YiiPress collection. **Arguments:** -- `source` — source type to import from (required). Currently supported: `telegram`. +- `source` — source type to import from (required). Currently supported: `ghost`, `telegram`. **Common options:** @@ -191,6 +191,29 @@ Supports both single-chat exports (`result.json` with `messages` array) and full ./yiipress import telegram --directory=./telegram-data --content-dir=content ``` +### Ghost import + +Imports posts and pages from a Ghost JSON export file. Export your site from Ghost Admin via Settings > Labs > Export your content. + +**Importer options:** + +- `--file` — path to the Ghost `.json` export file (required). Absolute or relative to project root. + +The importer reads the standard `db[0].data` export structure and converts: + +- Ghost posts (`type = post`) into markdown files in the target collection. +- Ghost pages (`type = page`) into standalone markdown files in the content root. +- `title`, `slug`, `published_at`, `status`, `custom_excerpt`, `feature_image`, tags, authors, and `html` into YiiPress front matter and body content. + +Published posts are imported normally. Non-published posts and pages are imported with `draft: true`. Unsupported post types are skipped. Duplicate output filenames get numeric suffixes so earlier files are not overwritten. + +**Examples:** + +```bash +./yiipress import ghost --file=/path/to/ghost-export.json +./yiipress import ghost --file=./ghost.json --collection=blog +``` + ### Adding custom importers Importers implement `YiiPress\Import\ContentImporterInterface` and are registered via [Yii3 DI](https://yiisoft.github.io/docs/guide/concept/di-container.html) in `config/common/di/importer.php`. Each importer declares its own options via the `options()` method. See [Importing content](importing-content.md) for details. diff --git a/docs/importing-content.md b/docs/importing-content.md index ae12c12..1477a6b 100644 --- a/docs/importing-content.md +++ b/docs/importing-content.md @@ -60,6 +60,18 @@ Imports messages from a Telegram Desktop channel export (JSON format). See [commands.md](commands.md#yii-import) for usage details. +### GhostContentImporter + +Imports posts and pages from a Ghost JSON export. + +**Options:** + +- `--file` — Path to the Ghost `.json` export file (required) + +The importer converts Ghost posts into the selected YiiPress collection and Ghost pages into standalone content root markdown files. It preserves common metadata (`title`, date, draft status, excerpt summary, feature image, tags, and authors), keeps `html` as the markdown body, skips unsupported post types, and avoids overwriting duplicate output filenames. + +See [commands.md](commands.md#ghost-import) for usage details. + ## Writing a custom importer Create a class implementing `ContentImporterInterface`. Each importer declares its own options — a file-based importer might need a `directory`, while an API-based importer might need `url` and `api-key`. diff --git a/roadmap.md b/roadmap.md index f36c641..bf4ce98 100644 --- a/roadmap.md +++ b/roadmap.md @@ -110,5 +110,5 @@ - [ ] Jekyll - [ ] Hugo - [ ] Medium exported Markdown -- [ ] Ghost +- [x] Ghost - [x] Telegram export diff --git a/src/Import/Ghost/GhostContentImporter.php b/src/Import/Ghost/GhostContentImporter.php new file mode 100644 index 0000000..fd16e96 --- /dev/null +++ b/src/Import/Ghost/GhostContentImporter.php @@ -0,0 +1,501 @@ +exportData($data); + if ($export === null) { + return new ImportResult( + totalMessages: 0, + importedCount: 0, + importedFiles: [], + skippedFiles: [], + warnings: ["Ghost export data not found in $sourceFile"], + ); + } + + $posts = $this->list($export['posts'] ?? []); + $tagsByPost = $this->tagsByPost($export); + $authorsByPost = $this->authorsByPost($export); + + FileHelper::ensureDirectory($targetDirectory, 0o755); + + $collectionDir = $targetDirectory . '/' . $collection; + $importedFiles = []; + $skippedFiles = []; + $usedPaths = []; + $hasCollectionEntries = false; + + foreach ($posts as $post) { + $entry = $this->readPost($post, $tagsByPost, $authorsByPost); + if ($entry === null) { + $skippedFiles[] = $this->stringValue($post['id'] ?? null, $this->stringValue($post['slug'] ?? null)); + continue; + } + + $directory = $targetDirectory; + if ($entry['type'] === 'post') { + FileHelper::ensureDirectory($collectionDir, 0o755); + $directory = $collectionDir; + $hasCollectionEntries = true; + } + + $path = $this->uniquePath($directory, $this->filename($entry), $usedPaths); + file_put_contents($path, $this->buildMarkdownFile($entry)); + $importedFiles[] = $path; + } + + if ($hasCollectionEntries) { + $this->ensureCollectionConfig($collectionDir, $collection); + } + + return new ImportResult( + totalMessages: count($posts), + importedCount: count($importedFiles), + importedFiles: $importedFiles, + skippedFiles: $skippedFiles, + warnings: [], + ); + } + + public function name(): string + { + return 'ghost'; + } + + /** + * @param arrayHello from Ghost.
', + ], + [ + 'id' => 'page-1', + 'title' => 'About', + 'slug' => 'about', + 'status' => 'published', + 'type' => 'page', + 'published_at' => '2024-03-16 11:00:00', + 'html' => 'About page.
', + ], + ], + 'tags' => [ + ['id' => 'tag-1', 'slug' => 'php', 'name' => 'PHP'], + ], + 'posts_tags' => [ + ['post_id' => 'post-1', 'tag_id' => 'tag-1'], + ], + 'users' => [ + ['id' => 'author-1', 'slug' => 'jane-doe', 'name' => 'Jane Doe'], + ], + 'posts_authors' => [ + ['post_id' => 'post-1', 'author_id' => 'author-1'], + ], + ], + ]], + ], JSON_THROW_ON_ERROR)); + + $result = (new GhostContentImporter())->import(['file' => $this->sourceFile], $this->targetDir, 'blog'); + + assertSame(2, $result->totalMessages()); + assertSame(2, $result->importedCount()); + assertSame([], $result->warnings()); + + $post = file_get_contents($this->targetDir . '/blog/2024-03-15-hello-ghost.md'); + $this->assertNotFalse($post); + assertStringContainsString('title: "Hello: Ghost"', $post); + assertStringContainsString('date: 2024-03-15 10:30:00', $post); + assertStringContainsString('summary: Short summary.', $post); + assertStringContainsString('image: /content/images/hero.jpg', $post); + assertStringContainsString("tags:\n - php\n", $post); + assertStringContainsString("authors:\n - jane-doe\n", $post); + assertStringContainsString('Hello from Ghost.
', $post); + $this->assertFileExists($this->targetDir . '/blog/_collection.yaml'); + + $page = file_get_contents($this->targetDir . '/about.md'); + $this->assertNotFalse($page); + assertStringContainsString('title: About', $page); + assertStringContainsString('permalink: /about/', $page); + assertStringContainsString('About page.
', $page); + } + + public function testMarksDraftsAndSkipsUnsupportedPostTypes(): void + { + file_put_contents($this->sourceFile, json_encode([ + 'data' => [ + 'posts' => [ + [ + 'id' => 'draft-1', + 'title' => 'Draft Post', + 'slug' => 'draft-post', + 'status' => 'draft', + 'type' => 'post', + 'published_at' => '2024-04-01 09:00:00', + 'html' => 'Draft body.', + ], + [ + 'id' => 'unknown-1', + 'title' => 'Unknown', + 'slug' => 'unknown', + 'status' => 'published', + 'type' => 'custom', + ], + ], + ], + ], JSON_THROW_ON_ERROR)); + + $result = (new GhostContentImporter())->import(['file' => $this->sourceFile], $this->targetDir, 'blog'); + + assertSame(2, $result->totalMessages()); + assertSame(1, $result->importedCount()); + assertSame(['unknown-1'], $result->skippedFiles()); + + $post = file_get_contents($this->targetDir . '/blog/2024-04-01-draft-post.md'); + $this->assertNotFalse($post); + assertStringContainsString("draft: true\n", $post); + } + + public function testDoesNotOverwriteDuplicateSlugs(): void + { + file_put_contents($this->sourceFile, json_encode([ + 'posts' => [ + [ + 'id' => 'post-1', + 'title' => 'Duplicate', + 'slug' => 'duplicate', + 'status' => 'published', + 'type' => 'post', + 'published_at' => '2024-05-01 09:00:00', + 'html' => 'First.', + ], + [ + 'id' => 'post-2', + 'title' => 'Duplicate', + 'slug' => 'duplicate', + 'status' => 'published', + 'type' => 'post', + 'published_at' => '2024-05-01 10:00:00', + 'html' => 'Second.', + ], + ], + ], JSON_THROW_ON_ERROR)); + + $result = (new GhostContentImporter())->import(['file' => $this->sourceFile], $this->targetDir, 'blog'); + + assertSame(2, $result->importedCount()); + $this->assertFileExists($this->targetDir . '/blog/2024-05-01-duplicate.md'); + $this->assertFileExists($this->targetDir . '/blog/2024-05-01-duplicate-2.md'); + } + + public function testWarnsWhenFileIsMissing(): void + { + $result = (new GhostContentImporter())->import(['file' => $this->sourceFile], $this->targetDir, 'blog'); + + assertSame(0, $result->importedCount()); + assertCount(1, $result->warnings()); + assertStringContainsString('file option is required', $result->warnings()[0]); + } + + public function testWarnsWhenJsonIsInvalid(): void + { + file_put_contents($this->sourceFile, '{'); + + $result = (new GhostContentImporter())->import(['file' => $this->sourceFile], $this->targetDir, 'blog'); + + assertSame(0, $result->importedCount()); + assertCount(1, $result->warnings()); + assertStringContainsString('Invalid Ghost JSON', $result->warnings()[0]); + } + + private function removeDir(string $path): void + { + if (!is_dir($path)) { + return; + } + + $iterator = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator($path, FilesystemIterator::SKIP_DOTS), + RecursiveIteratorIterator::CHILD_FIRST, + ); + foreach ($iterator as $item) { + /** @var SplFileInfo $item */ + if ($item->isDir()) { + rmdir($item->getPathname()); + } else { + unlink($item->getPathname()); + } + } + rmdir($path); + } +}