From 8d86ad7da182413c14c6a7f13df388855a92cb47 Mon Sep 17 00:00:00 2001 From: ArdaxHz <70710586+ArdaxHz@users.noreply.github.com> Date: Wed, 20 May 2026 07:39:00 +0100 Subject: [PATCH 1/3] fix extension sync --- .github/workflows/extension-tests.yml | 41 +++ README.md | 402 ++++++++++++++++++-------- src/mangaplus/manifest.json | 30 ++ sync_extensions.py | 125 +++++++- tools/smoke_load.py | 128 ++++++++ tools/validate_manifests.py | 100 +++++++ 6 files changed, 692 insertions(+), 134 deletions(-) create mode 100644 .github/workflows/extension-tests.yml create mode 100644 src/mangaplus/manifest.json create mode 100644 tools/smoke_load.py create mode 100644 tools/validate_manifests.py diff --git a/.github/workflows/extension-tests.yml b/.github/workflows/extension-tests.yml new file mode 100644 index 00000000..abc3d264 --- /dev/null +++ b/.github/workflows/extension-tests.yml @@ -0,0 +1,41 @@ +name: Extension Smoke Test + +on: + push: + branches: [main, dev] + pull_request: + branches: [main, dev] + +permissions: + contents: read + +jobs: + smoke: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install all extension requirements + run: | + python -m pip install --upgrade pip + # The base `publoader` package isn't on PyPI; the smoke test only + # imports each extension's .py and the manifest validator, + # neither of which import publoader.* at module level by the time + # the new publoader.api shim has landed. 
+ find ./src -mindepth 2 -maxdepth 2 -name requirements.txt \ + -exec pip install --no-cache-dir -r {} + + + - name: Validate manifests + run: python tools/validate_manifests.py + + - name: Smoke-load each extension + run: python tools/smoke_load.py diff --git a/README.md b/README.md index 190a85c3..97101fd7 100644 --- a/README.md +++ b/README.md @@ -1,195 +1,351 @@ -# Contributing +# publoader-extensions -This guide have some instructions and tips on how to create a new publisher extension. Please **read it carefully** if you're a new contributor or don't have any experience on the required languages and knowledges. +Source-of-truth repo for the public publisher extensions consumed by +[publoader](https://github.com/publoader/publoader). The repo is also packaged +as a Docker image (`ardax/publoader-extensions`) that ships every `src//` +tree into the main container's `extensions` volume on startup. -This guide is not definitive, and it's being updated over time. If you find any issue on it, feel free to open an issue or fix it directly yourself by opening a PR. +## Layout -# Prerequisites +``` +src/ + / + .py # entrypoint module — must match the directory name + manifest.json # metadata + permissions (see below) + manga_id_map.json # MangaDex id ↔ publisher id mapping + override_options.json # optional manual overrides + requirements.txt # extension-specific deps +schedule.json # daily run timings +sync_extensions.py # used by the Docker sidecar — do not invoke manually +tools/ + validate_manifests.py # CI: schema-checks every manifest + smoke_load.py # CI: imports each entrypoint under publoader.api +.github/workflows/ # CI pipeline +``` -Before you start, please note that the ability to use following technologies is **required** and that existing contributors will not actively teach them to you. +## Prerequisites -- [Python 3.9+](https://www.python.org/) -- Any other scraper you need to use. 
+You should be comfortable with: -# Writing an extension +- [Python 3.10+](https://www.python.org/) +- HTTP scraping / API consumption for the publisher you're targeting -The quickest way to get started is to copy an existing extension's folder structure and renaming it as needed. We also recommend reading through a few existing extensions' code before you start. +This guide is updated over time. If something is wrong or unclear, open an +issue or a PR. -**You are responsible for implementing rate-limiting for your extension yourself.** +## Writing an extension -## Setting up your extension directory +The fastest start is to copy an existing extension directory (e.g. `mangaplus`) +and rename it. Read through a working extension before starting your own — the +shape of the `Extension` class and how it returns chapters are best learned by +example. -Each extension should reside in `/src/`. +**You are responsible for rate-limiting your own extension.** The runner won't +throttle you on the publisher's behalf. -**`extension_name` can only be ascii, lowercase and not contain punctuation or spaces, except `_`. Your extension will not run if the extension directory name (and with extension, the main-file name) are not valid.** +### Naming -## Extension directory structure +`<extension_name>` must be lowercase ASCII with no punctuation other than `_`. The +directory name, the entrypoint filename (without `.py`), and the +`manifest.json` `name` field must all match. The runner skips anything that +doesn't satisfy this. -The simplest extension structure looks like this: +### `manifest.json` +Every extension must ship a `manifest.json` next to its entrypoint. CI +(`tools/validate_manifests.py`) rejects PRs where this file is missing or +malformed. 
Minimum shape: + +```json +{ + "name": "mangaplus", + "version": "0.2.04", + "publoader_api": "^1.0.0", + "entrypoint": "mangaplus.py", + "class_name": "Extension", + "mangadex_group_id": "4f1de6a2-f0c5-4ac5-bce5-02c7dbb67deb", + "languages": ["en", "es"], + "allowed_hosts": [ + "jumpg-webapi.tokyo-cdn.com", + "mangaplus.shueisha.co.jp" + ], + "permissions": { + "network": true, + "filesystem_read": ["manga_id_map.json", "override_options.json"], + "filesystem_write": [], + "subprocess": false + }, + "schedule": { + "hour": 15, + "minute": 5, + "timezone": "UTC" + }, + "data_files": { + "manga_id_map": "manga_id_map.json", + "override_options": "override_options.json" + }, + "maintainers": ["your-github-handle"], + "homepage": "https://example.com/" +} ``` -/src/ -├── .py -├── manga_id_map.json -├── override_options.json -├── requirements.txt -└── -``` -#### .py -This is the entry point for your extension. The name of the file should match the name of the extension directory. +| Field | Meaning | +| --- | --- | +| `name` | Must equal the directory and entrypoint stem. | +| `version` | Free-form semver. Bump when you ship behaviour changes. | +| `publoader_api` | Compatible publoader API range. Use `^1.0.0` until you have a reason to pin tighter. | +| `entrypoint` | Module that contains the `Extension` class. | +| `class_name` | Must be `Extension`. | +| `mangadex_group_id` | UUID of the scanlation group the chapters are uploaded under. | +| `languages` | ISO codes the extension produces. | +| `allowed_hosts` | Hostnames your network code talks to. Used for documentation and review — actual egress is not yet enforced. | +| `permissions` | Declares your runtime needs. `subprocess` should be `false` (the AST scanner rejects subprocess use anyway). | +| `schedule` | Optional default schedule. It is overridden by the base repo's `schedule.json`, which is in turn overridden by the per-extension DB override in publoader. 
| +| `data_files` | Names of the per-extension data files the runner exposes. | +| `maintainers` | List of GitHub handles responsible for the extension. | +| `homepage` | Public site for the publisher (used in error messages and chapter cards). | + +### `manga_id_map.json` + +Maps MangaDex manga UUIDs to the publisher's internal identifiers. The schema +varies per publisher — pick whichever fits your data: + +```json +// uuid_to_list — one MangaDex id covers many publisher ids +{"333f4d22-7753-4e3b-b0da-0a69b2cdce4f": ["100001", "200008"]} -#### manga_id_map.json -Can be any name. The MangaDex id to the publisher site's manga ids, or whatever id you will use to associate a chapter to a manga. -The structure of the file can be whatever you want, however you need to provide a list of tracked MangaDex manga ids. +// uuid_to_string — one-to-one +{"333f4d22-7753-4e3b-b0da-0a69b2cdce4f": "100001"} + +// id_to_uuid — publisher id is the primary key +{"100001": "333f4d22-7753-4e3b-b0da-0a69b2cdce4f"} +``` -#### override_options.json -Can be any name and is not necessary. This file contains any manual overrides for certain series or chapters that do not conform to a standard format. -Your implementation should sanitise chapter titles to conform to MangaDex's rules. +This file is the canonical tracker list — only mappings in here are uploaded. -_**The bot only accesses the `same`, `custom_language`, `multi_chapters`, and `override_chapter_numbers` fields, all other fields can be named differently.**_ +### `override_options.json` -If you want to include this file, use the structure as follows: +Optional. Used for manual overrides where the source doesn't conform to +MangaDex's chapter format. 
Your code only needs to use these keys when the +fields apply to your publisher: ```json { - "empty": [], - "noformat": [], - "custom": {"series_id": "regex"}, - "same": {"chapter_to_keep_id": ["other_chapter_id"]}, - "custom_language": {}, - "multi_chapters": {"chapter_id": ["chapter_number"]}, - "override_chapter_numbers": {"chapter_id": "overriden_chapter_number"} + "empty": [], + "noformat": [], + "custom": {"series_id": "regex"}, + "same": {"chapter_to_keep_id": ["other_chapter_id"]}, + "custom_language": {}, + "multi_chapters": {"chapter_id": ["chapter_number"]}, + "override_chapter_numbers": {"chapter_id": "overridden_chapter_number"} } ``` -- `"empty": [],` An array of manga ids for chapters that will never have a title (null). -- `"noformat": [],` For titles that you do not want your titles regex to format. -- `"custom": {},` For series you want to use custom regex for. If not, the dictionary should be empty. -- `"same": {},` Chapters that are the same, but uploaded under different ids. Chapters that are part of the dictionary's values are not uploaded and only the dictionary's keys are. The dictionary should be empty if this field is not applicable. -- `"custom_language": {}` For series that have languages that are not documented or follow your site's language specification. -## Dependencies +| Key | Purpose | +| --- | --- | +| `empty` | Manga IDs whose chapters never have titles (null titles are OK for these). | +| `noformat` | Titles you don't want your regex to rewrite. | +| `custom` | Per-series custom regex for chapter parsing. | +| `same` | Duplicate-chapter aliasing. Only the keys are uploaded; values are treated as the same chapter. | +| `custom_language` | Language remapping for publishers that use non-standard codes. | +| `multi_chapters` | One source chapter that should appear as multiple chapter numbers on MangaDex. | +| `override_chapter_numbers` | Force a specific chapter number on a chapter ID. 
| -You can use whatever modules you want to, but remember to include a `requirements.txt` in your extension directory. +The runner only reads `same`, `custom_language`, `multi_chapters`, and +`override_chapter_numbers`. Everything else is for the extension's own use. -## Scheduling the extension for running -Add the time to run the extension in the file `/schedule.json`. The `day` key is optional and can be omitted. -The dict should extend to the current file and should follow the format: -``` -: { - "day": , - "hour": <24_hour_clock_int>, - "minute": , +### Scheduling + +Add your extension to `/schedule.json` at the repo root: + +```json +{ + "mangaplus": { + "day": 0, + "hour": 15, + "minute": 5 + } } ``` -The extension's name should be the same as the extension directory name and mainfile. -***This timings defined here ignore the `run_at` method defined in the extension.*** +`day` is optional. When present it is the day-of-week index (Monday=0, +Sunday=6) — `every Tuesday at 15:05` is `{"day": 1, "hour": 15, "minute": 5}`. + +Operators can override the daily timing per-extension at runtime from Discord +(`/schedule set [day]`) — the override lives in +publoader's SQLite state DB and survives restarts. The `schedule.json` shipped +here is the fallback, not the final word. + +The legacy `run_at` method on the extension class is **ignored** when +`schedule.json` defines a timing for that extension. + +### Dependencies + +Use whatever modules you need, but list them in your extension's +`requirements.txt`. publoader installs each extension's `requirements.txt` +on startup (skipping anything already satisfied). + +## The `Extension` class -## Extension main class -The class that is used to read the chapter data from. This class **must** be named `Extension` and your extension will not run if this class is not available. +The class **must** be named `Extension`. The runner instantiates it once per +run with at least an `extension_dirpath: Path` keyword. 
```python +from pathlib import Path + class Extension: def __init__(self, extension_dirpath: Path, **kwargs): - pass + ... ``` ---- +### Required attributes + +| Field | Type | Description | +| --- | --- | --- | +| `name` | `str` | Logger / database key. Keep this stable — changing it loses chapter history. | +| `mangadex_group_id` | `str` | UUID of the upload group. | +| `override_options` | `dict` | Parsed `override_options.json`. Use `{}` if not applicable. | +| `extension_languages` | `List[str]` | ISO codes the extension can produce. | +| `tracked_mangadex_ids` | `List[str]` | MangaDex manga IDs the extension covers. | +| `disabled` | `bool` | Skip the extension when `True`. Defaults to `True` if missing — be explicit. | + +### Required methods + +None of these methods take parameters (apart from `self`). + +| Method | Returns | Notes | +| --- | --- | --- | +| `get_updated_chapters()` | `List[Chapter]` | New chapters since last run. | +| `get_all_chapters()` | `List[Chapter]` or `None` | Full per-series chapter set. `None` skips removed-chapter detection. `[]` removes everything for the series. **Implement this if you can — it powers the unavailable-chapter flow.** | +| `get_updated_manga()` | `List[Manga]` | New series the publisher has added but you haven't tracked yet. | +| `run_at()` | `datetime.time` or `datetime.datetime` | Default run time. Overridden by `schedule.json` and DB overrides — kept for backwards compatibility. | +| `clean_at()` | `Optional[List[int]]` | Days to run a clean reconcile. `None` disables; `[]` defaults to Wednesday; `[0, 3]` runs on Mondays and Thursdays. | +| `daily_check_run()` | `bool` | If `True`, runs daily at 01:00 to catch missed uploads. | -### Main class key variables +Wrong return types skip the run. 
-| Field | Type | Description | -|------------------------|-------------|----------------------------------------------------------------------------------------------------------| -| `name` | `str` | Name used in the database and in the logs. Can contain `-` or `_`. *This name should not be changed.* | -| `mangadex_group_id` | `str` | MangaDex id of the group to upload to. | -| `override_options` | `dict` | Your custom overridden options file after being opened and read. If not used, return an empty dict `{}`. | -| `extension_languages` | `List[str]` | A list of languages supported by the extension. | -| `tracked_mangadex_ids` | `List[str]` | A list of MangaDex manga ids the extension uploads to. | -| `disabled` | `bool` | If the extension is active to run or skipped. *If missing, this will default to True.* | +### Methods that accept parameters ---- +```python +def update_external_data( + self, + posted_chapter_ids: List[str], + fetch_all_chapters: bool, + **kwargs, +) -> None: + ... +``` + +`posted_chapter_ids` is the set of chapters already on MangaDex from previous +runs. `fetch_all_chapters` is `True` during a clean reconcile. **`**kwargs` is +required** — publoader may pass more keyword arguments as the API grows. -### Main class key methods -#### None of the following methods called by the bot should accept parameters. +### Unavailable chapters (since publoader 1.0) -- `get_updated_chapters(self) -> List[Chapter]` Returns a list of newly released chapters. -- `get_all_chapters(self) -> List[Chapter]` Returns all the chapters available for a series, uploaded or not uploaded. ***Must be provided if possible. Returning None will skip checking if chapters have been removed, an empty list will remove the chapters for that series.*** -- `get_updated_manga(self) -> List[Manga]` Returns a list of untracked newly added series. -- `run_at(self) -> datetime.time` A datetime or time object of when you want the extension to be run. 
If this is a datetime object, the extension will only be run on the day specified (year and month are ignored). If this is a time object, the extension will be run daily. Having the minute parameter set as anything other than zero will not run the extension. -- `clean_at(self) -> Optional[List[int]]` The days you want to run the extension as if it is a fresh run. This allows the bot to check for duplicate chapters, chapters not uploaded and chapters needing to be deleted. Allowed values: `None` to disable this, `[]` for the default day (wednesday), an int value in the range 0-6 (inclusive) for the day of the week, e.g. `[0, 3]` for mondays and thursdays. -- `daily_check_run(self) -> bool` If you want the bot to run daily at 1am to catch any chapters that may have not been uploaded. +When a chapter that publoader previously uploaded is no longer in your +`get_all_chapters()` return value, publoader does **not** delete it. It strips +the chapter's `externalUrl` on MangaDex (so the publisher link goes away) and +leaves the in-page info card that was uploaded at first commit. The DB row +moves to the `to_unavailable` collection. Duplicate cleanups still hard-delete. -***If the chapter and manga methods do not return the correct type, the extension run will be skipped.*** +You don't need to do anything new — just keep returning the current set of +on-source chapters from `get_all_chapters()`. The runner handles the rest. -#### The following methods should accept the parameters specified. Your implementation of the parameters is to your discretion. +## `Chapter` and `Manga` -- `update_external_data(self, posted_chapter_ids: List[str], fetch_all_chapters: bool, **kwargs) -> None` Provides data to use before starting the fetch of chapters. `posted_chapter_ids` provides the ids of chapters already uploaded. `fetch_all_chapters` is `True` if the bot is going through the clean cycle. 
*****kwargs needs to be implemented.*** +Import from the stable public API surface: +```python +from publoader.api import Chapter, Manga +``` -The list of chapters returned must be of the `Chapter` class. The chapter class is provided in the package `publoader.models.dataclasses`. -The chapter class contains the following fields: +Older imports (`from publoader.models.dataclasses import Chapter, Manga`) still +work but `publoader.api` is the one we'll keep guaranteeing across versions. +`publoader.api.__api_version__` tells you what surface you're getting. -Fields with `Optional[]` can be left as null, fields without must be populated. +### `Chapter` fields -- `chapter_timestamp: datetime.datetime`. Datetime object of when the chapter was published. This is updated to be timezone-aware. -- `chapter_expire: Optional[datetime.datetime]`. Datetime object of when the chapter expires, if the chapter does not expire, this can be null. This is updated to be timezone-aware. -- `chapter_title: Optional[str]`. Chapter title. -- `chapter_number: Optional[str]`. Chapter number, must follow the MangaDex chapter number regex. -- `chapter_language: str`. ISO-639-2 code. -- `chapter_volume: Optional[str]`. Chapter volume. If the series uses seasons, use this field. Keep empty if the chapter does not have a volume. -- `chapter_id: str`. Chapter id. -- `chapter_url: str`. Chapter link. -- `manga_id: str`. The publisher's series id. -- `md_manga_id: str`. The MangaDex manga id to upload the chapter to. -- `manga_name: str`. The series name. -- `manga_url: str`. The series link. +`Optional[...]` fields can be `None`. The rest are required. ---- +| Field | Type | Meaning | +| --- | --- | --- | +| `chapter_timestamp` | `datetime.datetime` | Publish time. Will be made tz-aware if naive. | +| `chapter_expire` | `Optional[datetime.datetime]` | Expiry time. Tz-aware. 
| +| `chapter_title` | `Optional[str]` | | +| `chapter_number` | `Optional[str]` | Must match the MangaDex chapter-number regex (see below). | +| `chapter_language` | `str` | ISO-639-2 code. | +| `chapter_volume` | `Optional[str]` | Use this for seasons. | +| `chapter_id` | `str` | Publisher's chapter id. | +| `chapter_url` | `str` | Public chapter link. | +| `manga_id` | `str` | Publisher's series id. | +| `md_manga_id` | `str` | MangaDex manga UUID. | +| `manga_name` | `str` | Series name. | +| `manga_url` | `str` | Series link. | -### Extension module key variables +## Module-level requirements -`__version__` must be provided to track the extension's version. +`__version__` must be defined at module level so the runner can include it in +logs. -**The logger must be used.** Use the `setup_logs` function to set up your logger. +The logger must be set up using `setup_extension_logs`: ```python -from publoader.utils.logs import setup_extension_logs +from publoader.api import setup_extension_logs setup_extension_logs( - logger_name="extension_name", - logger_filename="extension_name", + logger_name="", + logger_filename="", ) ``` ---- - -### Functions provided for use +### Helpers provided ```python -from publoader.utils.utils import open_manga_id_map, open_title_regex - -manga_id_map = open_manga_id_map(file_path: Path) -override_options = open_title_regex(file_path: Path) +from publoader.api import ( + open_manga_id_map, + open_title_regex, + find_key_from_list_value, + chapter_number_regex, + create_new_event_loop, + PubloaderWebhook, +) ``` -```python -from publoader.utils.misc import find_key_from_list_value +| Symbol | Purpose | +| --- | --- | +| `open_manga_id_map(path)` | Read your `manga_id_map.json`. | +| `open_title_regex(path)` | Read your `override_options.json`. | +| `find_key_from_list_value(d, value)` | Reverse lookup: returns the dict key whose list value contains `value`. | +| `chapter_number_regex` | Pre-compiled MangaDex chapter-number pattern. 
`chapter_number_regex.match("12.5")`. | +| `create_new_event_loop()` | Convenience for extensions that need a dedicated asyncio loop. | +| `PubloaderWebhook` | Push extension-side notifications through the configured webhooks. | -dictionary_key = find_key_from_list_value(dict_to_search: Dict[str, List[str]], list_element: str) -``` -This function returns the dictionary key after lookup in the dictionary values' arrays. +## AST safety scan -### Variables provided for use +Extensions are loaded with a static AST check that rejects modules using +`eval`, `exec`, `compile`, `__import__`, `subprocess`, `ctypes`, and a few +other footguns. The scan is **not** a sandbox — operators still have to trust +this repo — but it catches obvious mistakes and accidental imports. -```python -from publoader.utils.utils import chapter_number_regex +If your extension genuinely needs a banned construct, open an issue first. + +## Running CI locally -chapter_number_regex.match("string") +```bash +python tools/validate_manifests.py # schema-checks every src/*/manifest.json +python tools/smoke_load.py # imports each entrypoint under publoader.api stubs ``` -provides the pattern used by MangaDex to validate the chapter number. ---- +Both checks also run in CI via `.github/workflows/extension-tests.yml`. + +## Submitting + +Open a PR against `master`. Format with [Black](https://pypi.org/project/black/) +using defaults. Your extension must: + +1. Have a valid `manifest.json` (CI enforces this). +2. Smoke-import cleanly (CI enforces this). +3. Run successfully against your publisher before merge (operator-verified). -# Submitting your extension -Open a PR from your repo to the Publoader master branch with your extension. Format the code using the [Black](https://pypi.org/project/black/) formatter with the default args. You must ensure your extension works, as erroneous extensions will be skipped. 
+Erroneous extensions are skipped at runtime, not rejected outright — but please +don't ship anything you haven't run yourself. diff --git a/src/mangaplus/manifest.json b/src/mangaplus/manifest.json new file mode 100644 index 00000000..e00ed8c9 --- /dev/null +++ b/src/mangaplus/manifest.json @@ -0,0 +1,30 @@ +{ + "name": "mangaplus", + "version": "0.2.04", + "publoader_api": "^1.0.0", + "entrypoint": "mangaplus.py", + "class_name": "Extension", + "mangadex_group_id": "4f1de6a2-f0c5-4ac5-bce5-02c7dbb67deb", + "languages": ["en", "es", "fr", "id", "pt-br", "ru", "th", "de", "vi"], + "allowed_hosts": [ + "jumpg-webapi.tokyo-cdn.com", + "mangaplus.shueisha.co.jp" + ], + "permissions": { + "network": true, + "filesystem_read": ["manga_id_map.json", "override_options.json"], + "filesystem_write": [], + "subprocess": false + }, + "schedule": { + "hour": 15, + "minute": 5, + "timezone": "UTC" + }, + "data_files": { + "manga_id_map": "manga_id_map.json", + "override_options": "override_options.json" + }, + "maintainers": ["publoader"], + "homepage": "https://mangaplus.shueisha.co.jp/" +} diff --git a/sync_extensions.py b/sync_extensions.py index 4fe21ea0..615267c7 100644 --- a/sync_extensions.py +++ b/sync_extensions.py @@ -1,18 +1,121 @@ +"""Sync extension source trees into the shared runtime volume. + +Only `src//` subtrees are copied. The Dockerfile, LICENSE, README, +.github, sync_extensions.py itself, .git, and top-level configs are excluded +so the runtime volume contains exactly what the base loader expects. + +Destination layout: + //.py + //manifest.json + //manga_id_map.json + //... 
+ /schedule.json (copied from repo root) +""" +from __future__ import annotations + +import json +import logging import os import shutil +import sys +import tempfile +from pathlib import Path + +SOURCE_ROOT = Path(os.environ.get("PUBLOADER_SOURCE", "/extensions")) +SOURCE_SRC = SOURCE_ROOT / "src" +TARGET_DIR = Path( + os.environ.get("PUBLOADER_TARGET", "/shared/publoader/extensions") +) +SCHEDULE_FILE = SOURCE_ROOT / "schedule.json" + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s sync_extensions: %(message)s", +) +log = logging.getLogger("sync_extensions") + + +def _is_valid_extension_name(name: str) -> bool: + return bool(name) and all(c.islower() or c.isdigit() or c == "_" for c in name) + + +def _atomic_replace_tree(src: Path, dst: Path) -> None: + """Replace `dst` with `src` atomically via rename (same filesystem).""" + dst.parent.mkdir(parents=True, exist_ok=True) + staging = Path(tempfile.mkdtemp(prefix=f".{dst.name}.", dir=dst.parent)) + try: + shutil.copytree(src, staging / dst.name, dirs_exist_ok=False) + backup = None + if dst.exists(): + backup = dst.with_suffix(dst.suffix + ".old") + if backup.exists(): + shutil.rmtree(backup) + dst.rename(backup) + (staging / dst.name).rename(dst) + if backup is not None: + shutil.rmtree(backup, ignore_errors=True) + finally: + shutil.rmtree(staging, ignore_errors=True) + + +def _validate_extension(ext_dir: Path) -> bool: + name = ext_dir.name + if not _is_valid_extension_name(name): + log.error("skip %s: invalid extension name", name) + return False + if not (ext_dir / f"{name}.py").is_file(): + log.error("skip %s: missing %s.py", name, name) + return False + manifest_path = ext_dir / "manifest.json" + if not manifest_path.is_file(): + log.error("skip %s: missing manifest.json", name) + return False + try: + manifest = json.loads(manifest_path.read_text()) + except (OSError, ValueError) as exc: + log.error("skip %s: manifest.json invalid (%s)", name, exc) + return False + if 
manifest.get("name") != name: + log.error( + "skip %s: manifest.name=%r doesn't match directory", + name, + manifest.get("name"), + ) + return False + return True + + +def main() -> int: + if not SOURCE_SRC.is_dir(): + log.error("source missing: %s", SOURCE_SRC) + return 2 + + TARGET_DIR.mkdir(parents=True, exist_ok=True) + synced: list = [] + skipped: list = [] -SOURCE_DIR = "/extensions" -TARGET_DIR = "/shared/publoader/extensions" + for child in sorted(SOURCE_SRC.iterdir()): + if not child.is_dir() or child.name.startswith((".", "__")): + continue + if not _validate_extension(child): + skipped.append(child.name) + continue + try: + _atomic_replace_tree(child, TARGET_DIR / child.name) + synced.append(child.name) + except OSError as exc: + log.exception("failed syncing %s: %s", child.name, exc) + skipped.append(child.name) -os.makedirs(TARGET_DIR, exist_ok=True) + if SCHEDULE_FILE.is_file(): + try: + shutil.copy2(SCHEDULE_FILE, TARGET_DIR / SCHEDULE_FILE.name) + except OSError as exc: + log.exception("failed copying schedule.json: %s", exc) -for item in os.listdir(SOURCE_DIR): - source_path = os.path.join(SOURCE_DIR, item) - target_path = os.path.join(TARGET_DIR, item) + log.info("synced=%s skipped=%s target=%s", synced, skipped, TARGET_DIR) + return 0 if not skipped else 1 - if os.path.isdir(source_path): - shutil.copytree(source_path, target_path, dirs_exist_ok=True) - else: - shutil.copy2(source_path, target_path) -print(f"Extensions synced to {TARGET_DIR}") +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/smoke_load.py b/tools/smoke_load.py new file mode 100644 index 00000000..80156a21 --- /dev/null +++ b/tools/smoke_load.py @@ -0,0 +1,128 @@ +"""Smoke-load every extension under ./src. 
+ +For each extension, this: + - imports `.py` via importlib (no network, no DB) + - instantiates Extension(extension_dirpath=) + - reads the eagerly-required attributes the base loader will fetch + - calls the no-arg lifecycle methods (run_at, clean_at, daily_check_run) + - validates manifest.json against the extension's runtime values + +Used by CI to catch contract drift before merge. +""" +from __future__ import annotations + +import importlib.util +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +SRC = ROOT / "src" + +REQUIRED_ATTRS = ( + "name", + "mangadex_group_id", + "override_options", + "extension_languages", + "tracked_mangadex_ids", + "disabled", +) +REQUIRED_METHODS = ( + "get_updated_chapters", + "get_all_chapters", + "get_updated_manga", + "run_at", + "clean_at", + "daily_check_run", +) + + +def smoke_one(ext_dir: Path) -> list: + """Return a list of failure messages (empty list ⇒ pass).""" + name = ext_dir.name + entry = ext_dir / f"{name}.py" + if not entry.is_file(): + return [f"missing entrypoint {entry}"] + + spec = importlib.util.spec_from_file_location(name, entry) + mod = importlib.util.module_from_spec(spec) + failures: list = [] + try: + spec.loader.exec_module(mod) + except Exception as e: + return [f"failed to import {entry.name}: {e!r}"] + + if not hasattr(mod, "Extension"): + return [f"{entry.name} has no Extension class"] + + try: + ext = mod.Extension(extension_dirpath=ext_dir) + except Exception as e: + return [f"Extension(extension_dirpath=...) 
raised: {e!r}"] + + for attr in REQUIRED_ATTRS: + try: + getattr(ext, attr) + except Exception as e: + failures.append(f"missing attribute {attr!r}: {e!r}") + + for meth in REQUIRED_METHODS: + m = getattr(ext, meth, None) + if not callable(m): + failures.append(f"missing or non-callable method {meth!r}") + continue + + # No-arg lifecycle methods shouldn't hit the network + for meth in ("run_at", "clean_at", "daily_check_run"): + m = getattr(ext, meth, None) + if callable(m): + try: + m() + except Exception as e: + failures.append(f"{meth}() raised: {e!r}") + + manifest_path = ext_dir / "manifest.json" + if not manifest_path.is_file(): + failures.append("manifest.json missing") + else: + try: + manifest = json.loads(manifest_path.read_text()) + except (OSError, ValueError) as e: + failures.append(f"manifest.json unreadable: {e}") + else: + if manifest.get("name") != name: + failures.append( + f"manifest.name={manifest.get('name')!r} != dir name {name!r}" + ) + mid = manifest.get("mangadex_group_id") + if mid and getattr(ext, "mangadex_group_id", None) != mid: + failures.append( + "manifest.mangadex_group_id doesn't match Extension.mangadex_group_id" + ) + + return failures + + +def main() -> int: + if not SRC.is_dir(): + print("no src/ directory; nothing to test", file=sys.stderr) + return 2 + + overall_ok = True + for ext_dir in sorted(SRC.iterdir()): + if not ext_dir.is_dir() or ext_dir.name.startswith((".", "__")): + continue + failures = smoke_one(ext_dir) + if failures: + overall_ok = False + print(f"FAIL {ext_dir.name}") + for f in failures: + print(f" - {f}") + else: + print(f"OK {ext_dir.name}") + + return 0 if overall_ok else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/validate_manifests.py b/tools/validate_manifests.py new file mode 100644 index 00000000..dc829814 --- /dev/null +++ b/tools/validate_manifests.py @@ -0,0 +1,100 @@ +"""Validate every src//manifest.json against the required shape.""" +from __future__ import 
# Python payload of tools/validate_manifests.py, reformatted from the collapsed patch text.
import json
import re
import sys
from pathlib import Path

# This script lives one directory below the repository root; extensions are
# the immediate sub-directories of <root>/src.
ROOT = Path(__file__).resolve().parent.parent
SRC = ROOT / "src"

# Every manifest must contain these keys with exactly these JSON types.
REQUIRED_FIELDS = {
    "name": str,
    "version": str,
    "publoader_api": str,
    "entrypoint": str,
    "class_name": str,
    "mangadex_group_id": str,
    "languages": list,
    "allowed_hosts": list,
    "permissions": dict,
}

# Canonical 8-4-4-4-12 hex UUID, case-insensitive.
_UUID = re.compile(
    r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
    re.IGNORECASE,
)
# Extension directory names: lowercase letters, digits and underscores only.
_EXT_NAME = re.compile(r"^[a-z0-9_]+$")


def validate(ext_dir: Path) -> list:
    """Check ``<ext_dir>/manifest.json`` against the required shape.

    Args:
        ext_dir: Directory of a single extension; its name is the expected
            value of the manifest's ``name`` field.

    Returns:
        A list of failure strings, each prefixed with the directory name.
        An empty list means the manifest is valid. A missing, unreadable or
        non-object manifest yields a single failure.
    """
    name = ext_dir.name
    path = ext_dir / "manifest.json"
    if not path.is_file():
        return [f"{name}: missing manifest.json"]
    try:
        # JSON is UTF-8 by specification; don't depend on the locale encoding.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        return [f"{name}: unreadable ({e})"]

    if not isinstance(data, dict):
        return [f"{name}: top-level must be an object"]

    failures: list = []

    for field, want_type in REQUIRED_FIELDS.items():
        if field not in data:
            failures.append(f"{name}: missing field {field!r}")
        elif not isinstance(data[field], want_type):
            failures.append(
                f"{name}: field {field!r} expected {want_type.__name__}, got "
                f"{type(data[field]).__name__}"
            )

    if data.get("name") != name:
        failures.append(
            f"{name}: manifest.name={data.get('name')!r} doesn't match dir"
        )
    if not _EXT_NAME.match(name):
        failures.append(f"{name}: dir name isn't lower_snake_case")
    if "mangadex_group_id" in data and not _UUID.match(str(data["mangadex_group_id"])):
        failures.append(f"{name}: mangadex_group_id isn't a UUID")

    # Per-element checks run only on real lists. A wrong container type was
    # already reported by the REQUIRED_FIELDS loop, and iterating null or a
    # number here would raise TypeError and crash the validator.
    languages = data.get("languages")
    if isinstance(languages, list):
        for lang in languages:
            if not isinstance(lang, str):
                failures.append(f"{name}: languages entry {lang!r} isn't a string")

    hosts = data.get("allowed_hosts")
    if isinstance(hosts, list):
        for host in hosts:
            # Any entry containing a slash (a URL or a path) is rejected.
            if not isinstance(host, str) or "/" in host:
                failures.append(f"{name}: allowed_hosts entry {host!r} invalid")

    perms = data.get("permissions")
    if isinstance(perms, dict):
        # Permission keys are optional, but typed when present.
        for key in ("network", "subprocess"):
            if key in perms and not isinstance(perms[key], bool):
                failures.append(f"{name}: permissions.{key} must be bool")
        for key in ("filesystem_read", "filesystem_write"):
            if key in perms and not isinstance(perms[key], list):
                failures.append(f"{name}: permissions.{key} must be a list")
    return failures


def main() -> int:
    """Validate the manifest of every extension under SRC and print results.

    Returns:
        0 when every manifest is valid, 1 when at least one fails, 2 when the
        src/ directory does not exist.
    """
    if not SRC.is_dir():
        print("no src/ directory", file=sys.stderr)
        return 2
    overall_ok = True
    for ext_dir in sorted(SRC.iterdir()):
        # Skip plain files plus hidden and dunder directories (e.g. __pycache__).
        if not ext_dir.is_dir() or ext_dir.name.startswith((".", "__")):
            continue
        failures = validate(ext_dir)
        if failures:
            overall_ok = False
            for f in failures:
                print(f"FAIL {f}")
        else:
            print(f"OK   {ext_dir.name}/manifest.json")
    return 0 if overall_ok else 1


if __name__ == "__main__":
    sys.exit(main())
open_title_regex, -) -from publoader.webhook import PubloaderWebhook + +if TYPE_CHECKING: + from publoader.models.dataclasses import Chapter, Manga DEFAULT_TIMESTAMP = 1 __version__ = "0.2.04" -setup_extension_logs( - logger_name="mangaplus", - logger_filename="mangaplus", -) - logger = logging.getLogger("mangaplus") +def _load_publoader_api() -> None: + """Bind publoader.* symbols to module globals. + + Deferred so smoke tests (which don't install publoader) can import this + module without resolving the runtime deps. Called from the entrypoint + that actually does work. + """ + global Chapter, Manga, create_new_event_loop, find_key_from_list_value + global chapter_number_regex, open_manga_id_map, open_title_regex, PubloaderWebhook + from publoader.models.dataclasses import Chapter, Manga + from publoader.utils.misc import create_new_event_loop, find_key_from_list_value + from publoader.utils.utils import ( + chapter_number_regex, + open_manga_id_map, + open_title_regex, + ) + from publoader.webhook import PubloaderWebhook + + class Extension: def __init__(self, extension_dirpath: Path, **kwargs): + try: + from publoader.utils.logs import setup_extension_logs + except ModuleNotFoundError: + pass + else: + setup_extension_logs( + logger_name="mangaplus", + logger_filename="mangaplus", + ) + self.name = "mangaplus" self.mangadex_group_id = "4f1de6a2-f0c5-4ac5-bce5-02c7dbb67deb" self.manga_id_map_filename = "manga_id_map.json" @@ -44,10 +64,16 @@ def __init__(self, extension_dirpath: Path, **kwargs): self.extension_dirpath = extension_dirpath self.fetch_all_chapters = False - self._posted_chapters_ids = [] - self._updated_chapters: List[Chapter] = [] - self._all_mplus_chapters: List[Chapter] = [] - self._untracked_manga: List[Manga] = [] + self._posted_chapters_ids: List[str] = [] + self._updated_chapters: list = [] + self._all_mplus_chapters: list = [] + self._untracked_manga: list = [] + self.override_options: dict = {} + self.tracked_mangadex_ids: list = [] + 
self.tracked_manga: list = [] + self.manga_no_chapters: list = [] + self._manga_id_map: dict = {} + self._num2words: Optional[str] = None self._mplus_base_api_url = "https://jumpg-webapi.tokyo-cdn.com/api/" self._chapter_url_format = "https://mangaplus.shueisha.co.jp/viewer/{}" self._manga_url_format = "https://mangaplus.shueisha.co.jp/titles/{}" @@ -87,6 +113,7 @@ def get_updated_manga(self) -> List[Manga]: def update_external_data( self, posted_chapter_ids: List[str], fetch_all_chapters: bool, **kwargs ) -> None: + _load_publoader_api() self._posted_chapters_ids = posted_chapter_ids self.fetch_all_chapters = fetch_all_chapters From 8750bf355a6ba6efc5c43a5bb7e726a54644d97f Mon Sep 17 00:00:00 2001 From: ArdaxHz <70710586+ArdaxHz@users.noreply.github.com> Date: Thu, 28 May 2026 16:32:11 +0100 Subject: [PATCH 3/3] fix CI import failing --- .github/workflows/extension-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/extension-tests.yml b/.github/workflows/extension-tests.yml index abc3d264..ab2ede01 100644 --- a/.github/workflows/extension-tests.yml +++ b/.github/workflows/extension-tests.yml @@ -32,7 +32,7 @@ jobs: # neither of which import publoader.* at module level by the time # the new publoader.api shim has landed. find ./src -mindepth 2 -maxdepth 2 -name requirements.txt \ - -exec pip install --no-cache-dir -r {} + + -exec pip install --no-cache-dir -r {} \; - name: Validate manifests run: python tools/validate_manifests.py