diff --git a/core/errors.py b/core/errors.py index 32192cc..13c75a0 100644 --- a/core/errors.py +++ b/core/errors.py @@ -140,7 +140,8 @@ def classify_failure(exc: BaseException) -> CollectorFailureCategory: ``requests.HTTPError`` with ``response.status_code`` 429 maps to :attr:`CollectorFailureCategory.RATE_LIMIT`; 401 and 403 map to - :attr:`CollectorFailureCategory.AUTH`. Other cases stay conservative. + :attr:`CollectorFailureCategory.AUTH`. ``discord.errors.HTTPException`` + with ``status`` is classified similarly when discord.py is in use. """ # Django / app try: @@ -191,6 +192,24 @@ def classify_failure(exc: BaseException) -> CollectorFailureCategory: if "HTTPStatus" in exc_name or "Transport" in exc_name or "Connect" in exc_name: return CollectorFailureCategory.NETWORK + # discord.py (optional dependency): HTTPException and subclasses expose ``status``. + if exc_mod.startswith("discord"): + status = getattr(exc, "status", None) + if isinstance(status, int): + if status == 429: + return CollectorFailureCategory.RATE_LIMIT + if status in (401, 403): + return CollectorFailureCategory.AUTH + if 500 <= status < 600: + return CollectorFailureCategory.NETWORK + if 400 <= status < 500: + return CollectorFailureCategory.UNKNOWN + return CollectorFailureCategory.NETWORK + if exc_name == "HTTPException": + return CollectorFailureCategory.NETWORK + if exc_name in ("LoginFailure", "PrivilegedIntentsRequired", "ClientException"): + return CollectorFailureCategory.AUTH + if isinstance(exc, OSError): return _classify_os_error(exc) diff --git a/discord_activity_tracker/__init__.py b/discord_activity_tracker/__init__.py index e69de29..d21746b 100644 --- a/discord_activity_tracker/__init__.py +++ b/discord_activity_tracker/__init__.py @@ -0,0 +1,9 @@ +"""Discord Activity Tracker Django app. + +Persists Discord guild, channel, message, and reaction data for analytics, Markdown +context export, and Pinecone indexing. 
All writes to app models go through +``discord_activity_tracker.services``. Ingestion is driven by management commands and +sync helpers (DiscordChatExporter and optional discord.py paths). + +App config: ``discord_activity_tracker.apps.DiscordActivityTrackerConfig``. +""" diff --git a/discord_activity_tracker/management/__init__.py b/discord_activity_tracker/management/__init__.py index e69de29..12d73c6 100644 --- a/discord_activity_tracker/management/__init__.py +++ b/discord_activity_tracker/management/__init__.py @@ -0,0 +1 @@ +"""Django ``management`` package for ``discord_activity_tracker`` (``manage.py`` commands).""" diff --git a/discord_activity_tracker/management/commands/__init__.py b/discord_activity_tracker/management/commands/__init__.py index e69de29..3ee9b00 100644 --- a/discord_activity_tracker/management/commands/__init__.py +++ b/discord_activity_tracker/management/commands/__init__.py @@ -0,0 +1 @@ +"""Management commands: ``run_discord_activity_tracker``, ``backfill_discord_activity_tracker``.""" diff --git a/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py b/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py index 5535ce4..5a4000d 100644 --- a/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py +++ b/discord_activity_tracker/management/commands/backfill_discord_activity_tracker.py @@ -1,10 +1,26 @@ -"""Import pre-exported Discord JSON from disk → DB → optional Pinecone sync. +"""Django management command ``backfill_discord_activity_tracker``. -Reads DiscordChatExporter JSON files under -``workspace/discord_activity_tracker/Discussion - c-cpp-discussion/`` -(recursively, including nested subfolders), -persists messages to the database, then deletes each file after a successful import -so it is not processed again. 
+Imports **pre-exported** DiscordChatExporter JSON from the workspace drop folder +(``workspace/discord_activity_tracker/Discussion - c-cpp-discussion/``, +recursively), validates envelope and normalized messages, upserts into the database +via the service layer, then **deletes** each file after a successful import so it is +not processed again. + +This command does **not** invoke DiscordChatExporter; place JSON exports in the drop +folder manually or from another host. + +Optional arguments: ``--dry-run`` (list files only), ``--skip-pinecone`` / +``--ignore-pinecone`` (skip ``task_discord_pinecone_sync`` after import). See +``Command.add_arguments`` and ``docs/service_api/discord_activity_tracker.md``. + +Side effects: DB writes to ``DiscordServer``, ``DiscordChannel``, ``DiscordMessage``, +``DiscordReaction``, and ``DiscordProfile`` (via services); filesystem deletes on +success; Pinecone sync when enabled. + +Raises: + Per-file parse/validation failures are caught inside ``DiscordBackfillCollector.run`` + (logged and reported on stdout); they do not abort the whole command. Uncaught + exceptions from ``sync_pinecone`` or the base command layer may still propagate. """ from __future__ import annotations @@ -48,7 +64,18 @@ def _json_display_path(import_dir: Path, json_path: Path) -> str: class DiscordBackfillCollector(CollectorBase): - """Import DiscordChatExporter JSON files from the c-cpp-discussion drop folder.""" + """Backfill collector: scan drop folder, import each JSON, delete on success. + + ``run()`` lists JSON under ``get_cpp_discussion_import_dir()``. In dry-run mode it + only prints the paths; otherwise each file is parsed, validated against the + staging schema, and its messages are upserted in batches. The file is unlinked + on success; on failure the error is logged and the file is kept. + + ``sync_pinecone()`` runs after a successful collector run (unless dry-run or + ``skip_pinecone``). + + Side effects: Same as module docstring (DB, deletes, optional Pinecone). 
+ """ def __init__(self, *, stdout, style, **opts: Any) -> None: self.stdout = stdout @@ -152,6 +179,28 @@ def sync_pinecone(self) -> None: class Command(BaseCollectorCommand): + """``manage.py backfill_discord_activity_tracker`` — import JSON from the drop folder. + + Uses ``DiscordBackfillCollector``. Required layout: JSON files under + ``{WORKSPACE_DIR}/discord_activity_tracker/Discussion - c-cpp-discussion/``. + + Optional arguments: ``--dry-run``, ``--skip-pinecone`` / ``--ignore-pinecone``. + + Examples: + ``python manage.py backfill_discord_activity_tracker`` + + ``python manage.py backfill_discord_activity_tracker --dry-run`` + + ``python manage.py backfill_discord_activity_tracker --skip-pinecone`` + + Raises: + Per-file errors are swallowed in the collector loop; see class docstring. + Base command / Pinecone task may raise if misconfigured. + + See Also: + ``docs/service_api/discord_activity_tracker.md`` + """ + help = ( "Import DiscordChatExporter JSON from " "workspace/discord_activity_tracker/Discussion - c-cpp-discussion/ " diff --git a/discord_activity_tracker/management/commands/run_discord_activity_tracker.py b/discord_activity_tracker/management/commands/run_discord_activity_tracker.py index 1871f3a..122d47c 100644 --- a/discord_activity_tracker/management/commands/run_discord_activity_tracker.py +++ b/discord_activity_tracker/management/commands/run_discord_activity_tracker.py @@ -1,12 +1,34 @@ -""" -Management command: run_discord_activity_tracker - -Runs several tasks in order: - 1. Ensure raw workspace layout - 2. Fetch Discord messages (DiscordChatExporter) → DB → archive JSON under - WORKSPACE_DIR/raw/discord_activity_tracker/// - 3. Export DB messages as Markdown to DISCORD_CONTEXT_REPO_PATH (optional git push) - 4. Upsert Discord messages to Pinecone (run_cppa_pinecone_sync) +"""Django management command ``run_discord_activity_tracker``. 
+ +Orchestrates the scheduled Discord ingest pipeline: workspace prep, optional +DiscordChatExporter fetch with DB upsert and raw JSON archival, Markdown export to +``DISCORD_CONTEXT_REPO_PATH``, and optional Pinecone sync via ``run_cppa_pinecone_sync``. + +Phases (see ``DiscordActivityCollector`` and task helpers in this module): + + 1. **Workspace** — Ensure raw/staging dirs under ``WORKSPACE_DIR`` (see + ``discord_activity_tracker.workspace``). + 2. **Sync** — Run DiscordChatExporter (unless ``--skip-discord-sync``), parse JSON, + validate staging schema, upsert via ``discord_activity_tracker.services``, + move exports under + ``{WORKSPACE_DIR}/raw/discord_activity_tracker///``. + 3. **Markdown** — Export DB rows to the context repo (unless ``--skip-markdown-export``); + optional git push when ``DISCORD_CONTEXT_AUTO_COMMIT`` is true and + ``--skip-remote-push`` is not set. + 4. **Pinecone** — ``task_discord_pinecone_sync`` when ``PINECONE_DISCORD_*`` are set + and ``--skip-pinecone`` is not used. + +Required settings for a full sync: ``DISCORD_USER_TOKEN``, ``DISCORD_SERVER_ID``. +Channel scope uses ``DISCORD_CHANNEL_IDS`` unless overridden by ``--channels``. + +CLI flags are documented in the ``help=`` strings of ``Command.add_arguments`` and in +``docs/service_api/discord_activity_tracker.md``. + +Raises: + django.core.management.base.CommandError: Missing token/guild, invalid + ``--since``/``--until`` parse, or DiscordChatExporter failure (wrapped from + ``DiscordChatExporterError``). Other exceptions from the collector may propagate + after logging from ``_handle_core``. """ from __future__ import annotations @@ -312,7 +334,18 @@ def task_markdown_export_and_push( class DiscordActivityCollector(CollectorBase): - """Discord sync + Markdown + Pinecone; ``sync_pinecone`` runs ``run_cppa_pinecone_sync``.""" + """Collector implementation for ``run_discord_activity_tracker``. 
+ + Holds stdout/style, resolved ``channel_ids`` (from ``--channels`` or + ``settings.DISCORD_CHANNEL_IDS``), and delegates to ``Command._handle_core``. + + ``run()`` drives fetch → Markdown → Pinecone according to options. + ``sync_pinecone()`` runs ``task_discord_pinecone_sync`` when not dry-run and not + skipping Pinecone. + + Side effects: Same as the management command (DB, filesystem, subprocess calls + to DiscordChatExporter and Pinecone tooling via configured runners). + """ def __init__(self, cmd: "Command", options: dict) -> None: self.cmd = cmd @@ -374,7 +407,35 @@ async def _persist_channel( class Command(BaseCollectorCommand): - """Discord activity tracker: fetch → DB → raw JSON; Markdown export; Pinecone upsert.""" + """``manage.py run_discord_activity_tracker`` — incremental Discord ingest and exports. + + Wraps ``DiscordActivityCollector`` with ``BaseCollectorCommand`` (dry-run, logging, + collector phases). See module docstring for phases and required settings. + + Optional arguments (full text on each ``add_argument``): + + ``--dry-run``, ``--skip-discord-sync``, ``--skip-markdown-export``, + ``--skip-remote-push``, ``--skip-pinecone`` / ``--ignore-pinecone``, + ``--since`` / ``--until`` (and aliases), ``--channels``, ``--task`` (deprecated). + + Examples: + ``python manage.py run_discord_activity_tracker`` — full pipeline with + settings-based channel allowlist. + + ``python manage.py run_discord_activity_tracker --dry-run`` — log planned + steps only. + + ``python manage.py run_discord_activity_tracker --channels 123,456 --skip-pinecone`` — + restrict channels and skip Pinecone. + + Raises: + CommandError: If ``DISCORD_USER_TOKEN`` or ``DISCORD_SERVER_ID`` is unset, or + date options fail to parse, or DiscordChatExporter fails (see ``task_discord_sync``). 
+ + See Also: + ``docs/service_api/discord_activity_tracker.md`` + ``docs/operations/discord_chat_exporter.md`` + """ help = ( "Discord activity tracker: (1) fetch via DiscordChatExporter + DB + raw archive; " diff --git a/discord_activity_tracker/models.py b/discord_activity_tracker/models.py index f1f0504..fc9cd72 100644 --- a/discord_activity_tracker/models.py +++ b/discord_activity_tracker/models.py @@ -4,7 +4,16 @@ class DiscordServer(models.Model): - """Discord server/guild.""" + """Persisted Discord guild (server) metadata synced from export or API pipelines. + + One row per Discord guild snowflake ``server_id``. Holds display ``server_name`` + and optional ``icon_url`` for UI or audit. Timestamps ``created_at`` / + ``updated_at`` track row lifecycle. + + Relationships: + Reverse ``channels``: ``DiscordChannel`` rows with FK to this server + (``related_name="channels"`` on ``DiscordChannel``). + """ server_id = models.BigIntegerField(unique=True, db_index=True) server_name = models.CharField(max_length=255, db_index=True) @@ -20,7 +29,17 @@ def __str__(self): class DiscordChannel(models.Model): - """Discord channel within a server.""" + """A channel (text thread, category child, etc.) belonging to one ``DiscordServer``. + + Key fields: ``channel_id`` (Discord snowflake, globally unique), ``channel_name``, + ``channel_type`` (e.g. exporter string), ``topic``, ``position``, and optional + ``category_id`` / ``category_name`` for grouping in the guild tree. + + Relationships: + ``server``: FK to ``DiscordServer`` (column ``server_id``). + Reverse ``messages``: ``DiscordMessage`` rows for this channel + (``related_name="messages"`` on ``DiscordMessage``). + """ server = models.ForeignKey( DiscordServer, @@ -50,7 +69,23 @@ def __str__(self): class DiscordMessage(models.Model): - """Discord message in a channel.""" + """A single Discord message stored for search, export, and Pinecone preprocessing. 
+ + Key fields: ``message_id`` (snowflake, unique), ``content``, ``message_type`` + (e.g. ``Default``, ``Reply``), ``is_pinned``, ``message_created_at`` / + ``message_edited_at``, ``reply_to_message_id``, ``attachment_urls`` (JSON list), + ``has_attachments``, and soft-delete flags ``is_deleted`` / ``deleted_at``. + + Relationships: + ``channel``: FK to ``DiscordChannel`` (column ``channel_id``). + ``author``: FK to ``DiscordProfile`` (``cppa_user_tracker.models``); column + ``author_id``. Reverse on profile: ``discord_messages``. + Reverse ``reactions``: ``DiscordReaction`` rows + (``related_name="reactions"`` on ``DiscordReaction``). + + Indexes on ``(channel, message_created_at)``, ``message_created_at``, + ``is_deleted``, and ``message_type`` support sync windows and queries. + """ message_id = models.BigIntegerField(unique=True, db_index=True) channel = models.ForeignKey( @@ -94,7 +129,14 @@ def __str__(self): class DiscordReaction(models.Model): - """Reaction on a Discord message.""" + """Aggregated emoji reaction counts on a ``DiscordMessage``. + + One row per (``message``, ``emoji``) pair (enforced by unique constraint). ``count`` + stores the total from the source payload at sync time. + + Relationships: + ``message``: FK to ``DiscordMessage`` (column ``message_id``). + """ message = models.ForeignKey( DiscordMessage, diff --git a/discord_activity_tracker/services.py b/discord_activity_tracker/services.py index 647e894..28c5422 100644 --- a/discord_activity_tracker/services.py +++ b/discord_activity_tracker/services.py @@ -1,6 +1,21 @@ """Service layer for Discord Activity Tracker. -All DB writes go through these functions (get_or_create_* pattern). +All writes to ``discord_activity_tracker`` models go through this module (single +writer policy). Higher-level API tables and narrative docs live in +``docs/service_api/discord_activity_tracker.md``. 
+ +Bulk ingest expects dicts shaped like the output of +``discord_activity_tracker.sync.messages._prepare_message_data`` or +``discord_activity_tracker.sync.chat_exporter.convert_exporter_message_to_dict`` +(normalized message payloads with ``author``, ``message_id``, ``reactions``, etc.). + +**CollectorFailureCategory:** These functions perform database I/O only; they do +not call Discord HTTP APIs and do not assign ``CollectorFailureCategory`` labels. +Collectors and sync code classify failures via ``core.errors.classify_failure``. +If a caller logs ORM failures through that helper, mapping follows ``core.errors``. + +This module does not intentionally raise ``ValueError`` for bad inputs; bulk +paths may skip individual rows and log warnings (see each function's side effects). """ import logging @@ -26,7 +41,30 @@ def get_or_create_discord_server( server_id: int, server_name: str, icon_url: str = "" ) -> Tuple[DiscordServer, bool]: - """Get or create server, update name/icon if changed.""" + """Get or create a Discord guild (server) row and refresh metadata when it already exists. + + Uses ``get_or_create`` on ``server_id``. When the row already exists, updates + name and icon only if they differ, via ``save(update_fields=...)``. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + server_id: Discord snowflake for the guild. + server_name: Current guild name. + icon_url: CDN URL for the guild icon; may be empty. + + Returns: + ``(server, created)`` where ``created`` is ``True`` iff a new + ``DiscordServer`` row was inserted on this call (Django ``get_or_create`` + semantics). + + Raises: + None intentionally. Django ORM may raise database-related exceptions + (e.g. ``IntegrityError``, ``OperationalError``) under concurrency or DB faults. + + Side effects: + Reads/writes ``DiscordServer``. May emit ``logger.debug`` on update. 
+ """ server, created = DiscordServer.objects.get_or_create( server_id=server_id, defaults={ @@ -62,7 +100,33 @@ def get_or_create_discord_channel( category_id: Optional[int] = None, category_name: str = "", ) -> Tuple[DiscordChannel, bool]: - """Get or create channel, update fields if changed.""" + """Get or create a channel row and refresh fields when the row already exists. + + Uses ``get_or_create`` on ``channel_id``. Existing rows are updated when any + of name, type, topic, position, or category fields change (``category_name`` is + only applied when non-empty and different). + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + server: Parent ``DiscordServer`` (guild). + channel_id: Discord snowflake for the channel. + channel_name: Display name (e.g. without leading ``#``). + channel_type: Exporter/discord type string (e.g. ``GuildTextChat``). + topic: Channel topic text. + position: Sort order within the guild. + category_id: Parent category snowflake, or ``None`` if unknown/uncategorized. + category_name: Human-readable category name when known. + + Returns: + ``(channel, created)`` with Django ``get_or_create`` semantics for ``created``. + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + Reads/writes ``DiscordChannel``. May emit ``logger.debug`` on update. + """ channel, created = DiscordChannel.objects.get_or_create( channel_id=channel_id, defaults={ @@ -126,7 +190,36 @@ def create_or_update_discord_message( message_type: str = "Default", is_pinned: bool = False, ) -> Tuple[DiscordMessage, bool]: - """Create or update message.""" + """Create or update a single message by Discord ``message_id`` (upsert). + + Uses ``update_or_create`` so the row is keyed by ``message_id``; ``defaults`` + refresh channel, author, content, type, pins, timestamps, attachments, and + clears ``is_deleted``. ``has_attachments`` is derived from ``attachment_urls``. 
+ + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + message_id: Discord snowflake for the message. + channel: Channel the message belongs to. + author: ``DiscordProfile`` for the message author. + content: Message body text. + message_created_at: Original creation time (timezone-aware recommended). + message_edited_at: Last edit time, if any. + reply_to_message_id: Parent message snowflake for replies, or ``None``. + attachment_urls: List of attachment URLs; ``None`` is treated as empty. + message_type: Exporter/discord type string; empty coerces to the string ``Default``. + is_pinned: Whether the message is pinned in the channel. + + Returns: + ``(message, created)`` where ``created`` is ``True`` iff a new + ``DiscordMessage`` row was inserted (Django ``update_or_create`` semantics). + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + Reads/writes ``DiscordMessage``. + """ if attachment_urls is None: attachment_urls = [] @@ -153,7 +246,24 @@ def create_or_update_discord_message( def mark_message_deleted( message: DiscordMessage, deleted_at: Optional[datetime] = None ) -> DiscordMessage: - """Mark message as deleted.""" + """Soft-delete a message: set ``is_deleted`` and ``deleted_at``. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + message: Row to mark deleted (mutated in memory and saved). + deleted_at: Deletion timestamp; defaults to ``django.utils.timezone.now()``. + + Returns: + The same ``DiscordMessage`` instance after ``save(update_fields=...)``. + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + Updates ``DiscordMessage.is_deleted``, ``deleted_at``, ``updated_at``. + Emits ``logger.debug``. 
+ """ if deleted_at is None: deleted_at = django_timezone.now() @@ -168,7 +278,26 @@ def mark_message_deleted( def add_or_update_reaction( message: DiscordMessage, emoji: str, count: int ) -> Tuple[DiscordReaction, bool]: - """Add or update reaction.""" + """Upsert one reaction row per (message, emoji) with the given reaction count. + + Uses ``update_or_create`` on the unique pair ``(message, emoji)``. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + message: Message the reaction is on. + emoji: Emoji string or custom emoji representation. + count: Aggregated reaction count from the source payload. + + Returns: + ``(reaction, created)`` with Django ``update_or_create`` semantics for ``created``. + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + Reads/writes ``DiscordReaction``. + """ reaction, created = DiscordReaction.objects.update_or_create( message=message, emoji=emoji, defaults={"count": count} ) @@ -177,7 +306,25 @@ def add_or_update_reaction( def get_channel_latest_message_at(channel: DiscordChannel) -> Optional[datetime]: - """Latest ``message_created_at`` among non-deleted messages in this channel, or None.""" + """Return the latest ``message_created_at`` among non-deleted messages in a channel. + + Read-only aggregate over ``DiscordMessage``; no writes. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + channel: Channel to scan. + + Returns: + Maximum ``message_created_at`` for rows with ``is_deleted=False``, or + ``None`` if there are no such messages. + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + None (read-only query). 
+ """ row = DiscordMessage.objects.filter(channel=channel, is_deleted=False).aggregate( m=Max("message_created_at") ) @@ -189,7 +336,28 @@ def queryset_channels_with_recent_messages( cutoff: datetime, channel_ids: Optional[List[int]] = None, ) -> QuerySet[DiscordChannel]: - """Channels on *server* that have at least one non-deleted message at or after *cutoff*.""" + """Channels on ``server`` with at least one non-deleted message at or after ``cutoff``. + + Compares ``message_created_at`` to ``cutoff``; use timezone-aware datetimes for + predictable UTC behavior. When ``channel_ids`` is set, restricts to those + Discord ``channel_id`` values (snowflakes), not internal PKs. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + server: Guild whose channels are considered. + cutoff: Inclusive lower bound on ``DiscordMessage.message_created_at``. + channel_ids: Optional allowlist of Discord channel snowflakes. + + Returns: + ``QuerySet`` of ``DiscordChannel`` ordered by ``position``, ``channel_name``. + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + None (read-only query). + """ pks = ( DiscordMessage.objects.filter( channel__server=server, @@ -212,7 +380,27 @@ def get_active_channels( days: int = 30, channel_ids: Optional[List[int]] = None, ) -> QuerySet[DiscordChannel]: - """Channels with at least one non-deleted message in the last *days*, optional allowlist.""" + """Same as ``queryset_channels_with_recent_messages`` with ``cutoff = now - days``. + + ``cutoff`` is ``django.utils.timezone.now() - timedelta(days=days)``: an exact + multiple of 24 hours before now, not aligned to a calendar-day boundary. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + server: Guild whose channels are considered. + days: Lookback window in days from the current time. + channel_ids: Optional allowlist of Discord channel snowflakes. 
+ + Returns: + ``QuerySet`` of ``DiscordChannel`` with recent activity. + + Raises: + None intentionally. Django ORM may raise database-related exceptions. + + Side effects: + None (read-only query; delegates to ``queryset_channels_with_recent_messages``). + """ from datetime import timedelta cutoff = django_timezone.now() - timedelta(days=days) @@ -227,11 +415,30 @@ def get_active_channels( def bulk_upsert_discord_users( user_data_list: List[Dict[str, Any]], ) -> Dict[int, DiscordProfile]: - """Bulk upsert Discord user profiles. Returns {discord_user_id: DiscordProfile} with PKs. + """Upsert author profiles for a batch of messages. + + Deduplicates by ``user_id`` (last dict wins). Existing ``DiscordProfile`` rows + are fetched in one query and updated in Python when fields differ; missing + users are created via ``get_or_create_discord_profile`` (no + ``bulk_create(update_conflicts=True)`` because ``DiscordProfile`` uses MTI). + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + user_data_list: Dicts with at least ``user_id`` and ``username``; optional + ``display_name``, ``avatar_url``, ``is_bot`` (see sync normalizers). + + Returns: + Map ``discord_user_id -> DiscordProfile`` including database PKs on profiles. - Uses get_or_create per user because DiscordProfile uses multi-table - inheritance (BaseProfile) which doesn't support bulk_create(update_conflicts=True). - Typical batches have 10-50 unique users, so individual creates are fine. + Raises: + None intentionally. Missing keys in a dict (e.g. no ``user_id``) will + raise ``KeyError``. Django ORM may raise database-related exceptions. + + Side effects: + Reads/writes ``cppa_user_tracker.DiscordProfile`` via queries and + ``get_or_create_discord_profile``; may call ``profile.save()`` without + ``update_fields`` when updating existing rows. 
""" if not user_data_list: return {} @@ -286,7 +493,29 @@ def bulk_upsert_discord_messages( channel: DiscordChannel, user_map: Dict[int, DiscordProfile], ) -> Dict[int, DiscordMessage]: - """Bulk upsert messages. Returns {discord_message_id: DiscordMessage} with PKs.""" + """Bulk upsert messages for one channel using ``bulk_create(update_conflicts=True)``. + + Skips a message (with ``logger.warning``) when ``user_map`` has no profile for + the author's ``user_id`` (``d["author"]["user_id"]``). Skips building rows when every message is skipped; + then returns an empty dict. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + message_data_list: Normalized message dicts (``message_id``, ``author``, etc.). + channel: Target channel for all rows. + user_map: ``discord_user_id -> DiscordProfile`` from ``bulk_upsert_discord_users``. + + Returns: + Map ``message_id -> DiscordMessage`` with PKs loaded (``id``, ``message_id`` only). + + Raises: + None intentionally. Malformed dicts (missing keys) may raise ``KeyError``. + Django ORM may raise database-related exceptions. + + Side effects: + Writes ``DiscordMessage`` via ``bulk_create``. May emit ``logger.warning``. + """ if not message_data_list: return {} @@ -353,7 +582,27 @@ def bulk_upsert_discord_reactions( reaction_data_list: List[Dict[str, Any]], message_map: Dict[int, DiscordMessage], ) -> None: - """Bulk upsert reactions.""" + """Bulk upsert reactions using ``bulk_create(update_conflicts=True)``. + + Entries whose ``discord_message_id`` is missing from ``message_map`` are skipped + silently (no log). Duplicate (message PK, emoji) pairs keep the **last** payload. + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + reaction_data_list: Dicts with ``discord_message_id``, ``emoji``, optional ``count``. + message_map: ``message_id -> DiscordMessage`` from ``bulk_upsert_discord_messages``. 
+ + Returns: + None + + Raises: + None intentionally. Malformed dicts may raise ``KeyError``. Django ORM may + raise database-related exceptions. + + Side effects: + Writes ``DiscordReaction``. + """ if not reaction_data_list: return @@ -388,7 +637,30 @@ def bulk_process_message_batch( message_data_list: List[Dict[str, Any]], channel: DiscordChannel, ) -> int: - """Orchestrate bulk upsert: users → messages → reactions. Returns count.""" + """Run user upsert, message upsert, and reaction upsert inside one DB transaction. + + Return value is **always** ``len(message_data_list)`` when the input list is + non-empty, even if some messages were skipped inside ``bulk_upsert_discord_messages`` + (skipped rows do not reduce the returned count). + + Does not perform Discord HTTP calls; does not emit ``CollectorFailureCategory``. + + Args: + message_data_list: Batch of normalized message dicts for one channel. + channel: Target ``DiscordChannel``. + + Returns: + ``0`` if ``message_data_list`` is empty; otherwise ``len(message_data_list)``. + + Raises: + None intentionally. Malformed dicts may raise ``KeyError``. Django ORM may + raise database-related exceptions; on failure the whole transaction rolls back. + + Side effects: + One ``transaction.atomic()`` block: writes profiles (via + ``bulk_upsert_discord_users``), messages, and reactions. See those functions + for logging and skip behavior. + """ if not message_data_list: return 0 diff --git a/discord_activity_tracker/sync/__init__.py b/discord_activity_tracker/sync/__init__.py index e69de29..774de71 100644 --- a/discord_activity_tracker/sync/__init__.py +++ b/discord_activity_tracker/sync/__init__.py @@ -0,0 +1,8 @@ +"""Discord ingest and export helpers (not the DB service layer). + +- ``sync.chat_exporter`` — DiscordChatExporter CLI integration and JSON parsing. +- ``sync.messages`` — Normalized message batches and ``discord.py`` client helpers. +- ``sync.client`` — ``DiscordSyncClient`` wrapper. 
+- ``sync.exporter_window`` — DB-backed lower bounds for incremental exports. +- ``sync.export`` — Markdown export from ORM data. +""" diff --git a/discord_activity_tracker/tests/test_backfill_command_extra.py b/discord_activity_tracker/tests/test_backfill_command_extra.py new file mode 100644 index 0000000..d4c2887 --- /dev/null +++ b/discord_activity_tracker/tests/test_backfill_command_extra.py @@ -0,0 +1,119 @@ +"""Extra coverage for backfill_discord_activity_tracker command.""" + +from __future__ import annotations + +import asyncio +from io import StringIO +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from discord_activity_tracker.management.commands.backfill_discord_activity_tracker import ( + Command, + DiscordBackfillCollector, + _json_display_path, +) + + +def test_json_display_path_outside_import_root_returns_basename(): + assert _json_display_path(Path("/a/b"), Path("/x/other.json")) == "other.json" + + +@pytest.mark.django_db +def test_backfill_collector_sync_pinecone_calls_runner(): + style = MagicMock() + style.SUCCESS = lambda x: x + c = DiscordBackfillCollector( + stdout=StringIO(), style=style, dry_run=False, skip_pinecone=False + ) + with patch( + "discord_activity_tracker.management.commands.backfill_discord_activity_tracker.task_discord_pinecone_sync" + ) as t: + c.sync_pinecone() + t.assert_called_once_with(dry_run=False) + + +@pytest.mark.django_db +def test_backfill_collector_sync_pinecone_skipped_when_dry_run(): + style = MagicMock() + c = DiscordBackfillCollector( + stdout=StringIO(), style=style, dry_run=True, skip_pinecone=False + ) + with patch( + "discord_activity_tracker.management.commands.backfill_discord_activity_tracker.task_discord_pinecone_sync" + ) as t: + c.sync_pinecone() + t.assert_not_called() + + +def test_backfill_get_collector_skip_pinecone_none(): + cmd = Command() + cmd.stdout = StringIO() + cmd.style = MagicMock() + c = cmd.get_collector(dry_run=False, skip_pinecone=None) + 
assert c.skip_pinecone is False + + +@pytest.mark.django_db +def test_backfill_run_handles_bad_json(tmp_path, settings): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + imp = tmp_path / "import_here" + imp.mkdir() + bad = imp / "bad.json" + bad.write_text("{", encoding="utf-8") + + style = MagicMock() + style.WARNING = lambda x: x + style.SUCCESS = lambda x: x + style.ERROR = lambda x: x + out = StringIO() + + with patch( + "discord_activity_tracker.management.commands.backfill_discord_activity_tracker.get_cpp_discussion_import_dir", + return_value=imp, + ): + DiscordBackfillCollector( + stdout=out, style=style, dry_run=False, skip_pinecone=True + ).run() + + output = out.getvalue() + assert "bad.json" in output + assert "Failed bad.json:" in output + assert "Import complete: 0 messages from 1 file(s)" in output + + +@pytest.mark.django_db +def test_backfill_persist_channel_writes(settings, tmp_path): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + gid, cid = 220011, 220022 + guild_info = {"id": gid, "name": "G", "iconUrl": ""} + channel_info = { + "id": cid, + "name": "c", + "type": "GuildTextChat", + "topic": "", + "category": "", + "categoryId": None, + } + messages = [ + { + "id": str(10**12 + 3), + "type": "Default", + "isPinned": False, + "timestamp": "2026-01-15T12:00:00Z", + "content": "hello world example text long enough for validation", + "author": {"id": "1082347485026070548", "name": "u"}, + "attachments": [], + "reactions": [], + } + ] + style = MagicMock() + style.SUCCESS = lambda x: x + c = DiscordBackfillCollector( + stdout=StringIO(), style=style, dry_run=False, skip_pinecone=True + ) + n = asyncio.run(c._persist_channel(guild_info, channel_info, messages)) + assert n >= 1 diff --git a/discord_activity_tracker/tests/test_chat_exporter_branch_coverage.py b/discord_activity_tracker/tests/test_chat_exporter_branch_coverage.py new file mode 100644 index 
0000000..ee90133 --- /dev/null +++ b/discord_activity_tracker/tests/test_chat_exporter_branch_coverage.py @@ -0,0 +1,145 @@ +"""Extra branch coverage for sync/chat_exporter.py.""" + +from __future__ import annotations + +import sys +from unittest.mock import MagicMock, patch + +import pytest +from django.conf import settings + +from discord_activity_tracker.sync.chat_exporter import ( + DiscordChatExporterError, + _file_command_brief_description, + _run_channels_listing, + export_guild_to_json, + parse_channels_command_stdout, +) + + +def test_file_command_brief_description_no_file_binary(tmp_path): + with patch( + "discord_activity_tracker.sync.chat_exporter.shutil.which", return_value=None + ): + assert _file_command_brief_description(tmp_path / "x") is None + + +def test_file_command_brief_description_subprocess_error(tmp_path): + with ( + patch( + "discord_activity_tracker.sync.chat_exporter.shutil.which", + return_value="/bin/file", + ), + patch( + "discord_activity_tracker.sync.chat_exporter.subprocess.run", + side_effect=OSError("nope"), + ), + ): + assert _file_command_brief_description(tmp_path / "x") is None + + +def test_file_command_brief_description_nonzero_return(tmp_path): + proc = MagicMock(returncode=1, stdout="", stderr="") + with ( + patch( + "discord_activity_tracker.sync.chat_exporter.shutil.which", + return_value="/bin/file", + ), + patch( + "discord_activity_tracker.sync.chat_exporter.subprocess.run", + return_value=proc, + ), + ): + assert _file_command_brief_description(tmp_path / "x") is None + + +def test_run_channels_listing_failure_raises(tmp_path, monkeypatch): + cli = tmp_path / "cli" + cli.touch() + monkeypatch.setattr(settings, "DISCORD_CHAT_EXPORTER_DOTNET_DLL", None) + proc = MagicMock(returncode=1, stdout="", stderr="err") + with ( + patch( + "discord_activity_tracker.sync.chat_exporter._get_cli_path", + return_value=cli, + ), + patch( + "discord_activity_tracker.sync.chat_exporter.subprocess.run", + return_value=proc, + ), + ): 
+ with pytest.raises(DiscordChatExporterError, match="channels"): + _run_channels_listing(cli, "tok", 1, "None") + + +def test_run_channels_listing_success(monkeypatch, tmp_path): + cli = tmp_path / "cli" + cli.touch() + monkeypatch.setattr(settings, "DISCORD_CHAT_EXPORTER_DOTNET_DLL", None) + proc = MagicMock(returncode=0, stdout="12345 | #general\n", stderr="") + with ( + patch( + "discord_activity_tracker.sync.chat_exporter._get_cli_path", + return_value=cli, + ), + patch( + "discord_activity_tracker.sync.chat_exporter.subprocess.run", + return_value=proc, + ), + ): + ids = _run_channels_listing(cli, "tok", 1, "None") + assert ids == [12345] + + +def test_export_guild_dotnet_dll_missing_raises(tmp_path, monkeypatch): + out = tmp_path / "out" + missing_dll = tmp_path / "nope.dll" + monkeypatch.setattr(settings, "DISCORD_CHAT_EXPORTER_DOTNET_DLL", str(missing_dll)) + with pytest.raises(DiscordChatExporterError, match="missing"): + export_guild_to_json("t", 1, out) + + +def test_export_guild_dotnet_no_dotnet_binary_raises(tmp_path, monkeypatch): + dll = tmp_path / "app.dll" + dll.write_bytes(b"x") + monkeypatch.setattr(settings, "DISCORD_CHAT_EXPORTER_DOTNET_DLL", str(dll)) + monkeypatch.setattr(settings, "DISCORD_CHAT_EXPORTER_DOTNET", "") + with patch( + "discord_activity_tracker.sync.chat_exporter.shutil.which", return_value=None + ): + with pytest.raises(DiscordChatExporterError, match="dotnet"): + export_guild_to_json("t", 1, tmp_path / "o") + + +def test_export_guild_os_error_errno_8_wraps(tmp_path, monkeypatch): + cli = tmp_path / "cli" + cli.touch() + monkeypatch.setattr(settings, "DISCORD_CHAT_EXPORTER_DOTNET_DLL", None) + err = OSError("exec format error") + err.errno = 8 + with ( + patch( + "discord_activity_tracker.sync.chat_exporter._get_cli_path", + return_value=cli, + ), + patch( + "discord_activity_tracker.sync.chat_exporter.validate_discord_chat_exporter_cli_architecture", + ), + patch( + 
"discord_activity_tracker.sync.chat_exporter._get_sequential_export", + return_value=False, + ), + patch( + "discord_activity_tracker.sync.chat_exporter._export_guild_exportguild", + side_effect=err, + ), + ): + if sys.platform == "win32": + pytest.skip("errno 8 branch is POSIX-only") + with pytest.raises(DiscordChatExporterError, match="wrong executable format"): + export_guild_to_json("t", 1, tmp_path / "o2", after_date=None) + + +def test_parse_channels_skips_thread_banner_lines(): + text = "* thread\n123 | #x\n" + assert parse_channels_command_stdout(text) == [123] diff --git a/discord_activity_tracker/tests/test_export_sync_coverage.py b/discord_activity_tracker/tests/test_export_sync_coverage.py new file mode 100644 index 0000000..1343b00 --- /dev/null +++ b/discord_activity_tracker/tests/test_export_sync_coverage.py @@ -0,0 +1,381 @@ +"""Coverage for sync/export.py (markdown export, git helpers).""" + +from __future__ import annotations + +import uuid +from datetime import datetime, timedelta, timezone +from unittest.mock import MagicMock, patch + +import pytest +from django.utils import timezone as django_timezone + +from cppa_user_tracker.models import DiscordProfile +from discord_activity_tracker.models import ( + DiscordChannel, + DiscordMessage, + DiscordServer, +) +from discord_activity_tracker.sync.export import ( + _strip_invisible_unicode, + commit_and_push_context_repo, + export_all_active_channels, + export_and_push, + export_channel_to_markdown, + generate_markdown_content, +) + + +def _uid() -> int: + return uuid.uuid4().int % (2**50) + + +@pytest.fixture +def export_server(db): + return DiscordServer.objects.create( + server_id=_uid(), server_name="Export Guild", icon_url="" + ) + + +@pytest.fixture +def export_channel(db, export_server): + return DiscordChannel.objects.create( + server=export_server, + channel_id=_uid(), + channel_name="general", + channel_type="text", + ) + + +@pytest.fixture +def export_author(db): + return 
DiscordProfile.objects.create( + discord_user_id=_uid(), + username="alice", + display_name="Alice", + avatar_url="", + is_bot=False, + ) + + +def test_strip_invisible_unicode_empty_returns_empty(): + assert _strip_invisible_unicode("") == "" + + +@pytest.mark.django_db +def test_generate_markdown_microsecond_timestamp(export_channel, export_author): + ts = datetime(2026, 3, 1, 10, 0, 0, 500000, tzinfo=timezone.utc) + msg = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="hi", + message_created_at=ts, + ) + out = generate_markdown_content(export_channel, "2026-03", [msg]) + assert "10:00:00.500" in out + + +@pytest.mark.django_db +def test_generate_markdown_reply_same_day(export_channel, export_author): + root = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="root text here", + message_created_at=datetime(2026, 3, 5, 9, 0, 0, tzinfo=timezone.utc), + ) + reply = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="reply", + message_created_at=datetime(2026, 3, 5, 9, 5, 0, tzinfo=timezone.utc), + reply_to_message_id=root.message_id, + ) + out = generate_markdown_content(export_channel, "2026-03", [root, reply]) + assert "Reply to:" in out + assert "Original:" in out + + +@pytest.mark.django_db +def test_generate_markdown_reply_split_by_day_other_month( + export_channel, export_author +): + root = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="x" * 90, + message_created_at=datetime(2026, 2, 28, 23, 0, 0, tzinfo=timezone.utc), + ) + reply = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="r", + message_created_at=datetime(2026, 3, 1, 1, 0, 0, tzinfo=timezone.utc), + reply_to_message_id=root.message_id, + ) + out = generate_markdown_content( + 
export_channel, "2026-03", [reply], date_str="2026-03-01", split_by_day=True + ) + assert "../2026-02/" in out or "2026-02" in out + + +@pytest.mark.django_db +def test_generate_markdown_reply_missing_parent_skipped(export_channel, export_author): + msg = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="orphan", + message_created_at=datetime(2026, 3, 1, 12, 0, 0, tzinfo=timezone.utc), + reply_to_message_id=999999999999, + ) + out = generate_markdown_content(export_channel, "2026-03", [msg]) + assert "orphan" in out + assert "Reply to:" not in out + + +@pytest.mark.django_db +def test_generate_markdown_code_fence_and_unclosed(export_channel, export_author): + msg = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="```\nunclosed", + message_created_at=datetime(2026, 3, 2, 8, 0, 0, tzinfo=timezone.utc), + ) + out = generate_markdown_content(export_channel, "2026-03", [msg]) + assert "" in out + assert out.count("```") >= 2 + + +@pytest.mark.django_db +def test_generate_markdown_attachments(export_channel, export_author): + msg = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="see file", + message_created_at=datetime(2026, 3, 3, 8, 0, 0, tzinfo=timezone.utc), + has_attachments=True, + attachment_urls=["https://cdn.discord.com/a/b/file.png?ex=1"], + ) + out = generate_markdown_content(export_channel, "2026-03", [msg]) + assert "Attachments:" in out + assert "file.png" in out + + +@pytest.mark.django_db +def test_generate_markdown_reply_same_month_aggregate_link( + export_channel, export_author +): + """Reply in same calendar month as year_month uses in-page anchor (export.py ~168).""" + root = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="root", + message_created_at=datetime(2026, 3, 1, 8, 0, 0, 
tzinfo=timezone.utc), + ) + reply = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="later", + message_created_at=datetime(2026, 3, 15, 9, 0, 0, tzinfo=timezone.utc), + reply_to_message_id=root.message_id, + ) + out = generate_markdown_content(export_channel, "2026-03", [root, reply]) + assert "Reply to:" in out + assert "](" in out and "#" in out + + +@pytest.mark.django_db +def test_generate_markdown_reply_microsecond_reply_time(export_channel, export_author): + root = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="root", + message_created_at=datetime(2026, 3, 10, 1, 0, 0, tzinfo=timezone.utc), + ) + reply = DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="r", + message_created_at=datetime(2026, 3, 10, 1, 0, 0, 123000, tzinfo=timezone.utc), + reply_to_message_id=root.message_id, + ) + out = generate_markdown_content(export_channel, "2026-03", [root, reply]) + assert "Reply to:" in out + + +@pytest.mark.django_db +def test_export_channel_to_markdown_writes_per_day_files( + export_channel, export_author, tmp_path +): + repo = tmp_path / "ctx" + repo.mkdir() + t0 = datetime(2026, 4, 10, 12, 0, 0, tzinfo=timezone.utc) + DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="day a", + message_created_at=t0, + ) + DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="day b", + message_created_at=t0 + timedelta(days=1), + ) + paths = export_channel_to_markdown(export_channel, "2026-04", repo) + assert paths is not None and len(paths) == 2 + assert all(p.suffix == ".md" for p in paths) + + +@pytest.mark.django_db +def test_export_channel_to_markdown_empty_month_returns_none(export_channel, tmp_path): + assert export_channel_to_markdown(export_channel, "2026-05", 
tmp_path) is None + + +@pytest.mark.django_db +def test_export_all_active_channels_collects_paths( + export_server, export_channel, export_author, tmp_path, monkeypatch +): + now = django_timezone.now() + DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="recent", + message_created_at=now - timedelta(days=1), + ) + ym = now.strftime("%Y-%m") + fake_paths = [tmp_path / f"{ym}-stub.md"] + + def fake_export(ch, year_month, out_dir): + if ch.pk == export_channel.pk and year_month == ym: + return fake_paths + return None + + monkeypatch.setattr( + "discord_activity_tracker.sync.export.export_channel_to_markdown", + fake_export, + ) + paths = export_all_active_channels( + tmp_path, export_server, months_back=1, active_days=30 + ) + assert paths == fake_paths + + +@pytest.mark.django_db +def test_export_all_active_channels_continues_on_channel_error( + export_server, export_channel, export_author, tmp_path, monkeypatch +): + now = django_timezone.now() + DiscordMessage.objects.create( + message_id=_uid(), + channel=export_channel, + author=export_author, + content="recent", + message_created_at=now - timedelta(hours=1), + ) + + def boom(*_a, **_k): + raise RuntimeError("export failed") + + monkeypatch.setattr( + "discord_activity_tracker.sync.export.export_channel_to_markdown", + boom, + ) + paths = export_all_active_channels(tmp_path, export_server, months_back=1) + assert paths == [] + + +def test_commit_and_push_no_changes(tmp_path): + calls: list[list[str]] = [] + + def run_side_effect(cmd, **_kwargs): + calls.append(list(cmd)) + if "status" in cmd: + return MagicMock(returncode=0, stdout="", stderr="") + return MagicMock(returncode=0, stdout="", stderr="") + + with patch("discord_activity_tracker.sync.export.subprocess.run", run_side_effect): + assert commit_and_push_context_repo(tmp_path) is True + assert any("status" in c for c in calls) + + +def test_commit_and_push_full_flow(tmp_path): + seq = iter( + 
[ + MagicMock(returncode=0, stdout="", stderr=""), + MagicMock(returncode=0, stdout=" M file\n", stderr=""), + MagicMock(returncode=0, stdout="", stderr=""), + MagicMock(returncode=0, stdout="", stderr=""), + ] + ) + + def run_side_effect(cmd, **_kwargs): + return next(seq) + + with patch("discord_activity_tracker.sync.export.subprocess.run", run_side_effect): + assert commit_and_push_context_repo(tmp_path, "msg") is True + + +def test_commit_and_push_git_error(tmp_path): + import subprocess as sp + + def run_side_effect(cmd, **_kwargs): + raise sp.CalledProcessError(1, cmd, stderr="err") + + with patch("discord_activity_tracker.sync.export.subprocess.run", run_side_effect): + assert commit_and_push_context_repo(tmp_path) is False + + +def test_commit_and_push_generic_exception(tmp_path): + with patch( + "discord_activity_tracker.sync.export.subprocess.run", + side_effect=OSError("boom"), + ): + assert commit_and_push_context_repo(tmp_path) is False + + +@pytest.mark.django_db +def test_export_and_push_no_files_returns_false(export_server, tmp_path): + with patch( + "discord_activity_tracker.sync.export.export_all_active_channels", + return_value=[], + ): + assert export_and_push(tmp_path, export_server) is False + + +@pytest.mark.django_db +def test_export_and_push_files_no_auto_commit(export_server, tmp_path): + with patch( + "discord_activity_tracker.sync.export.export_all_active_channels", + return_value=[tmp_path / "a.md"], + ): + assert export_and_push(tmp_path, export_server, auto_commit=False) is True + + +@pytest.mark.django_db +def test_export_and_push_auto_commit(export_server, tmp_path): + with ( + patch( + "discord_activity_tracker.sync.export.export_all_active_channels", + return_value=[tmp_path / "a.md"], + ), + patch( + "discord_activity_tracker.sync.export.commit_and_push_context_repo", + return_value=True, + ) as m, + ): + assert export_and_push(tmp_path, export_server, auto_commit=True) is True + m.assert_called_once() diff --git 
a/discord_activity_tracker/tests/test_exporter_window.py b/discord_activity_tracker/tests/test_exporter_window.py new file mode 100644 index 0000000..b7f4c91 --- /dev/null +++ b/discord_activity_tracker/tests/test_exporter_window.py @@ -0,0 +1,90 @@ +"""Tests for sync/exporter_window.py.""" + +from __future__ import annotations + +import uuid +from datetime import datetime, timezone + +import pytest + +from cppa_user_tracker.models import DiscordProfile +from discord_activity_tracker.models import ( + DiscordChannel, + DiscordMessage, + DiscordServer, +) +from discord_activity_tracker.sync.exporter_window import ( + latest_message_created_at_for_guild, +) + + +def _uid() -> int: + return uuid.uuid4().int % (2**50) + + +@pytest.mark.django_db +def test_latest_message_empty_db(): + assert latest_message_created_at_for_guild(999001, channel_ids=None) is None + + +@pytest.mark.django_db +def test_latest_message_ignores_deleted(): + srv = DiscordServer.objects.create(server_id=_uid(), server_name="G", icon_url="") + ch = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="c", channel_type="text" + ) + author = DiscordProfile.objects.create( + discord_user_id=_uid(), + username="u", + display_name="U", + avatar_url="", + is_bot=False, + ) + t = datetime(2026, 1, 1, tzinfo=timezone.utc) + DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="deleted", + message_created_at=t, + is_deleted=True, + ) + assert latest_message_created_at_for_guild(srv.server_id, channel_ids=None) is None + + +@pytest.mark.django_db +def test_latest_message_respects_channel_allowlist(): + srv = DiscordServer.objects.create(server_id=_uid(), server_name="G", icon_url="") + ch1 = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="a", channel_type="text" + ) + ch2 = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="b", channel_type="text" + ) + author = 
DiscordProfile.objects.create( + discord_user_id=_uid(), + username="u", + display_name="U", + avatar_url="", + is_bot=False, + ) + t1 = datetime(2026, 2, 1, tzinfo=timezone.utc) + t2 = datetime(2026, 3, 1, tzinfo=timezone.utc) + DiscordMessage.objects.create( + message_id=_uid(), + channel=ch1, + author=author, + content="older", + message_created_at=t1, + ) + DiscordMessage.objects.create( + message_id=_uid(), + channel=ch2, + author=author, + content="newer", + message_created_at=t2, + ) + latest = latest_message_created_at_for_guild( + srv.server_id, channel_ids=[ch1.channel_id] + ) + assert latest == t1 diff --git a/discord_activity_tracker/tests/test_failure_classification.py b/discord_activity_tracker/tests/test_failure_classification.py new file mode 100644 index 0000000..359470e --- /dev/null +++ b/discord_activity_tracker/tests/test_failure_classification.py @@ -0,0 +1,67 @@ +"""Discord-related failure classification for CollectorFailureCategory.""" + +from __future__ import annotations + +from core.errors import CollectorFailureCategory, classify_failure + + +def _make_discord_http_exception(status: int) -> Exception: + cls = type("HTTPException", (Exception,), {}) + cls.__module__ = "discord.errors" + exc = cls() + exc.status = status + return exc + + +def test_discord_http_429_is_rate_limit(): + exc = _make_discord_http_exception(429) + assert classify_failure(exc) is CollectorFailureCategory.RATE_LIMIT + + +def test_discord_http_401_is_auth(): + exc = _make_discord_http_exception(401) + assert classify_failure(exc) is CollectorFailureCategory.AUTH + + +def test_discord_http_403_is_auth(): + exc = _make_discord_http_exception(403) + assert classify_failure(exc) is CollectorFailureCategory.AUTH + + +def test_discord_forbidden_subclass_403_is_auth(): + cls = type("Forbidden", (Exception,), {}) + cls.__module__ = "discord.errors" + exc = cls() + exc.status = 403 + assert classify_failure(exc) is CollectorFailureCategory.AUTH + + +def 
test_discord_not_found_subclass_404_is_unknown(): + cls = type("NotFound", (Exception,), {}) + cls.__module__ = "discord.errors" + exc = cls() + exc.status = 404 + assert classify_failure(exc) is CollectorFailureCategory.UNKNOWN + + +def test_discord_http_502_is_network(): + exc = _make_discord_http_exception(502) + assert classify_failure(exc) is CollectorFailureCategory.NETWORK + + +def test_discord_http_404_is_unknown(): + exc = _make_discord_http_exception(404) + assert classify_failure(exc) is CollectorFailureCategory.UNKNOWN + + +def test_discord_http_no_status_defaults_network(): + cls = type("HTTPException", (Exception,), {}) + cls.__module__ = "discord.errors" + exc = cls() + assert classify_failure(exc) is CollectorFailureCategory.NETWORK + + +def test_discord_login_failure_is_auth(): + cls = type("LoginFailure", (Exception,), {}) + cls.__module__ = "discord.errors" + assert classify_failure(cls()) is CollectorFailureCategory.AUTH diff --git a/discord_activity_tracker/tests/test_pinecone_runner_coverage.py b/discord_activity_tracker/tests/test_pinecone_runner_coverage.py new file mode 100644 index 0000000..b13a4c5 --- /dev/null +++ b/discord_activity_tracker/tests/test_pinecone_runner_coverage.py @@ -0,0 +1,47 @@ +"""Coverage for pinecone_runner.""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from discord_activity_tracker.pinecone_runner import task_discord_pinecone_sync + + +def test_task_discord_pinecone_sync_dry_run(): + task_discord_pinecone_sync(dry_run=True) + + +@pytest.mark.django_db +def test_task_discord_pinecone_sync_skips_when_app_type_empty(monkeypatch, settings): + monkeypatch.setattr(settings, "PINECONE_DISCORD_APP_TYPE", "") + monkeypatch.setattr(settings, "PINECONE_DISCORD_NAMESPACE", "ns") + task_discord_pinecone_sync(dry_run=False) + + +@pytest.mark.django_db +def test_task_discord_pinecone_sync_skips_when_namespace_empty(monkeypatch, settings): + monkeypatch.setattr(settings, 
"PINECONE_DISCORD_APP_TYPE", "app") + monkeypatch.setattr(settings, "PINECONE_DISCORD_NAMESPACE", " ") + task_discord_pinecone_sync(dry_run=False) + + +@pytest.mark.django_db +def test_task_discord_pinecone_sync_calls_run_command(monkeypatch, settings): + monkeypatch.setattr(settings, "PINECONE_DISCORD_APP_TYPE", "discord") + monkeypatch.setattr(settings, "PINECONE_DISCORD_NAMESPACE", "ns") + with patch("discord_activity_tracker.pinecone_runner.call_command") as cc: + task_discord_pinecone_sync(dry_run=False) + cc.assert_called_once() + + +@pytest.mark.django_db +def test_task_discord_pinecone_sync_swallows_call_command_error(monkeypatch, settings): + monkeypatch.setattr(settings, "PINECONE_DISCORD_APP_TYPE", "discord") + monkeypatch.setattr(settings, "PINECONE_DISCORD_NAMESPACE", "ns") + with patch( + "discord_activity_tracker.pinecone_runner.call_command", + side_effect=RuntimeError("no command"), + ): + task_discord_pinecone_sync(dry_run=False) diff --git a/discord_activity_tracker/tests/test_preprocessor_extra.py b/discord_activity_tracker/tests/test_preprocessor_extra.py new file mode 100644 index 0000000..54526c6 --- /dev/null +++ b/discord_activity_tracker/tests/test_preprocessor_extra.py @@ -0,0 +1,159 @@ +"""Extra coverage for preprocessor reply chains and edge paths.""" + +from __future__ import annotations + +import uuid +from datetime import timedelta +from unittest.mock import patch + +import pytest +from django.utils import timezone as django_timezone + +from cppa_user_tracker.models import DiscordProfile +from discord_activity_tracker.models import ( + DiscordChannel, + DiscordMessage, + DiscordServer, +) +from discord_activity_tracker.preprocessor import ( + _build_reply_chains, + _chain_to_document, + preprocess_discord_for_pinecone, +) + + +def _uid() -> int: + return uuid.uuid4().int % (2**50) + + +@pytest.mark.django_db +def test_build_reply_chains_skips_reply_having_parent_in_batch(): + srv = DiscordServer.objects.create(server_id=_uid(), 
server_name="G", icon_url="") + ch = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="c", channel_type="text" + ) + author = DiscordProfile.objects.create( + discord_user_id=_uid(), + username="u", + display_name="U", + avatar_url="", + is_bot=False, + ) + root = DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="root msg here long enough for any downstream checks", + message_created_at=django_timezone.now(), + ) + reply = DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="reply text here long enough for any downstream checks", + message_created_at=django_timezone.now(), + reply_to_message_id=root.message_id, + ) + chains = _build_reply_chains([root, reply]) + assert len(chains) == 1 + assert {m.message_id for m in chains[0]} == {root.message_id, reply.message_id} + + +@pytest.mark.django_db +def test_build_reply_chains_orphan_at_end(): + srv = DiscordServer.objects.create(server_id=_uid(), server_name="G", icon_url="") + ch = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="c", channel_type="text" + ) + author = DiscordProfile.objects.create( + discord_user_id=_uid(), + username="u", + display_name="U", + avatar_url="", + is_bot=False, + ) + orphan = DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="orphan reply text here long enough for downstream checks", + message_created_at=django_timezone.now(), + reply_to_message_id=999999999999, + ) + chains = _build_reply_chains([orphan]) + assert len(chains) == 1 + assert chains[0] == [orphan] + + +@pytest.mark.django_db +def test_chain_to_document_long_content_returns_document(): + srv = DiscordServer.objects.create(server_id=_uid(), server_name="G", icon_url="") + ch = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="c", channel_type="text" + ) + author = DiscordProfile.objects.create( + 
discord_user_id=_uid(), + username="u", + display_name="U", + avatar_url="", + is_bot=False, + ) + root = DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="x" * 80, + message_created_at=django_timezone.now(), + ) + assert _chain_to_document([root]) is not None + + +@pytest.mark.django_db +def test_preprocess_discord_duplicate_doc_ids_skipped_second(): + srv = DiscordServer.objects.create(server_id=_uid(), server_name="G", icon_url="") + ch = DiscordChannel.objects.create( + server=srv, channel_id=_uid(), channel_name="c", channel_type="text" + ) + author = DiscordProfile.objects.create( + discord_user_id=_uid(), + username="u", + display_name="U", + avatar_url="", + is_bot=False, + ) + DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="y" * 80, + message_created_at=django_timezone.now(), + ) + DiscordMessage.objects.create( + message_id=_uid(), + channel=ch, + author=author, + content="z" * 80, + message_created_at=django_timezone.now(), + ) + + doc = { + "content": "a" * 80, + "metadata": {"doc_id": "same", "type": "discord"}, + } + + with patch( + "discord_activity_tracker.preprocessor._chain_to_document", + return_value=doc, + ): + docs, _ = preprocess_discord_for_pinecone([], None) + + assert len(docs) == 1 + + +@pytest.mark.django_db +def test_preprocess_discord_nothing_to_sync_logs(caplog): + caplog.set_level("INFO") + future = django_timezone.now() + timedelta(days=3650) + docs, _ = preprocess_discord_for_pinecone([], future) + assert docs == [] + assert "nothing to sync" in caplog.text.lower() diff --git a/discord_activity_tracker/tests/test_run_command_coverage.py b/discord_activity_tracker/tests/test_run_command_coverage.py new file mode 100644 index 0000000..755a109 --- /dev/null +++ b/discord_activity_tracker/tests/test_run_command_coverage.py @@ -0,0 +1,298 @@ +"""Coverage for run_discord_activity_tracker command _handle_core and helpers.""" + +from __future__ import 
annotations + +import asyncio +from io import StringIO +from unittest.mock import MagicMock, patch + +import pytest +from django.core.management.base import CommandError + +from discord_activity_tracker.management.commands.run_discord_activity_tracker import ( + Command, + DiscordActivityCollector, + _resolve_exporter_date_bounds, + task_preprocess_workspace, +) + + +def _cmd_collector(**opts): + defaults = { + "dry_run": False, + "skip_discord_sync": False, + "skip_markdown_export": False, + "skip_remote_push": False, + "skip_pinecone": False, + "channels": "", + "since": None, + "until": None, + "task": None, + } + defaults.update(opts) + cmd = Command() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options=defaults) + return cmd, collector + + +@pytest.mark.django_db +def test_resolve_bounds_since_after_until_resets(monkeypatch, caplog): + """since > until logs warning and falls back so bounds are recomputed.""" + caplog.set_level("WARNING") + after, before = _resolve_exporter_date_bounds( + {"since": "2026-06-10", "until": "2026-06-01"}, + guild_snowflake=1, + channel_ids=[], + ) + assert before is None + assert after is None + assert "invalid date range" in caplog.text + + +def test_resolve_bounds_bad_since_raises_command_error(): + with pytest.raises(CommandError): + _resolve_exporter_date_bounds( + {"since": "not-a-date", "until": None}, + guild_snowflake=1, + channel_ids=[], + ) + + +@pytest.mark.django_db +def test_handle_core_dry_run_all_branches(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9001) + _, collector = _cmd_collector( + dry_run=True, + skip_discord_sync=False, + skip_markdown_export=False, + skip_remote_push=False, + skip_pinecone=False, + since="2026-01-01", + until="2026-01-31", + ) + with patch( + 
"discord_activity_tracker.management.commands.run_discord_activity_tracker.task_preprocess_workspace" + ) as tp: + collector.cmd._handle_core(collector.options, collector) + tp.assert_called_once_with(dry_run=True) + out = collector.stdout.getvalue() + assert "DRY RUN" in out + assert "Lower bound" in out + assert "Upper bound" in out + + +@pytest.mark.django_db +def test_handle_core_dry_run_skip_sync_only(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9002) + _, collector = _cmd_collector( + dry_run=True, + skip_discord_sync=True, + skip_markdown_export=True, + skip_remote_push=True, + skip_pinecone=True, + ) + with patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_preprocess_workspace" + ): + collector.cmd._handle_core(collector.options, collector) + out = collector.stdout.getvalue() + assert "full history" in out or "none" in out.lower() + + +@pytest.mark.django_db +def test_handle_core_task_sync_skips_markdown(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9003) + _, collector = _cmd_collector(dry_run=False, task="sync") + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync" + ) as ts, + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_markdown_export_and_push" + ) as tm, + ): + collector.cmd._handle_core(collector.options, collector) + ts.assert_called_once() + tm.assert_called_once() + assert collector.options["skip_markdown_export"] is True + assert collector.options["skip_remote_push"] is True + + +@pytest.mark.django_db +def test_handle_core_task_export_skips_sync(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9004) + _, collector = 
_cmd_collector(dry_run=False, task="export") + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync" + ) as ts, + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_markdown_export_and_push" + ) as tm, + ): + collector.cmd._handle_core(collector.options, collector) + ts.assert_called_once() + tm.assert_called_once() + assert collector.options["skip_discord_sync"] is True + assert collector.options["skip_pinecone"] is True + + +@pytest.mark.django_db +def test_handle_core_non_dry_calls_sync_and_markdown(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9005) + _, collector = _cmd_collector(dry_run=False) + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync" + ) as ts, + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_markdown_export_and_push" + ) as tm, + ): + collector.cmd._handle_core(collector.options, collector) + ts.assert_called_once() + tm.assert_called_once() + + +@pytest.mark.django_db +def test_handle_core_skip_pinecone_logs(monkeypatch, settings, caplog): + caplog.set_level("INFO") + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9006) + _, collector = _cmd_collector(dry_run=False, skip_pinecone=True) + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync" + ) as ts, + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_markdown_export_and_push" + ) as tm, + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_pinecone_sync" + ) as tp, + ): + collector.cmd._handle_core(collector.options, collector) + collector.sync_pinecone() + ts.assert_called_once() + 
tm.assert_called_once() + tp.assert_not_called() + assert "skipping Pinecone (--skip-pinecone)" in caplog.text + + +@pytest.mark.django_db +def test_handle_core_propagates_task_failure(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9007) + _, collector = _cmd_collector(dry_run=False) + with patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync", + side_effect=RuntimeError("fail"), + ): + with pytest.raises(RuntimeError, match="fail"): + collector.cmd._handle_core(collector.options, collector) + + +def test_get_collector_normalizes_skip_pinecone_none(): + cmd = Command() + cmd.stdout = StringIO() + cmd.style = MagicMock() + c = cmd.get_collector( + dry_run=False, + skip_discord_sync=False, + skip_markdown_export=False, + skip_remote_push=False, + skip_pinecone=None, + ) + assert c.options.get("skip_pinecone") is False + + +@pytest.mark.django_db +def test_task_preprocess_workspace_dry_run(tmp_path, settings): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + task_preprocess_workspace(dry_run=True) + + +def test_resolve_bounds_since_naive_becomes_utc(): + after, before = _resolve_exporter_date_bounds( + {"since": "2026-04-01T00:00:00", "until": None}, + guild_snowflake=1, + channel_ids=[], + ) + assert after is not None + assert after.tzinfo is not None + assert before is None + + +@pytest.mark.django_db +def test_handle_core_task_all_runs_both_phases(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9008) + _, collector = _cmd_collector(dry_run=False, task="all") + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync" + ) as ts, + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_markdown_export_and_push" + ) 
as tm, + ): + collector.cmd._handle_core(collector.options, collector) + ts.assert_called_once() + tm.assert_called_once() + + +@pytest.mark.django_db +def test_handle_core_wraps_discord_exporter_error(monkeypatch, settings): + monkeypatch.setattr(settings, "DISCORD_USER_TOKEN", "tok") + monkeypatch.setattr(settings, "DISCORD_SERVER_ID", 9009) + _, collector = _cmd_collector(dry_run=False) + with patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.task_discord_sync", + side_effect=CommandError("DiscordChatExporter failed: cli missing"), + ): + with pytest.raises(CommandError, match="DiscordChatExporter"): + collector.cmd._handle_core(collector.options, collector) + + +@pytest.mark.django_db +def test_persist_channel_inserts_messages(monkeypatch, settings, tmp_path): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + gid, cid = 330011, 330022 + guild_info = {"id": gid, "name": "Guild", "iconUrl": ""} + channel_info = { + "id": cid, + "name": "chan", + "type": "GuildTextChat", + "topic": "", + "category": "", + "categoryId": None, + } + messages = [ + { + "id": str(10**12 + 7), + "type": "Default", + "isPinned": False, + "timestamp": "2026-01-15T12:00:00Z", + "content": "hello world example text long enough for validation", + "author": {"id": "1082347485026070548", "name": "user"}, + "attachments": [], + "reactions": [], + } + ] + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + collector = DiscordActivityCollector(cmd=cmd, options={}) + count = asyncio.run(collector._persist_channel(guild_info, channel_info, messages)) + assert count >= 1 diff --git a/discord_activity_tracker/tests/test_staging_schema_extra.py b/discord_activity_tracker/tests/test_staging_schema_extra.py new file mode 100644 index 0000000..566b699 --- /dev/null +++ b/discord_activity_tracker/tests/test_staging_schema_extra.py @@ -0,0 +1,45 @@ +"""Extra coverage for staging_schema.""" + +from __future__ 
import annotations + +import pytest + +from discord_activity_tracker.staging_schema import ( + build_staging_json_schema_bundle, + validate_normalized_message, + write_staging_json_schema, +) + + +def test_build_staging_json_schema_bundle_has_models(): + bundle = build_staging_json_schema_bundle() + assert bundle["title"] == "discord_staging_v1" + assert "discord_chat_exporter_envelope" in bundle + assert "normalized_discord_message" in bundle + + +def test_write_staging_json_schema_writes_file(tmp_path): + p = tmp_path / "out.json" + out = write_staging_json_schema(p) + assert out == p + assert p.read_text(encoding="utf-8").startswith("{") + + +@pytest.mark.django_db +def test_validate_normalized_blank_edited_at_becomes_none(): + from discord_activity_tracker.sync.chat_exporter import ( + convert_exporter_message_to_dict, + ) + + raw = { + "id": "1", + "timestamp": "2026-01-01T00:00:00Z", + "timestampEdited": " ", + "content": "hello world example text long enough", + "author": {"id": "1", "name": "a"}, + "attachments": [], + "reactions": [], + } + d = convert_exporter_message_to_dict(raw, server_id=1, channel_id=2) + m = validate_normalized_message(d, source="t") + assert m.edited_at is None diff --git a/discord_activity_tracker/tests/test_sync_chat_exporter.py b/discord_activity_tracker/tests/test_sync_chat_exporter.py index 584fc9f..8681bca 100644 --- a/discord_activity_tracker/tests/test_sync_chat_exporter.py +++ b/discord_activity_tracker/tests/test_sync_chat_exporter.py @@ -678,3 +678,21 @@ def test_export_and_parse_returns_channels(tmp_path): assert len(rows) == 1 assert rows[0]["guild"] == {"id": "g"} assert rows[0]["file_path"] == ok + + +def test_convert_exporter_message_with_embeds_key_ignored(): + raw = { + "id": "1", + "timestamp": "2026-01-01T00:00:00Z", + "content": "body", + "author": {"id": "1", "name": "a"}, + "attachments": [], + "reactions": [], + "embeds": [{"title": "E", "description": "d"}], + } + out = convert_exporter_message_to_dict(raw, 
server_id=1, channel_id=2) + assert out["content"] == "body" + assert "embeds" not in out + from discord_activity_tracker.staging_schema import validate_normalized_message + + validate_normalized_message(out, source="embed-test") diff --git a/discord_activity_tracker/tests/test_sync_client.py b/discord_activity_tracker/tests/test_sync_client.py index 5f9617e..e2f5a5a 100644 --- a/discord_activity_tracker/tests/test_sync_client.py +++ b/discord_activity_tracker/tests/test_sync_client.py @@ -421,3 +421,69 @@ def test_message_to_dict_reply_and_pinned(mock_discord_pkg): d = discord_message_to_sync_dict(msg) assert d["message_type"] == "Reply" assert d["is_pinned"] is True + + +def test_fetch_messages_since_http_429_returns_empty(mock_discord_pkg): + m, _ = mock_discord_pkg + exc = m.HTTPException() + exc.status = 429 + + async def hist(*_a, **_k): + if False: + yield None + raise exc + + ch = MagicMock() + ch.name = "c" + ch.history = hist + + c = DiscordSyncClient("tok") + c._ready = True + + async def run(): + return await c.fetch_messages_since(ch, after=None, limit=None) + + assert asyncio.run(run()) == [] + + +def test_message_type_label_from_typed_message_type(mock_discord_pkg): + m, _ = mock_discord_pkg + MT = type("MessageType", (), {}) + m.MessageType = MT + mt = MT() + mt.name = "thread_created" + from discord_activity_tracker.sync.client import _message_type_label + + assert _message_type_label(mt) == "ThreadCreated" + + +def test_discord_sync_client_message_to_dict_delegates(mock_discord_pkg): + c = DiscordSyncClient("tok") + msg = MagicMock() + msg.id = 1 + msg.content = "x" + msg.author = SimpleNamespace( + id=1, name="a", display_name="a", bot=False, avatar=None + ) + msg.created_at = datetime(2026, 1, 1, tzinfo=timezone.utc) + msg.edited_at = None + msg.reference = None + msg.attachments = [] + msg.reactions = [] + msg.type = SimpleNamespace(name="default") + msg.pinned = False + d = c._message_to_dict(msg) + assert d["id"] == 1 + + +def 
test_shutdown_sync_logs_when_close_raises(mock_discord_pkg, caplog): + import logging + + caplog.set_level(logging.ERROR) + _, inner = mock_discord_pkg + inner.close = AsyncMock(side_effect=RuntimeError("close failed")) + c = DiscordSyncClient("tok") + c._ready = True + c._asyncio_loop = asyncio.new_event_loop() + c.shutdown_sync() + assert "Error while closing" in caplog.text diff --git a/discord_activity_tracker/tests/test_task_discord_sync_coverage.py b/discord_activity_tracker/tests/test_task_discord_sync_coverage.py new file mode 100644 index 0000000..585ff8b --- /dev/null +++ b/discord_activity_tracker/tests/test_task_discord_sync_coverage.py @@ -0,0 +1,436 @@ +"""Coverage for task_discord_sync (fetch → persist → raw archive).""" + +from __future__ import annotations + +import json +import secrets +from datetime import datetime, timezone +from io import StringIO +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from discord_activity_tracker.management.commands.run_discord_activity_tracker import ( + DiscordActivityCollector, + task_discord_sync, +) + + +def _phony_token() -> str: + return secrets.token_hex(16) + + +def _minimal_envelope(guild_id: int, channel_id: int): + msg = { + "id": str(10**12 + guild_id + channel_id), + "type": "Default", + "isPinned": False, + "timestamp": "2026-01-15T12:00:00Z", + "content": "hello world example text long enough for validation", + "author": {"id": "1082347485026070548", "name": "user"}, + "attachments": [], + "reactions": [], + } + return { + "guild": {"id": str(guild_id), "name": "G"}, + "channel": {"id": str(channel_id), "name": "c", "type": "GuildTextChat"}, + "messages": [msg], + } + + +@pytest.mark.django_db +def test_task_discord_sync_skip_returns_early(settings): + tok = _phony_token() + settings.DISCORD_USER_TOKEN = tok + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + 
task_discord_sync( + dry_run=False, + skip_discord_sync=True, + user_token=tok, + guild_id=1, + channel_ids=[], + after_date=None, + before_date=None, + collector=collector, + ) + + +@pytest.mark.django_db +def test_task_discord_sync_dry_run_returns_early(settings): + tok = _phony_token() + settings.DISCORD_USER_TOKEN = tok + cmd = MagicMock() + cmd.stdout = StringIO() + collector = DiscordActivityCollector(cmd=cmd, options={}) + task_discord_sync( + dry_run=True, + skip_discord_sync=False, + user_token=tok, + guild_id=1, + channel_ids=[], + after_date=None, + before_date=None, + collector=collector, + ) + + +@pytest.mark.django_db +def test_task_discord_sync_happy_path_rename_raw(settings, tmp_path, monkeypatch): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + settings.DISCORD_USER_TOKEN = tok + + gid, cid = 880011, 880022 + staging = tmp_path / "staging" + staging.mkdir() + raw_ch = tmp_path / "raw" / str(gid) / str(cid) + raw_ch.mkdir(parents=True) + + jpath = staging / "c.json" + jpath.write_text(json.dumps(_minimal_envelope(gid, cid)), encoding="utf-8") + + def fake_export(**_kwargs): + return [jpath] + + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + collector._persist_channel = AsyncMock(return_value=1) + + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + side_effect=fake_export, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_exporter_staging_dir", + return_value=staging, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.clear_exporter_staging_dir", + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_channel_raw_dir", + return_value=raw_ch, + ), + patch( + 
"discord_activity_tracker.management.commands.run_discord_activity_tracker.get_raw_dir", + return_value=tmp_path / "raw", + ), + ): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=gid, + channel_ids=[], + after_date=datetime(2026, 1, 1, tzinfo=timezone.utc), + before_date=None, + collector=collector, + ) + + dest = raw_ch / "2026-01-01.json" + assert dest.is_file() + + +@pytest.mark.django_db +def test_task_discord_sync_skips_channel_not_in_allowlist(settings, tmp_path): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + + gid, cid = 770011, 770022 + staging = tmp_path / "st2" + staging.mkdir() + jpath = staging / "x.json" + jpath.write_text(json.dumps(_minimal_envelope(gid, cid)), encoding="utf-8") + + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + collector._persist_channel = AsyncMock(return_value=0) + + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + return_value=[jpath], + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_exporter_staging_dir", + return_value=staging, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.clear_exporter_staging_dir", + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_channel_raw_dir", + return_value=tmp_path / "raw" / str(gid) / str(cid), + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_raw_dir", + return_value=tmp_path / "raw", + ), + ): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=gid, + channel_ids=[999999], + after_date=None, + before_date=None, + collector=collector, + ) + + assert not jpath.exists() + + 
+@pytest.mark.django_db +def test_task_discord_sync_staging_validation_error_keeps_file( + settings, tmp_path, monkeypatch +): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + gid, cid = 660011, 660022 + staging = tmp_path / "st3" + staging.mkdir() + jpath = staging / "bad.json" + jpath.write_text( + json.dumps({"guild": {}, "channel": {}, "messages": "bad"}), encoding="utf-8" + ) + + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + return_value=[jpath], + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_exporter_staging_dir", + return_value=staging, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.clear_exporter_staging_dir", + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_channel_raw_dir", + return_value=tmp_path / "raw" / str(gid) / str(cid), + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_raw_dir", + return_value=tmp_path / "raw", + ), + ): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=gid, + channel_ids=[], + after_date=None, + before_date=None, + collector=collector, + ) + assert jpath.is_file() + + +@pytest.mark.django_db +def test_task_discord_sync_value_error_unlinks(settings, tmp_path): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + gid, cid = 550011, 550022 + staging = tmp_path / "st4" + staging.mkdir() + jpath = staging / "v.json" + jpath.write_text("{", encoding="utf-8") + + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = 
lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + return_value=[jpath], + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_exporter_staging_dir", + return_value=staging, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.clear_exporter_staging_dir", + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_channel_raw_dir", + return_value=tmp_path / "raw" / str(gid) / str(cid), + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_raw_dir", + return_value=tmp_path / "raw", + ), + ): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=gid, + channel_ids=[], + after_date=None, + before_date=None, + collector=collector, + ) + assert not jpath.exists() + + +@pytest.mark.django_db +def test_task_discord_sync_exporter_error_becomes_command_error(settings, tmp_path): + from django.core.management.base import CommandError + + from discord_activity_tracker.sync.chat_exporter import DiscordChatExporterError + + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + with patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + side_effect=DiscordChatExporterError("boom"), + ): + with pytest.raises(CommandError, match="DiscordChatExporter"): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=1, + channel_ids=[], + after_date=None, + before_date=None, + collector=collector, + ) + + +@pytest.mark.django_db +def 
test_task_discord_sync_persist_raises_unlinks(settings, tmp_path): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + gid, cid = 440011, 440022 + staging = tmp_path / "st5" + staging.mkdir() + jpath = staging / "ok.json" + jpath.write_text(json.dumps(_minimal_envelope(gid, cid)), encoding="utf-8") + + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + collector._persist_channel = AsyncMock(side_effect=RuntimeError("db")) + + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + return_value=[jpath], + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_exporter_staging_dir", + return_value=staging, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.clear_exporter_staging_dir", + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_channel_raw_dir", + return_value=tmp_path / "raw" / str(gid) / str(cid), + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_raw_dir", + return_value=tmp_path / "raw", + ), + ): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=gid, + channel_ids=[], + after_date=None, + before_date=None, + collector=collector, + ) + assert not jpath.exists() + + +@pytest.mark.django_db +def test_task_discord_sync_stdout_includes_before_date(settings, tmp_path): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + tok = _phony_token() + gid, cid = 410011, 410022 + staging = tmp_path / "st6" + staging.mkdir() + jpath = staging / "bd.json" + jpath.write_text(json.dumps(_minimal_envelope(gid, cid)), encoding="utf-8") + + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = 
MagicMock() + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + collector._persist_channel = AsyncMock(return_value=0) + before = datetime(2026, 12, 31, tzinfo=timezone.utc) + with ( + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.export_guild_to_json", + return_value=[jpath], + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_exporter_staging_dir", + return_value=staging, + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.clear_exporter_staging_dir", + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_channel_raw_dir", + return_value=tmp_path / "raw" / str(gid) / str(cid), + ), + patch( + "discord_activity_tracker.management.commands.run_discord_activity_tracker.get_raw_dir", + return_value=tmp_path / "raw", + ), + ): + task_discord_sync( + dry_run=False, + skip_discord_sync=False, + user_token=tok, + guild_id=gid, + channel_ids=[], + after_date=None, + before_date=before, + collector=collector, + ) + out = cmd.stdout.getvalue() + assert "Upper bound" in out diff --git a/discord_activity_tracker/tests/test_task_markdown_coverage.py b/discord_activity_tracker/tests/test_task_markdown_coverage.py new file mode 100644 index 0000000..60600b7 --- /dev/null +++ b/discord_activity_tracker/tests/test_task_markdown_coverage.py @@ -0,0 +1,163 @@ +"""Coverage for task_markdown_export_and_push.""" + +from __future__ import annotations + +from io import StringIO +from unittest.mock import MagicMock, patch + +import pytest + +from discord_activity_tracker.management.commands.run_discord_activity_tracker import ( + DiscordActivityCollector, + task_markdown_export_and_push, +) +from discord_activity_tracker.models import DiscordServer + + +@pytest.mark.django_db +def test_task_markdown_skip_export(): + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + 
cmd.style.WARNING = lambda x: x + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + task_markdown_export_and_push( + dry_run=False, + skip_markdown_export=True, + skip_remote_push=False, + guild_id=1, + collector=collector, + ) + + +@pytest.mark.django_db +def test_task_markdown_no_context_path(monkeypatch, tmp_path, settings): + monkeypatch.setattr(settings, "DISCORD_CONTEXT_REPO_PATH", "") + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + task_markdown_export_and_push( + dry_run=False, + skip_markdown_export=False, + skip_remote_push=False, + guild_id=1, + collector=collector, + ) + + +@pytest.mark.django_db +def test_task_markdown_dry_run(tmp_path, settings): + p = tmp_path / "ctx" + p.mkdir() + settings.DISCORD_CONTEXT_REPO_PATH = str(p) + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + task_markdown_export_and_push( + dry_run=True, + skip_markdown_export=False, + skip_remote_push=False, + guild_id=1, + collector=collector, + ) + assert "ctx" in cmd.stdout.getvalue() or "dry-run" in cmd.stdout.getvalue().lower() + + +@pytest.mark.django_db +def test_task_markdown_server_not_in_db(tmp_path, settings): + settings.DISCORD_CONTEXT_REPO_PATH = str(tmp_path / "ctx") + (tmp_path / "ctx").mkdir() + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + task_markdown_export_and_push( + dry_run=False, + skip_markdown_export=False, + skip_remote_push=True, + guild_id=999888777666, + collector=collector, + ) + assert "not in DB" in cmd.stdout.getvalue() or "Server" in cmd.stdout.getvalue() + + +@pytest.mark.django_db +def 
test_task_markdown_export_success(tmp_path, settings): + settings.DISCORD_CONTEXT_REPO_PATH = str(tmp_path / "ctx") + (tmp_path / "ctx").mkdir() + srv = DiscordServer.objects.create(server_id=424242, server_name="S", icon_url="") + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + with patch( + "discord_activity_tracker.sync.export.export_and_push", + return_value=True, + ): + task_markdown_export_and_push( + dry_run=False, + skip_markdown_export=False, + skip_remote_push=True, + guild_id=srv.server_id, + collector=collector, + ) + assert "Exported" in cmd.stdout.getvalue() + + +@pytest.mark.django_db +def test_task_markdown_export_warns_on_false(tmp_path, settings): + settings.DISCORD_CONTEXT_REPO_PATH = str(tmp_path / "ctx") + (tmp_path / "ctx").mkdir() + srv = DiscordServer.objects.create(server_id=424243, server_name="S2", icon_url="") + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + cmd.style.SUCCESS = lambda x: x + collector = DiscordActivityCollector(cmd=cmd, options={}) + with patch( + "discord_activity_tracker.sync.export.export_and_push", + return_value=False, + ): + task_markdown_export_and_push( + dry_run=False, + skip_markdown_export=False, + skip_remote_push=True, + guild_id=srv.server_id, + collector=collector, + ) + assert "No markdown" in cmd.stdout.getvalue() + + +@pytest.mark.django_db +def test_task_markdown_auto_commit_path(tmp_path, settings): + settings.DISCORD_CONTEXT_REPO_PATH = str(tmp_path / "ctx") + (tmp_path / "ctx").mkdir() + settings.DISCORD_CONTEXT_AUTO_COMMIT = True + srv = DiscordServer.objects.create(server_id=424245, server_name="S4", icon_url="") + cmd = MagicMock() + cmd.stdout = StringIO() + cmd.style = MagicMock() + cmd.style.WARNING = lambda x: x + cmd.style.SUCCESS = lambda x: x + collector = 
DiscordActivityCollector(cmd=cmd, options={}) + with patch( + "discord_activity_tracker.sync.export.export_and_push", + return_value=True, + ): + task_markdown_export_and_push( + dry_run=False, + skip_markdown_export=False, + skip_remote_push=False, + guild_id=srv.server_id, + collector=collector, + ) diff --git a/discord_activity_tracker/tests/test_workspace_clear_staging.py b/discord_activity_tracker/tests/test_workspace_clear_staging.py new file mode 100644 index 0000000..1a2914f --- /dev/null +++ b/discord_activity_tracker/tests/test_workspace_clear_staging.py @@ -0,0 +1,23 @@ +"""Coverage for workspace.clear_exporter_staging_dir.""" + +from __future__ import annotations + +import pytest + +from discord_activity_tracker.workspace import ( + clear_exporter_staging_dir, + get_exporter_staging_dir, +) + + +@pytest.mark.django_db +def test_clear_exporter_staging_dir_removes_children(tmp_path, settings): + settings.WORKSPACE_DIR = tmp_path / "ws" + settings.WORKSPACE_DIR.mkdir(parents=True) + staging = get_exporter_staging_dir() + (staging / "a.json").write_text("{}", encoding="utf-8") + sub = staging / "sub" + sub.mkdir() + (sub / "x.txt").write_text("x", encoding="utf-8") + clear_exporter_staging_dir() + assert list(staging.iterdir()) == [] diff --git a/discord_activity_tracker/tests/test_write_staging_json_schema_script.py b/discord_activity_tracker/tests/test_write_staging_json_schema_script.py new file mode 100644 index 0000000..724f03e --- /dev/null +++ b/discord_activity_tracker/tests/test_write_staging_json_schema_script.py @@ -0,0 +1,15 @@ +"""Coverage for write_staging_json_schema script entrypoint.""" + +from __future__ import annotations + +from unittest.mock import patch + +import discord_activity_tracker.scripts.write_staging_json_schema as mod + + +def test_write_staging_json_schema_main_prints_path(tmp_path, capsys): + target = tmp_path / "schema.json" + with patch.object(mod, "write_staging_json_schema", return_value=target): + mod.main() + out = 
capsys.readouterr().out.strip() + assert str(target) in out diff --git a/docs/Service_API.md b/docs/Service_API.md index 2e6e34c..959346f 100644 --- a/docs/Service_API.md +++ b/docs/Service_API.md @@ -42,6 +42,8 @@ Some service functions validate arguments and raise before writing: - `get_or_create_boost_library(repo, name)`, `get_or_create_boost_version(version)`, `get_or_create_boost_library_category(name)` – Raise **`ValueError`** if name/version is empty or whitespace-only. - **boost_library_docs_tracker.services** - `get_or_create_doc_content(url, ...)` – Raises **`ValueError`** if `url` is empty or whitespace-only. +- **discord_activity_tracker.services** + - No intentional **`ValueError`** on invalid inputs; bulk helpers may **skip** rows and log warnings (see [discord_activity_tracker.md](service_api/discord_activity_tracker.md#raises-and-edge-behavior)). **`CollectorFailureCategory`** is not set in this module; see [discord_activity_tracker.md](service_api/discord_activity_tracker.md#collectorfailurecategory). See each app’s doc in [service_api/](service_api/) for parameter types, return types, and any **Raises** section. diff --git a/docs/service_api/discord_activity_tracker.md b/docs/service_api/discord_activity_tracker.md index b2c3f28..3afc5fd 100644 --- a/docs/service_api/discord_activity_tracker.md +++ b/docs/service_api/discord_activity_tracker.md @@ -7,6 +7,35 @@ --- +## Service contract + +- **get_or_create pattern:** `get_or_create_discord_server` and `get_or_create_discord_channel` return `tuple[Model, bool]` where the `bool` is Django's `created` flag (a new row was inserted on this call). +- **update_or_create pattern:** `create_or_update_discord_message` and `add_or_update_reaction` return `tuple[Model, bool]` with Django `update_or_create` semantics for `created`. 
+- **Partial updates:** On existing rows, server and channel helpers use `save(update_fields=[...])` when metadata has changed; `mark_message_deleted` updates `is_deleted`, `deleted_at`, and `updated_at` via `update_fields`. +- **Bulk upsert:** `bulk_upsert_discord_messages` and `bulk_upsert_discord_reactions` use `bulk_create(..., update_conflicts=True, unique_fields=..., update_fields=...)`. **`bulk_upsert_discord_users`** uses per-row queries and `get_or_create_discord_profile` because `DiscordProfile` uses multi-table inheritance (Django's `bulk_create` does not support multi-table child models, so `update_conflicts=True` is unavailable). +- **Transactions:** `bulk_process_message_batch` wraps user → message → reaction upserts in a single `transaction.atomic()`; an unhandled exception rolls back all phases. +- **`bulk_process_message_batch` return value:** Returns `len(message_data_list)` when the input list is non-empty, **not** the count of rows successfully written. Individual messages may still be skipped inside `bulk_upsert_discord_messages` (see **Raises and edge behavior** below). + +--- + +## Raises and edge behavior + +- **`discord_activity_tracker.services` does not intentionally raise `ValueError`** for invalid arguments; validate inputs at sync/staging boundaries where appropriate. +- **`bulk_upsert_discord_users`:** Each dict must include `user_id` (plus any other keys the function reads per row); malformed payloads can raise **`KeyError`**. +- **`bulk_upsert_discord_messages`:** If `user_map` has no profile for `message_data["author"]["user_id"]`, that message is **skipped** and a **warning** is logged (no exception). If every message in the batch is skipped, no bulk insert runs and `{}` is returned. +- **`bulk_upsert_discord_reactions`:** If `message_map` has no message for `discord_message_id`, that reaction is skipped **silently**. Duplicate `(message, emoji)` pairs in one batch keep the **last** entry. +- **ORM:** Functions may propagate Django database exceptions (e.g. `IntegrityError`, `OperationalError`) under concurrency or infrastructure faults.
+ +--- + +## CollectorFailureCategory + +`discord_activity_tracker.services` performs **database I/O only**. It does not call Discord HTTP APIs and does **not** assign [`CollectorFailureCategory`](../../core/errors.py) values. + +Collectors, management commands, and sync layers classify failures with [`classify_failure`](../../core/errors.py) when handling exceptions (e.g. DiscordChatExporter subprocess failures wrapped in `CommandError`, discord.py HTTP errors, rate limits). If exceptions that propagate from these services are passed through `classify_failure`, the mapping follows **`core/errors.py`** (for example, `django.core.exceptions.ValidationError` may map to **`VALIDATION`**). + +--- + ## DiscordServer | Function | Parameter types | Return type | Description | @@ -57,7 +86,7 @@ Inputs are lists of pre-normalised message dicts (from `sync.messages._prepare_m | `bulk_upsert_discord_users` | `user_data_list: list[dict]` | `dict[int, DiscordProfile]` | Upsert `DiscordProfile` rows; returns `{discord_user_id: profile}`. | | `bulk_upsert_discord_messages` | `message_data_list: list[dict]`, `channel: DiscordChannel`, `user_map: dict[int, DiscordProfile]` | `dict[int, DiscordMessage]` | Upsert `DiscordMessage` rows incl. `message_type` and `is_pinned`; returns `{message_id: msg}`. | | `bulk_upsert_discord_reactions` | `reaction_data_list: list[dict]`, `message_map: dict[int, DiscordMessage]` | `None` | Upsert `DiscordReaction` rows. | -| `bulk_process_message_batch` | `message_data_list: list[dict]`, `channel: DiscordChannel` | `int` | Orchestrates user upsert → message upsert → reaction upsert; returns number of messages upserted. | +| `bulk_process_message_batch` | `message_data_list: list[dict]`, `channel: DiscordChannel` | `int` | Runs the user → message → reaction upserts inside one `transaction.atomic()`. Return value is **`len(message_data_list)`** when non-empty (not the count of rows actually upserted); see **Raises and edge behavior** above. | ---