Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ def classify_failure(exc: BaseException) -> CollectorFailureCategory:

``requests.HTTPError`` with ``response.status_code`` 429 maps to
:attr:`CollectorFailureCategory.RATE_LIMIT`; 401 and 403 map to
:attr:`CollectorFailureCategory.AUTH`. Other cases stay conservative.
:attr:`CollectorFailureCategory.AUTH`. ``discord.errors.HTTPException``
with ``status`` is classified similarly when discord.py is in use.
"""
# Django / app
try:
Expand Down Expand Up @@ -191,6 +192,24 @@ def classify_failure(exc: BaseException) -> CollectorFailureCategory:
if "HTTPStatus" in exc_name or "Transport" in exc_name or "Connect" in exc_name:
return CollectorFailureCategory.NETWORK

# discord.py (optional dependency): HTTPException and subclasses expose ``status``.
if exc_mod.startswith("discord"):
status = getattr(exc, "status", None)
if isinstance(status, int):
if status == 429:
return CollectorFailureCategory.RATE_LIMIT
if status in (401, 403):
return CollectorFailureCategory.AUTH
if 500 <= status < 600:
return CollectorFailureCategory.NETWORK
if 400 <= status < 500:
return CollectorFailureCategory.UNKNOWN
return CollectorFailureCategory.NETWORK
if exc_name == "HTTPException":
return CollectorFailureCategory.NETWORK
if exc_name in ("LoginFailure", "PrivilegedIntentsRequired", "ClientException"):
return CollectorFailureCategory.AUTH

if isinstance(exc, OSError):
return _classify_os_error(exc)

Expand Down
9 changes: 9 additions & 0 deletions discord_activity_tracker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Discord Activity Tracker Django app.

Persists Discord guild, channel, message, and reaction data for analytics, Markdown
context export, and Pinecone indexing. All writes to app models go through
``discord_activity_tracker.services``. Ingestion is driven by management commands and
sync helpers (DiscordChatExporter and optional discord.py paths).

App config: ``discord_activity_tracker.apps.DiscordActivityTrackerConfig``.
"""
1 change: 1 addition & 0 deletions discord_activity_tracker/management/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Django ``management`` package for ``discord_activity_tracker`` (``manage.py`` commands)."""
1 change: 1 addition & 0 deletions discord_activity_tracker/management/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Management commands: ``run_discord_activity_tracker``, ``backfill_discord_activity_tracker``."""
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
"""Import pre-exported Discord JSON from disk → DB → optional Pinecone sync.
"""Django management command ``backfill_discord_activity_tracker``.

Reads DiscordChatExporter JSON files under
``workspace/discord_activity_tracker/Discussion - c-cpp-discussion/``
(recursively, including nested subfolders),
persists messages to the database, then deletes each file after a successful import
so it is not processed again.
Imports **pre-exported** DiscordChatExporter JSON from the workspace drop folder
(``workspace/discord_activity_tracker/Discussion - c-cpp-discussion/``,
recursively), validates envelope and normalized messages, upserts into the database
via the service layer, then **deletes** each file after a successful import so it is
not processed again.

This command does **not** invoke DiscordChatExporter; place JSON exports in the drop
folder manually or from another host.

Optional arguments: ``--dry-run`` (list files only), ``--skip-pinecone`` /
``--ignore-pinecone`` (skip ``task_discord_pinecone_sync`` after import). See
``Command.add_arguments`` and ``docs/service_api/discord_activity_tracker.md``.

Side effects: DB writes to ``DiscordServer``, ``DiscordChannel``, ``DiscordMessage``,
``DiscordReaction``, and ``DiscordProfile`` (via services); filesystem deletes on
success; Pinecone sync when enabled.

Raises:
Per-file parse/validation failures are caught inside ``DiscordBackfillCollector.run``
(logged and reported on stdout); they do not abort the whole command. Uncaught
exceptions from ``sync_pinecone`` or the base command layer may still propagate.
"""

from __future__ import annotations
Expand Down Expand Up @@ -48,7 +64,18 @@ def _json_display_path(import_dir: Path, json_path: Path) -> str:


class DiscordBackfillCollector(CollectorBase):
"""Import DiscordChatExporter JSON files from the c-cpp-discussion drop folder."""
"""Backfill collector: scan drop folder, import each JSON, delete on success.

``run()`` lists JSON files under ``get_cpp_discussion_import_dir()``. In dry-run mode
it only prints the paths. Otherwise, for each file, it parses the JSON, validates the
staging schema, and upserts messages in batches; on success the file is unlinked, and
on failure the failure is logged and the file is kept.

``sync_pinecone()`` runs after a successful collector run (unless dry-run or
``skip_pinecone``).

Side effects: Same as module docstring (DB, deletes, optional Pinecone).
"""

def __init__(self, *, stdout, style, **opts: Any) -> None:
self.stdout = stdout
Expand Down Expand Up @@ -152,6 +179,28 @@ def sync_pinecone(self) -> None:


class Command(BaseCollectorCommand):
"""``manage.py backfill_discord_activity_tracker`` — import JSON from the drop folder.

Uses ``DiscordBackfillCollector``. Required layout: JSON files under
``{WORKSPACE_DIR}/discord_activity_tracker/Discussion - c-cpp-discussion/``.

Optional arguments: ``--dry-run``, ``--skip-pinecone`` / ``--ignore-pinecone``.

Examples:
``python manage.py backfill_discord_activity_tracker``

``python manage.py backfill_discord_activity_tracker --dry-run``

``python manage.py backfill_discord_activity_tracker --skip-pinecone``

Raises:
Per-file errors are caught and logged inside ``DiscordBackfillCollector.run`` and do
not abort the command. The base command layer or the Pinecone sync task may still
raise if misconfigured.

See Also:
``docs/service_api/discord_activity_tracker.md``
"""

help = (
"Import DiscordChatExporter JSON from "
"workspace/discord_activity_tracker/Discussion - c-cpp-discussion/ "
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
"""
Management command: run_discord_activity_tracker

Runs several tasks in order:
1. Ensure raw workspace layout
2. Fetch Discord messages (DiscordChatExporter) → DB → archive JSON under
WORKSPACE_DIR/raw/discord_activity_tracker/<server_id>/<channel_id>/
3. Export DB messages as Markdown to DISCORD_CONTEXT_REPO_PATH (optional git push)
4. Upsert Discord messages to Pinecone (run_cppa_pinecone_sync)
"""Django management command ``run_discord_activity_tracker``.

Orchestrates the scheduled Discord ingest pipeline: workspace prep, optional
DiscordChatExporter fetch with DB upsert and raw JSON archival, Markdown export to
``DISCORD_CONTEXT_REPO_PATH``, and optional Pinecone sync via ``run_cppa_pinecone_sync``.

Phases (see ``DiscordActivityCollector`` and task helpers in this module):

1. **Workspace** — Ensure raw/staging dirs under ``WORKSPACE_DIR`` (see
``discord_activity_tracker.workspace``).
2. **Sync** — Run DiscordChatExporter (unless ``--skip-discord-sync``), parse JSON,
validate staging schema, upsert via ``discord_activity_tracker.services``,
move exports under
``{WORKSPACE_DIR}/raw/discord_activity_tracker/<server_id>/<channel_id>/``.
3. **Markdown** — Export DB rows to the context repo (unless ``--skip-markdown-export``);
optional git push when ``DISCORD_CONTEXT_AUTO_COMMIT`` is true and
``--skip-remote-push`` is not set.
4. **Pinecone** — ``task_discord_pinecone_sync`` when ``PINECONE_DISCORD_*`` are set
and ``--skip-pinecone`` is not used.

Required settings for a full sync: ``DISCORD_USER_TOKEN``, ``DISCORD_SERVER_ID``.
Channel scope uses ``DISCORD_CHANNEL_IDS`` unless overridden by ``--channels``.

CLI flags are documented in the ``help=`` strings of each ``parser.add_argument`` call
in ``Command.add_arguments`` and in ``docs/service_api/discord_activity_tracker.md``.

Raises:
django.core.management.base.CommandError: Missing token/guild, invalid
``--since``/``--until`` parse, or DiscordChatExporter failure (wrapped from
``DiscordChatExporterError``). Other exceptions from the collector may propagate
after logging from ``_handle_core``.
"""

from __future__ import annotations
Expand Down Expand Up @@ -312,7 +334,18 @@ def task_markdown_export_and_push(


class DiscordActivityCollector(CollectorBase):
"""Discord sync + Markdown + Pinecone; ``sync_pinecone`` runs ``run_cppa_pinecone_sync``."""
"""Collector implementation for ``run_discord_activity_tracker``.

Holds stdout/style, resolved ``channel_ids`` (from ``--channels`` or
``settings.DISCORD_CHANNEL_IDS``), and delegates to ``Command._handle_core``.

``run()`` drives fetch → Markdown → Pinecone according to options.
``sync_pinecone()`` runs ``task_discord_pinecone_sync`` when not dry-run and not
skipping Pinecone.

Side effects: Same as the management command (DB, filesystem, subprocess calls
to DiscordChatExporter and Pinecone tooling via configured runners).
"""

def __init__(self, cmd: "Command", options: dict) -> None:
self.cmd = cmd
Expand Down Expand Up @@ -374,7 +407,35 @@ async def _persist_channel(


class Command(BaseCollectorCommand):
"""Discord activity tracker: fetch → DB → raw JSON; Markdown export; Pinecone upsert."""
"""``manage.py run_discord_activity_tracker`` — incremental Discord ingest and exports.

Wraps ``DiscordActivityCollector`` with ``BaseCollectorCommand`` (dry-run, logging,
collector phases). See module docstring for phases and required settings.

Optional arguments (full text on each ``add_argument``):

``--dry-run``, ``--skip-discord-sync``, ``--skip-markdown-export``,
``--skip-remote-push``, ``--skip-pinecone`` / ``--ignore-pinecone``,
``--since`` / ``--until`` (and aliases), ``--channels``, ``--task`` (deprecated).

Examples:
``python manage.py run_discord_activity_tracker`` — full pipeline with
settings-based channel allowlist.

``python manage.py run_discord_activity_tracker --dry-run`` — log planned
steps only.

``python manage.py run_discord_activity_tracker --channels 123,456 --skip-pinecone`` —
restrict channels and skip Pinecone.

Raises:
CommandError: If ``DISCORD_USER_TOKEN`` or ``DISCORD_SERVER_ID`` is unset, or
date options fail to parse, or DiscordChatExporter fails (see ``task_discord_sync``).

See Also:
``docs/service_api/discord_activity_tracker.md``
``docs/operations/discord_chat_exporter.md``
"""

help = (
"Discord activity tracker: (1) fetch via DiscordChatExporter + DB + raw archive; "
Expand Down
50 changes: 46 additions & 4 deletions discord_activity_tracker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,16 @@


class DiscordServer(models.Model):
"""Discord server/guild."""
"""Persisted Discord guild (server) metadata synced from export or API pipelines.

One row per Discord guild snowflake ``server_id``. Holds display ``server_name``
and optional ``icon_url`` for UI or audit. Timestamps ``created_at`` /
``updated_at`` track row lifecycle.

Relationships:
Reverse ``channels``: ``DiscordChannel`` rows with FK to this server
(``related_name="channels"`` on ``DiscordChannel``).
"""

server_id = models.BigIntegerField(unique=True, db_index=True)
server_name = models.CharField(max_length=255, db_index=True)
Expand All @@ -20,7 +29,17 @@ def __str__(self):


class DiscordChannel(models.Model):
"""Discord channel within a server."""
"""A channel (text thread, category child, etc.) belonging to one ``DiscordServer``.

Key fields: ``channel_id`` (Discord snowflake, globally unique), ``channel_name``,
``channel_type`` (e.g. exporter string), ``topic``, ``position``, and optional
``category_id`` / ``category_name`` for grouping in the guild tree.

Relationships:
``server``: FK to ``DiscordServer`` (column ``server_id``).
Reverse ``messages``: ``DiscordMessage`` rows for this channel
(``related_name="messages"`` on ``DiscordMessage``).
"""

server = models.ForeignKey(
DiscordServer,
Expand Down Expand Up @@ -50,7 +69,23 @@ def __str__(self):


class DiscordMessage(models.Model):
"""Discord message in a channel."""
"""A single Discord message stored for search, export, and Pinecone preprocessing.

Key fields: ``message_id`` (snowflake, unique), ``content``, ``message_type``
(e.g. ``Default``, ``Reply``), ``is_pinned``, ``message_created_at`` /
``message_edited_at``, ``reply_to_message_id``, ``attachment_urls`` (JSON list),
``has_attachments``, and soft-delete flags ``is_deleted`` / ``deleted_at``.

Relationships:
``channel``: FK to ``DiscordChannel`` (column ``channel_id``).
``author``: FK to ``DiscordProfile`` (``cppa_user_tracker.models``); column
``author_id``. Reverse on profile: ``discord_messages``.
Reverse ``reactions``: ``DiscordReaction`` rows
(``related_name="reactions"`` on ``DiscordReaction``).

Indexes on ``(channel, message_created_at)``, ``message_created_at``,
``is_deleted``, and ``message_type`` support sync windows and queries.
"""

message_id = models.BigIntegerField(unique=True, db_index=True)
channel = models.ForeignKey(
Expand Down Expand Up @@ -94,7 +129,14 @@ def __str__(self):


class DiscordReaction(models.Model):
"""Reaction on a Discord message."""
"""Aggregated emoji reaction counts on a ``DiscordMessage``.

One row per (``message``, ``emoji``) pair (enforced by unique constraint). ``count``
stores the total from the source payload at sync time.

Relationships:
``message``: FK to ``DiscordMessage`` (column ``message_id``).
"""

message = models.ForeignKey(
DiscordMessage,
Expand Down
Loading
Loading