From a3a5cebfe82c39e27f4401ccea4bac45a7e76cca Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Mon, 6 Apr 2026 14:11:41 -0600 Subject: [PATCH 1/7] Update the fetching for private channel. --- cppa_slack_tracker/admin.py | 18 + cppa_slack_tracker/fetcher.py | 3 +- ...am_team_id_slackchannelprivate_and_more.py | 43 ++ cppa_slack_tracker/models.py | 56 +++ cppa_slack_tracker/services.py | 74 +++- cppa_slack_tracker/slack_oauth_server.py | 414 ++++++++++++++++++ cppa_slack_tracker/sync/sync_channel.py | 13 +- cppa_slack_tracker/tests/test_services.py | 28 +- 8 files changed, 623 insertions(+), 26 deletions(-) create mode 100644 cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py create mode 100644 cppa_slack_tracker/slack_oauth_server.py diff --git a/cppa_slack_tracker/admin.py b/cppa_slack_tracker/admin.py index 9770705f..a1c5acf4 100644 --- a/cppa_slack_tracker/admin.py +++ b/cppa_slack_tracker/admin.py @@ -7,6 +7,7 @@ from .models import ( SlackTeam, SlackChannel, + SlackChannelPrivate, SlackMessage, SlackChannelMembership, SlackChannelMembershipChangeLog, @@ -39,6 +40,23 @@ class SlackChannelAdmin(admin.ModelAdmin): raw_id_fields = ("team", "creator") +@admin.register(SlackChannelPrivate) +class SlackChannelPrivateAdmin(admin.ModelAdmin): + list_display = ( + "channel_id", + "channel_name", + "channel_type", + "team", + "creator", + "created_at", + ) + list_filter = ("channel_type", "created_at", "updated_at") + search_fields = ("channel_id", "channel_name", "description") + readonly_fields = ("created_at", "updated_at") + date_hierarchy = "created_at" + raw_id_fields = ("team", "creator") + + @admin.register(SlackMessage) class SlackMessageAdmin(admin.ModelAdmin): list_display = ( diff --git a/cppa_slack_tracker/fetcher.py b/cppa_slack_tracker/fetcher.py index f3e43319..3f374223 100644 --- a/cppa_slack_tracker/fetcher.py +++ b/cppa_slack_tracker/fetcher.py @@ -105,13 +105,14 @@ def fetch_team_info( def fetch_channel_list( _team_id: str, *, - types: str = "public_channel", + types: str = "public_channel,private_channel,mpim,im", exclude_archived: bool = False, client=None, ) -> list[dict]: """ Fetch channel list for the workspace (team_id). The bot token is scoped to one workspace. Returns list of channel dicts (id, name, ...). + Default types include public, private, multi-party IM, and IM (see Slack conversations.list). """ if client is None: client = get_slack_client(team_id=_team_id) diff --git a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py new file mode 100644 index 00000000..2c6fabc7 --- /dev/null +++ b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py @@ -0,0 +1,43 @@ +# Generated by Django 4.2.29 on 2026-04-06 20:07 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('cppa_user_tracker', '0005_alter_slackuser_slack_user_id'), + ('cppa_slack_tracker', '0003_alter_slackchannel_unique_team_channel_id'), + ] + + operations = [ + migrations.AlterField( + model_name='slackteam', + name='team_id', + field=models.CharField(max_length=50, unique=True), + ), + migrations.CreateModel( + name='SlackChannelPrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('channel_id', models.CharField(db_index=True, max_length=50)), + ('channel_name', models.CharField(db_index=True, max_length=255)), + ('channel_type', models.CharField(choices=[('private_channel', 'Private channel'), ('mpim', 'Multi-party direct message'), ('im', 'Direct message')], db_index=True, help_text='Type: private_channel, mpim, or im (not public_channel).', max_length=50)), + ('description', models.TextField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('creator', models.ForeignKey(blank=True, db_column='creator_user_id', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='created_private_channels', to='cppa_user_tracker.slackuser')), + ('team', models.ForeignKey(db_column='team_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_channels', to='cppa_slack_tracker.slackteam')), + ], + options={ + 'verbose_name': 'Slack Channel (non-public)', + 'verbose_name_plural': 'Slack Channels (non-public)', + 'db_table': 'cppa_slack_tracker_slackchannel_private', + }, + ), + migrations.AddConstraint( + model_name='slackchannelprivate', + constraint=models.UniqueConstraint(fields=('team', 'channel_id'), name='unique_team_channel_id_private'), + ), + ] diff --git a/cppa_slack_tracker/models.py b/cppa_slack_tracker/models.py index b8a05756..8ea9b0a5 100644 --- a/cppa_slack_tracker/models.py +++ b/cppa_slack_tracker/models.py @@ -21,6 +21,14 @@ class SlackChannelType(models.TextChoices): IM = "im", "Direct message" +class SlackChannelPrivateType(models.TextChoices): + """Channel kinds stored in SlackChannelPrivate (non-public only).""" + + PRIVATE_CHANNEL = "private_channel", "Private channel" + MPIM = "mpim", "Multi-party direct message" + IM = "im", "Direct message" + + class SlackTeam(models.Model): """ Slack team (workspace) model. @@ -86,6 +94,54 @@ def __str__(self): return f"#{self.channel_name} ({self.channel_id})" +class SlackChannelPrivate(models.Model): + """ + Non-public Slack channels (private, mpim, im) stored separately from SlackChannel. + + Public channels remain in cppa_slack_tracker_slackchannel; this table holds the rest. + """ + + team = models.ForeignKey( + SlackTeam, + on_delete=models.CASCADE, + related_name="private_channels", + db_column="team_id", + ) + channel_id = models.CharField(max_length=50, db_index=True) + channel_name = models.CharField(max_length=255, db_index=True) + channel_type = models.CharField( + max_length=50, + choices=SlackChannelPrivateType.choices, + db_index=True, + help_text="Type: private_channel, mpim, or im (not public_channel).", + ) + description = models.TextField(null=True, blank=True) + creator = models.ForeignKey( + SlackUser, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="created_private_channels", + db_column="creator_user_id", + ) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + db_table = "cppa_slack_tracker_slackchannel_private" + verbose_name = "Slack Channel (non-public)" + verbose_name_plural = "Slack Channels (non-public)" + constraints = [ + models.UniqueConstraint( + fields=["team", "channel_id"], + name="unique_team_channel_id_private", + ), + ] + + def __str__(self): + return f"#{self.channel_name} ({self.channel_id}) [{self.channel_type}]" + + class SlackMessage(models.Model): """ Slack message model. diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py index 9eab53b6..52d92633 100644 --- a/cppa_slack_tracker/services.py +++ b/cppa_slack_tracker/services.py @@ -22,6 +22,7 @@ from .fetcher import fetch_user_info from .models import ( SlackChannel, + SlackChannelPrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, SlackMessage, @@ -110,15 +111,12 @@ def get_or_create_slack_team( return team, created -# --- SlackChannel --- -@transaction.atomic -def get_or_create_slack_channel( +# --- SlackChannel / SlackChannelPrivate --- +def _slack_channel_payload_fields( slack_channel: dict[str, Any], team: SlackTeam, -) -> tuple[Optional[SlackChannel], bool]: - """Get or create a Slack channel. Returns (channel, created); channel is None when skipped.""" - if not slack_channel.get("id"): - raise ValueError("Slack channel ID is required") +) -> tuple[str, str, str, Optional[SlackUser]]: + """Parse Slack API channel object: channel_type, channel_name, description, creator.""" creator = None creator_user_id = slack_channel.get("creator") if creator_user_id: @@ -141,12 +139,52 @@ def get_or_create_slack_channel( channel_type = "public_channel" else: channel_type = slack_channel.get("type", "public_channel") - if channel_type != "public_channel": - logger.warning(f"Skipping non-public channel: {slack_channel['id']}") - return None, False - channel, created = SlackChannel.objects.get_or_create( + return channel_type, channel_name, description, creator + + +@transaction.atomic +def get_or_create_slack_channel( + slack_channel: dict[str, Any], + team: SlackTeam, +) -> tuple[Optional[SlackChannel], Optional[SlackChannelPrivate], bool]: + """ + Get or create a Slack channel row. + + Public channels use SlackChannel; non-public use SlackChannelPrivate + (table cppa_slack_tracker_slackchannel_private). + + Returns (public_channel, private_channel, created). Exactly one of the first two + is set on success; both None only when the payload is invalid (e.g. missing id). + """ + if not slack_channel.get("id"): + raise ValueError("Slack channel ID is required") + channel_type, channel_name, description, creator = _slack_channel_payload_fields( + slack_channel, team + ) + cid = slack_channel["id"] + if channel_type == "public_channel": + channel, created = SlackChannel.objects.get_or_create( + team=team, + channel_id=cid, + defaults={ + "channel_name": channel_name, + "channel_type": channel_type, + "description": description, + "creator": creator, + }, + ) + if not created: + channel.channel_name = channel_name or channel.channel_name + channel.channel_type = channel_type or channel.channel_type + channel.description = description + if creator is not None: + channel.creator = creator + channel.save() + return channel, None, created + + priv, created = SlackChannelPrivate.objects.get_or_create( team=team, - channel_id=slack_channel["id"], + channel_id=cid, defaults={ "channel_name": channel_name, "channel_type": channel_type, @@ -155,13 +193,13 @@ def get_or_create_slack_channel( }, ) if not created: - channel.channel_name = channel_name or channel.channel_name - channel.channel_type = channel_type or channel.channel_type - channel.description = description + priv.channel_name = channel_name or priv.channel_name + priv.channel_type = channel_type or priv.channel_type + priv.description = description if creator is not None: - channel.creator = creator - channel.save() - return channel, created + priv.creator = creator + priv.save() + return None, priv, created # --- SlackChannelMembership --- diff --git a/cppa_slack_tracker/slack_oauth_server.py b/cppa_slack_tracker/slack_oauth_server.py new file mode 100644 index 00000000..309cbf73 --- /dev/null +++ b/cppa_slack_tracker/slack_oauth_server.py @@ -0,0 +1,414 @@ +""" +Slack user OAuth server + +Loads .env from project root. + +- GET / → landing page with links (root is not Slack's callback) +- GET /slack/connect → Slack authorize URL with CSRF `state` (server-side, TTL) +- GET /slack/oauth/callback → `?code=...&state=...`; state validated before token exchange +- GET /slack/authorized → list authorized users (user_id, team_id; tokens not shown) +""" + +from __future__ import annotations + +import json +import logging +import os +import secrets +import tempfile +import time +from html import escape +from pathlib import Path +from urllib.parse import urlencode + +import httpx +from dotenv import load_dotenv +from starlette.applications import Starlette +from starlette.requests import Request +from starlette.responses import HTMLResponse, RedirectResponse +from starlette.routing import Route + +# Load .env from project root (src/ingestion -> src -> project root) +_ROOT = Path(__file__).resolve().parent.parent.parent +load_dotenv(_ROOT / ".env") + +logger = logging.getLogger(__name__) + +TOKENS_FILE = _ROOT / "credential" / "slack_user_tokens.json" + +# CSRF: one-time OAuth state values (server-side). Values are secrets.token_urlsafe; +# each maps to unix expiry time. Prevents replay after TTL via expiry + pop-on-use. +_oauth_states: dict[str, float] = {} + + +def _oauth_state_ttl_s() -> int: + return max(60, int(os.environ.get("OAUTH_STATE_TTL_S", "600"))) + + +def _purge_expired_oauth_states() -> None: + now = time.time() + expired = [s for s, until in _oauth_states.items() if until < now] + for s in expired: + del _oauth_states[s] + + +def _register_oauth_state(state: str) -> None: + _purge_expired_oauth_states() + _oauth_states[state] = time.time() + _oauth_state_ttl_s() + + +def _consume_oauth_state(state: str | None) -> bool: + """Validate and consume a state (single use). False if missing, unknown, or expired.""" + if not state: + return False + _purge_expired_oauth_states() + until = _oauth_states.pop(state, None) + if until is None: + return False + if time.time() > until: + return False + return True + + +# --------------------------------------------------------------------------- +# Static HTML responses (predefined at import time) +# --------------------------------------------------------------------------- + +_HTML_INDEX = HTMLResponse(content=""" + + + WG21 Paralegal — Slack OAuth + +

Slack OAuth helper

+

Use these links:

+ +

Slack redirects to /slack/oauth/callback after authorization.

+ + + """) + +_HTML_MISSING_CODE = HTMLResponse( + content=""" + + Slack OAuth + +

Missing code

+

No code in query string. Start from + /slack/connect. +

+ + """, + status_code=400, +) + +_HTML_SUCCESS = HTMLResponse(content=""" + + + Slack OAuth + +

You're all set

+

This user is now authorized. Your app can receive + "Subscribe to events on behalf of users" events for them.

+

Token stored in slack_user_tokens.json.

+ + + """) + + +# --------------------------------------------------------------------------- +# Dynamic HTML response builders (depend on runtime values) +# --------------------------------------------------------------------------- + +def _html_auth_error(error: str) -> HTMLResponse: + safe_error = escape(error) + return HTMLResponse( + content=f""" + + Slack OAuth +

Authorization failed

Slack returned: {safe_error}

+ """, + status_code=400, + ) + + +def _html_exchange_failed(error: str) -> HTMLResponse: + safe_error = escape(error) + return HTMLResponse( + content=f""" + + Slack OAuth + +

Token exchange failed

+

{safe_error}

+ + """, + status_code=400, + ) + + +def _html_invalid_oauth_state() -> HTMLResponse: + return HTMLResponse( + content=""" + + Slack OAuth + +

Invalid or expired session

+

OAuth state was missing, invalid, or expired. Start again from + /slack/connect. +

+ + """, + status_code=400, + ) + + +def _html_authorized(table_body: str) -> HTMLResponse: + return HTMLResponse(content=f""" + + + Authorized users + +

Authorized users

+

Users who have completed the "Connect Slack" flow. + Tokens are stored in slack_user_tokens.json (not shown here).

+ + + + {table_body} + +
User IDTeam ID
+

Add another user

+ + + """) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _get_env(key: str) -> str: + val = (os.environ.get(key) or "").strip() + # If the value accidentally includes "KEY=" (e.g. pasted full .env line), strip the prefix. + if val.startswith(f"{key}="): + val = val.split("=", 1)[1].strip() + if not val or val == "your_client_id_here" or val == "your_client_secret_here": + raise RuntimeError( + f"Set {key} in .env (copy from .env.example and fill in your Slack app credentials)" + ) + return val + + +def _load_tokens() -> dict[str, dict]: # type: ignore[type-arg] + if not TOKENS_FILE.exists(): + return {} + with TOKENS_FILE.open(encoding="utf-8") as f: + return json.load(f) # type: ignore[no-any-return] + + +def _ensure_tokens_dir() -> None: + """Create credential dir with owner-only perms where possible.""" + # TODO: move token storage to a dedicated secret manager when available. + TOKENS_FILE.parent.mkdir(mode=0o700, parents=True, exist_ok=True) + try: + TOKENS_FILE.parent.chmod(0o700) + except OSError: + # Best effort on non-POSIX filesystems/platforms. + pass + + +def _save_tokens(data: dict[str, dict]) -> None: # type: ignore[type-arg] + _ensure_tokens_dir() + with tempfile.NamedTemporaryFile( + "w", + encoding="utf-8", + dir=TOKENS_FILE.parent, + delete=False, + prefix=f"{TOKENS_FILE.name}.", + suffix=".tmp", + ) as f: + json.dump(data, f, indent=2) + f.flush() + os.fsync(f.fileno()) + tmp_path = Path(f.name) + try: + tmp_path.chmod(0o600) + except OSError: + pass + tmp_path.replace(TOKENS_FILE) + try: + TOKENS_FILE.chmod(0o600) + except OSError: + # Best effort on non-POSIX filesystems/platforms. + pass + + +async def _exchange_code(code: str) -> dict: # type: ignore[type-arg] + """POST to oauth.v2.access. + + Returns Slack's JSON object on success. On transport, HTTP, or parse errors returns + a dict with ok=False and an error string so slack_oauth_callback can call + _html_exchange_failed. + """ + try: + client_id = _get_env("SLACK_CLIENT_ID") + client_secret = _get_env("SLACK_CLIENT_SECRET") + redirect_uri = _get_env("SLACK_REDIRECT_URI") + except RuntimeError as exc: + return {"ok": False, "error": str(exc)} + + try: + async with httpx.AsyncClient() as client: + resp = await client.post( + "https://slack.com/api/oauth.v2.access", + data={ + "client_id": client_id, + "client_secret": client_secret, + "code": code, + "redirect_uri": redirect_uri, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=30.0, + ) + resp.raise_for_status() + except httpx.HTTPStatusError as exc: + snippet = "" + try: + snippet = (exc.response.text or "")[:400] + except Exception: + logger.debug( + "Failed to read HTTP error response body for Slack OAuth token exchange", + exc_info=True, + ) + err = f"HTTP {exc.response.status_code}" + if snippet: + err = f"{err}: {snippet}" + return {"ok": False, "error": err} + except httpx.RequestError as exc: + return {"ok": False, "error": f"Token request failed: {exc}"} + + try: + data = resp.json() + except (json.JSONDecodeError, ValueError) as exc: + return {"ok": False, "error": f"Invalid response from Slack (not JSON): {exc}"} + + if not isinstance(data, dict): + return {"ok": False, "error": "Unexpected response from Slack (expected JSON object)."} + return data + + +def _store_token(data: dict) -> None: # type: ignore[type-arg] + """Persist the authed_user token from an oauth.v2.access response.""" + authed_user = data.get("authed_user") or {} + user_token = authed_user.get("access_token") + user_id = authed_user.get("id") + team_id = (data.get("team") or {}).get("id") + if not (user_token and user_id): + return + tokens = _load_tokens() + key = f"{team_id}:{user_id}" if team_id else user_id + tokens[key] = {"user_id": user_id, "team_id": team_id, "access_token": user_token} + _save_tokens(tokens) + + +async def index(_: Request) -> HTMLResponse: + """Root URL — OAuth entry points live under /slack/…""" + return _HTML_INDEX + + +async def slack_connect(_: Request) -> RedirectResponse: + """Redirect user to Slack authorize URL (entry point for 'Connect Slack').""" + client_id = _get_env("SLACK_CLIENT_ID") + redirect_uri = _get_env("SLACK_REDIRECT_URI") + user_scope = os.environ.get( + "SLACK_USER_SCOPES", + "channels:history,channels:read,groups:history,groups:read," + "im:history,im:read,mpim:history,mpim:read", + ) + state = secrets.token_urlsafe(32) + _register_oauth_state(state) + params = { + "client_id": client_id, + "user_scope": user_scope, + "redirect_uri": redirect_uri, + "state": state, + } + url = "https://slack.com/oauth/v2/authorize?" + urlencode(params) + return RedirectResponse(url=url, status_code=302) + + +async def slack_oauth_callback(request: Request) -> HTMLResponse: + """Handle redirect from Slack: exchange code for user token, store it, show success.""" + code = request.query_params.get("code") + error = request.query_params.get("error") + state = request.query_params.get("state") + + if not _consume_oauth_state(state): + return _html_invalid_oauth_state() + + if error: + return _html_auth_error(error) + + if not code: + return _HTML_MISSING_CODE + + try: + data = await _exchange_code(code) + except Exception as exc: + return _html_exchange_failed(f"Token exchange failed: {exc}") + + if not data.get("ok"): + return _html_exchange_failed(data.get("error", "Unknown error")) + + _store_token(data) + + return _HTML_SUCCESS + + +async def slack_authorized(_: Request) -> HTMLResponse: + """List authorized users (user_id, team_id); tokens are not shown.""" + tokens = _load_tokens() + rows = [ + f"{escape(str(d.get('user_id', '')))}" + f"{escape(str(d.get('team_id', '')))}" + for d in tokens.values() + ] + no_rows = 'No users authorized yet.' + table_body = "\n".join(rows) if rows else no_rows + return _html_authorized(table_body) + + +def _slack_oauth_debug_enabled() -> bool: + """Starlette debug mode — off unless SLACK_OAUTH_DEBUG is truthy (unsafe for production).""" + return os.environ.get("SLACK_OAUTH_DEBUG", "").strip().lower() in ( + "1", + "true", + "yes", + "on", + ) + + +app = Starlette( + debug=_slack_oauth_debug_enabled(), + routes=[ + Route("/", index), + Route("/slack/connect", slack_connect), + Route("/slack/oauth/callback", slack_oauth_callback), + Route("/slack/authorized", slack_authorized), + ], +) + + +def main() -> None: + import uvicorn + + port = int(os.environ.get("SLACK_OAUTH_PORT", "8000")) + host = (os.environ.get("HOST") or "127.0.0.1").strip() or "127.0.0.1" + uvicorn.run(app, host=host, port=port) + + +if __name__ == "__main__": + main() diff --git a/cppa_slack_tracker/sync/sync_channel.py b/cppa_slack_tracker/sync/sync_channel.py index a5738e77..1e63dc36 100644 --- a/cppa_slack_tracker/sync/sync_channel.py +++ b/cppa_slack_tracker/sync/sync_channel.py @@ -47,13 +47,13 @@ def sync_team(team_id: str, team_name: Optional[str] = None) -> SlackTeam: def _process_channel_info(ch: dict, team: SlackTeam) -> bool: """ - Process one channel: get_or_create_slack_channel. Returns True if synced, - False if skipped (missing id or non-public channel). Raises on error. + Process one channel: get_or_create_slack_channel. Returns True if synced + to SlackChannel (public) or SlackChannelPrivate (non-public). Raises on error. """ if not ch.get("id"): return False - channel, _ = get_or_create_slack_channel(ch, team) - return channel is not None + pub, priv, _ = get_or_create_slack_channel(ch, team) + return pub is not None or priv is not None def sync_channels( @@ -61,14 +61,15 @@ def sync_channels( *, channel_id: Optional[str] = None, team_id: Optional[str] = None, - types: str = "public_channel", + types: str = "public_channel,private_channel,mpim,im", exclude_archived: bool = False, ) -> tuple[int, int]: """ Sync channels for a team to the database. If channel_id is set, fetch only that channel (conversations.info). - Otherwise fetch via fetch_channel_list(team_id). Returns (success_count, error_count). + Otherwise fetch via fetch_channel_list(team_id). Default types are all + Slack conversation kinds (public, private, mpim, im). Returns (success_count, error_count). """ success_count = 0 error_count = 0 diff --git a/cppa_slack_tracker/tests/test_services.py b/cppa_slack_tracker/tests/test_services.py index 81cd4ced..28267fbe 100644 --- a/cppa_slack_tracker/tests/test_services.py +++ b/cppa_slack_tracker/tests/test_services.py @@ -14,6 +14,7 @@ _parse_slack_ts_string, ) from cppa_slack_tracker.models import ( + SlackChannelPrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, ) @@ -92,17 +93,42 @@ def test_add_slack_channel( self, sample_slack_team, sample_slack_user, sample_slack_channel_data ): """Test adding a Slack channel.""" - channel, _ = get_or_create_slack_channel( + channel, priv, _ = get_or_create_slack_channel( sample_slack_channel_data, sample_slack_team, ) + assert priv is None + assert channel is not None assert channel.channel_id == "C87654321" assert channel.channel_name == "random" assert channel.channel_type == "public_channel" assert channel.description == "Random discussions" assert channel.creator == sample_slack_user + def test_add_slack_channel_private( + self, sample_slack_team, sample_slack_user + ): + """Non-public channels are stored in SlackChannelPrivate.""" + data = { + "id": "G012PRIVATE1", + "name": "private-team", + "is_private": True, + "is_channel": True, + "creator": sample_slack_user.slack_user_id, + "purpose": {"value": "Private stuff"}, + } + pub, priv, created = get_or_create_slack_channel(data, sample_slack_team) + assert pub is None + assert created is True + assert priv is not None + assert priv.channel_id == "G012PRIVATE1" + assert priv.channel_name == "private-team" + assert priv.channel_type == "private_channel" + assert priv.description == "Private stuff" + assert priv.creator == sample_slack_user + assert SlackChannelPrivate.objects.filter(channel_id="G012PRIVATE1").exists() + def test_add_channel_membership_change( self, sample_slack_channel, sample_slack_user ): From e18e69e8ddfee7c4246b12a650eaf9ba890d335b Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Wed, 8 Apr 2026 09:22:49 -0600 Subject: [PATCH 2/7] Add schema migration for private data. --- cppa_slack_tracker/admin.py | 31 +++ .../commands/run_cppa_slack_tracker.py | 29 +- ...am_team_id_slackchannelprivate_and_more.py | 259 ++++++++++++++++-- cppa_slack_tracker/models.py | 56 +++- cppa_slack_tracker/services.py | 73 +++++ cppa_slack_tracker/sync/__init__.py | 2 + cppa_slack_tracker/sync/sync_channel_user.py | 34 ++- cppa_slack_tracker/sync/sync_message.py | 39 ++- 8 files changed, 482 insertions(+), 41 deletions(-) diff --git a/cppa_slack_tracker/admin.py b/cppa_slack_tracker/admin.py index a1c5acf4..a7462b15 100644 --- a/cppa_slack_tracker/admin.py +++ b/cppa_slack_tracker/admin.py @@ -9,6 +9,7 @@ SlackChannel, SlackChannelPrivate, SlackMessage, + SlackMessagePrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, ) @@ -87,6 +88,36 @@ def message_preview(self, obj): return obj.message[:50] + "..." if len(obj.message) > 50 else obj.message +@admin.register(SlackMessagePrivate) +class SlackMessagePrivateAdmin(admin.ModelAdmin): + list_display = ( + "ts", + "channel", + "user", + "message_preview", + "slack_message_created_at", + ) + list_filter = ("slack_message_created_at", "slack_message_updated_at") + search_fields = ( + "ts", + "message", + "channel__channel_name", + "user__username", + ) + readonly_fields = ( + "ts", + "slack_message_created_at", + "slack_message_updated_at", + ) + date_hierarchy = "slack_message_created_at" + raw_id_fields = ("channel", "user") + + @admin.display(description="Message Preview") + def message_preview(self, obj): + """Return a short preview of the message.""" + return obj.message[:50] + "..." if len(obj.message) > 50 else obj.message + + @admin.register(SlackChannelMembership) class SlackChannelMembershipAdmin(admin.ModelAdmin): list_display = ( diff --git a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py index b44e04fe..6ebbddb1 100644 --- a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py +++ b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py @@ -17,10 +17,11 @@ from django.conf import settings from django.core.management.base import BaseCommand, CommandError -from cppa_slack_tracker.models import SlackTeam -from cppa_slack_tracker.services import save_slack_message +from cppa_slack_tracker.models import SlackChannelPrivate, SlackTeam +from cppa_slack_tracker.services import save_slack_message, save_slack_message_private from cppa_slack_tracker.sync import ( get_channels_to_sync, + get_private_channels_to_sync, sync_channel_users, sync_channels, sync_messages, @@ -302,10 +303,12 @@ def sync_messages(self, options, team: SlackTeam): Sync messages via sync.sync_messages (workspace JSONs, then fetch by day). Optional legacy: load from --messages-json path and save to DB first. """ - channels = get_channels_to_sync( - team, channel_id=(options.get("channel_id") or "").strip() or None + channel_id_opt = (options.get("channel_id") or "").strip() or None + channels = get_channels_to_sync(team, channel_id=channel_id_opt) + private_channels = get_private_channels_to_sync( + team, channel_id=channel_id_opt ) - if not channels: + if not channels and not private_channels: logger.warning("No channels to sync. Sync channels first.") return @@ -324,6 +327,9 @@ def sync_messages(self, options, team: SlackTeam): len(all_loaded), ) channel_by_id = {c.channel_id: c for c in channels} + channel_by_id.update( + {c.channel_id: c for c in private_channels} + ) load_failures = 0 for msg in all_loaded: if not isinstance(msg, dict): @@ -344,7 +350,10 @@ def sync_messages(self, options, team: SlackTeam): ) continue try: - save_slack_message(channel, msg) + if isinstance(channel, SlackChannelPrivate): + save_slack_message_private(channel, msg) + else: + save_slack_message(channel, msg) except Exception: msg_ts = msg.get("ts", msg.get("client_msg_id", "?")) logger.exception( @@ -371,6 +380,14 @@ def sync_messages(self, options, team: SlackTeam): s, e, ) + for channel in private_channels: + s, e = sync_messages(channel, start_date=start_d, end_date=end_d) + logger.info( + " #%s (non-public): %s saved, %s errors", + channel.channel_name, + s, + e, + ) def sync_to_pinecone(self, team: SlackTeam): """Sync Slack messages to Pinecone after message sync.""" diff --git a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py index 2c6fabc7..ffbb6b95 100644 --- a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py +++ b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py @@ -1,43 +1,260 @@ -# Generated by Django 4.2.29 on 2026-04-06 20:07 +# Slack private schema, SlackChannelPrivate, SlackMessagePrivate, and PostgreSQL grants. + +from __future__ import annotations + +import logging +import re -from django.db import migrations, models import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + +logger = logging.getLogger(__name__) + +_PG_IDENT = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") +_SCHEMA = "slack_private" +_TABLES = ( + "cppa_slack_tracker_slackchannel_private", + "cppa_slack_tracker_slackmessage_private", +) + + +def _validate_pg_role(name: str) -> str: + if not _PG_IDENT.fullmatch(name): + raise ValueError( + "PRIVATE_ACCESS_USER must be a valid PostgreSQL identifier " + f"(letters, digits, underscore; letter or underscore first): {name!r}" + ) + return name + + +def _pg_qualified_ident(connection, schema: str, name: str) -> str: + """Match Django db_table hack: schema + '.' + relation -> \"schema\".\"name\".""" + return connection.ops.quote_name(f'{schema}"."{name}') + + +def _flush_deferred_ddl(schema_editor) -> None: + """Run deferred SQL now so tables exist before REVOKE/GRANT in RunPython.""" + while schema_editor.deferred_sql: + schema_editor.execute(schema_editor.deferred_sql.pop(0)) + + +def apply_slack_private_schema(_apps, schema_editor) -> None: + connection = schema_editor.connection + if connection.vendor != "postgresql": + logger.info("Not PostgreSQL; skipping CREATE SCHEMA slack_private.") + return + with connection.cursor() as cursor: + cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}") + cursor.execute(f"REVOKE ALL ON SCHEMA {_SCHEMA} FROM PUBLIC") + + +def reverse_slack_private_schema(_apps, schema_editor) -> None: + connection = schema_editor.connection + if connection.vendor != "postgresql": + return + with connection.cursor() as cursor: + cursor.execute(f"DROP SCHEMA IF EXISTS {_SCHEMA} CASCADE") + + +def apply_private_table_grants(_apps, schema_editor) -> None: + role = getattr(settings, "PRIVATE_ACCESS_USER", None) or "" + role = role.strip() + if not role: + logger.warning( + "PRIVATE_ACCESS_USER is unset; skipping private Slack table GRANT/REVOKE " + "(set env and re-run migrate if needed)." + ) + return + + role = _validate_pg_role(role) + connection = schema_editor.connection + if connection.vendor != "postgresql": + logger.info("Not PostgreSQL; skipping private Slack table GRANT/REVOKE.") + return + + _flush_deferred_ddl(schema_editor) + + with connection.cursor() as cursor: + cursor.execute( + f"GRANT USAGE ON SCHEMA {connection.ops.quote_name(_SCHEMA)} TO {role}" + ) + for table in _TABLES: + qtable = _pg_qualified_ident(connection, _SCHEMA, table) + cursor.execute(f"REVOKE ALL ON TABLE {qtable} FROM PUBLIC") + cursor.execute( + f"GRANT SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER " + f"ON TABLE {qtable} TO {role}" + ) + qseq = _pg_qualified_ident(connection, _SCHEMA, f"{table}_id_seq") + cursor.execute(f"REVOKE ALL ON SEQUENCE {qseq} FROM PUBLIC") + cursor.execute(f"GRANT USAGE, SELECT ON SEQUENCE {qseq} TO {role}") + + +def reverse_private_table_grants(_apps, schema_editor) -> None: + role = getattr(settings, "PRIVATE_ACCESS_USER", None) or "" + role = role.strip() + if not role or not _PG_IDENT.fullmatch(role): + return + connection = schema_editor.connection + if connection.vendor != "postgresql": + return + + with connection.cursor() as cursor: + for table in _TABLES: + qtable = _pg_qualified_ident(connection, _SCHEMA, table) + cursor.execute( + f"REVOKE ALL PRIVILEGES ON TABLE {qtable} FROM {role} CASCADE" + ) + qseq = _pg_qualified_ident(connection, _SCHEMA, f"{table}_id_seq") + cursor.execute( + f"REVOKE ALL PRIVILEGES ON SEQUENCE {qseq} FROM {role} CASCADE" + ) + cursor.execute( + f"REVOKE USAGE ON SCHEMA {connection.ops.quote_name(_SCHEMA)} FROM {role}" + ) class Migration(migrations.Migration): dependencies = [ - ('cppa_user_tracker', '0005_alter_slackuser_slack_user_id'), - ('cppa_slack_tracker', '0003_alter_slackchannel_unique_team_channel_id'), + ("cppa_user_tracker", "0004_alter_slackuser_slack_user_id_and_more"), + ("cppa_slack_tracker", "0003_alter_slackchannel_unique_team_channel_id"), ] operations = [ migrations.AlterField( - model_name='slackteam', - name='team_id', + model_name="slackteam", + name="team_id", field=models.CharField(max_length=50, unique=True), ), + migrations.RunPython(apply_slack_private_schema, reverse_slack_private_schema), migrations.CreateModel( - name='SlackChannelPrivate', + name="SlackChannelPrivate", fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('channel_id', models.CharField(db_index=True, max_length=50)), - ('channel_name', models.CharField(db_index=True, max_length=255)), - ('channel_type', models.CharField(choices=[('private_channel', 'Private channel'), ('mpim', 'Multi-party direct message'), ('im', 'Direct message')], db_index=True, help_text='Type: private_channel, mpim, or im (not public_channel).', max_length=50)), - ('description', models.TextField(blank=True, null=True)), - ('created_at', models.DateTimeField(auto_now_add=True)), - ('updated_at', models.DateTimeField(auto_now=True)), - ('creator', models.ForeignKey(blank=True, db_column='creator_user_id', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='created_private_channels', to='cppa_user_tracker.slackuser')), - ('team', models.ForeignKey(db_column='team_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_channels', to='cppa_slack_tracker.slackteam')), + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("channel_id", models.CharField(db_index=True, max_length=50)), + ("channel_name", models.CharField(db_index=True, max_length=255)), + ( + "channel_type", + models.CharField( + choices=[ + ("private_channel", "Private channel"), + ("mpim", "Multi-party direct message"), + ("im", "Direct message"), + ], + db_index=True, + help_text="Type: private_channel, mpim, or im (not public_channel).", + max_length=50, + ), + ), + ("description", models.TextField(blank=True, null=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "creator", + models.ForeignKey( + blank=True, + db_column="creator_user_id", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="created_private_channels", + to="cppa_user_tracker.slackuser", + ), + ), + ( + "team", + models.ForeignKey( + db_column="team_id", + on_delete=django.db.models.deletion.CASCADE, + related_name="private_channels", + to="cppa_slack_tracker.slackteam", + ), + ), ], options={ - 'verbose_name': 'Slack Channel (non-public)', - 'verbose_name_plural': 'Slack Channels (non-public)', - 'db_table': 'cppa_slack_tracker_slackchannel_private', + "verbose_name": "Slack Channel (non-public)", + "verbose_name_plural": "Slack Channels (non-public)", + "db_table": 'slack_private"."cppa_slack_tracker_slackchannel_private', }, ), migrations.AddConstraint( - model_name='slackchannelprivate', - constraint=models.UniqueConstraint(fields=('team', 'channel_id'), name='unique_team_channel_id_private'), + model_name="slackchannelprivate", + constraint=models.UniqueConstraint( + fields=("team", "channel_id"), + name="unique_team_channel_id_private", + ), + ), + migrations.CreateModel( + name="SlackMessagePrivate", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "ts", + models.CharField( + db_index=True, + help_text="Slack message timestamp (unique per channel)", + max_length=50, + ), + ), + ("message", models.TextField(blank=True)), + ( + "thread_ts", + models.CharField( + blank=True, + db_index=True, + help_text="Thread timestamp if this is a threaded message", + max_length=50, + null=True, + ), + ), + ("slack_message_created_at", models.DateTimeField(db_index=True)), + ( + "slack_message_updated_at", + models.DateTimeField(blank=True, db_index=True, null=True), + ), + ( + "channel", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="messages", + to="cppa_slack_tracker.slackchannelprivate", + ), + ), + ( + "user", + models.ForeignKey( + blank=True, + db_column="slack_user_id", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="private_slack_messages", + to="cppa_user_tracker.slackuser", + ), + ), + ], + options={ + "verbose_name": "Slack Message (non-public channel)", + "verbose_name_plural": "Slack Messages (non-public channels)", + "db_table": 'slack_private"."cppa_slack_tracker_slackmessage_private', + "unique_together": {("channel", "ts")}, + }, ), + migrations.RunPython(apply_private_table_grants, reverse_private_table_grants), ] diff --git a/cppa_slack_tracker/models.py b/cppa_slack_tracker/models.py index 8ea9b0a5..d47faf73 100644 --- a/cppa_slack_tracker/models.py +++ b/cppa_slack_tracker/models.py @@ -128,7 +128,9 @@ class SlackChannelPrivate(models.Model): updated_at = models.DateTimeField(auto_now=True) class Meta: - db_table = "cppa_slack_tracker_slackchannel_private" + # Django quotes db_table as one identifier unless we inject a closing quote; + # this yields "slack_private"."cppa_slack_tracker_slackchannel_private" in SQL. + db_table = 'slack_private"."cppa_slack_tracker_slackchannel_private' verbose_name = "Slack Channel (non-public)" verbose_name_plural = "Slack Channels (non-public)" constraints = [ @@ -190,6 +192,58 @@ def __str__(self): return f"Message by {self.user} in {self.channel}: {message_preview}" +class SlackMessagePrivate(models.Model): + """ + Slack messages for non-public channels (private, mpim, im). + + Stored in cppa_slack_tracker_slackmessage_private; mirrors SlackMessage but + references SlackChannelPrivate. + """ + + channel = models.ForeignKey( + SlackChannelPrivate, + on_delete=models.CASCADE, + related_name="messages", + ) + ts = models.CharField( + max_length=50, + db_index=True, + help_text="Slack message timestamp (unique per channel)", + ) + user = models.ForeignKey( + SlackUser, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="private_slack_messages", + db_column="slack_user_id", + ) + message = models.TextField(blank=True) + thread_ts = models.CharField( + max_length=50, + null=True, + blank=True, + db_index=True, + help_text="Thread timestamp if this is a threaded message", + ) + slack_message_created_at = models.DateTimeField(db_index=True) + slack_message_updated_at = models.DateTimeField( + db_index=True, null=True, blank=True + ) + + class Meta: + db_table = 'slack_private"."cppa_slack_tracker_slackmessage_private' + verbose_name = "Slack Message (non-public channel)" + verbose_name_plural = "Slack Messages (non-public channels)" + unique_together = [["channel", "ts"]] + + def __str__(self): + message_preview = ( + self.message[:50] + "..." if len(self.message) > 50 else self.message + ) + return f"Message by {self.user} in {self.channel}: {message_preview}" + + class SlackChannelMembership(models.Model): """ Current channel membership status. diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py index 52d92633..199f269d 100644 --- a/cppa_slack_tracker/services.py +++ b/cppa_slack_tracker/services.py @@ -26,6 +26,7 @@ SlackChannelMembership, SlackChannelMembershipChangeLog, SlackMessage, + SlackMessagePrivate, SlackTeam, ) @@ -388,3 +389,75 @@ def save_slack_message( message.slack_message_updated_at = updated_at message.save() return message + + +@transaction.atomic +def save_slack_message_private( + channel: SlackChannelPrivate, + slack_message: dict[str, Any], +) -> Optional[SlackMessagePrivate]: + """ + Save or update a Slack message for a non-public channel. + + Same rules as save_slack_message, but persists to SlackMessagePrivate. + channel_join / channel_leave are ignored here: membership for non-public + channels is not modeled on SlackChannelMembership (public SlackChannel only). + """ + subtype = slack_message.get("subtype") + if subtype in SUBTYPE_IGNORE: + return None + if subtype in ("channel_join", "channel_leave"): + return None + + user: Optional[SlackUser] = None + text: str + if subtype == "file_comment": + user = _get_or_fetch_slack_user( + slack_message.get("user", "") or "-1", team_id=channel.team.team_id + ) + text = slack_message.get("text", "") + comment = slack_message.get("comment") + if isinstance(comment, dict): + text += f"\nComment: {comment.get('comment', '')}" + elif subtype: + text = _message_text_for_subtype(slack_message, subtype) or "" + else: + text = slack_message.get("text", "") + + if user is None: + user_id = slack_message.get("user") + if not user_id: + if slack_message.get("text") == "A file was commented on": + return None + raise ValueError("User not found") + user = _get_or_fetch_slack_user(user_id, team_id=channel.team.team_id) + + clean_text = text.replace("\x00", "").replace("\u0000", "") + ts = slack_message.get("ts") + if not ts: + raise ValueError("Message timestamp (ts) is required") + created_at = _parse_slack_ts_string(ts) + edited = slack_message.get("edited") + if not isinstance(edited, dict): + edited = {} + updated_at = _parse_slack_ts_string(edited.get("ts", ts)) if edited else created_at + + message, created = SlackMessagePrivate.objects.get_or_create( + channel=channel, + ts=ts, + defaults={ + "user": user, + "message": clean_text, + "thread_ts": slack_message.get("thread_ts"), + "slack_message_created_at": created_at, + "slack_message_updated_at": updated_at, + }, + ) + if not created: + message.user = user + message.message = clean_text + message.thread_ts = slack_message.get("thread_ts") + message.slack_message_created_at = created_at + message.slack_message_updated_at = updated_at + message.save() + return message diff --git a/cppa_slack_tracker/sync/__init__.py b/cppa_slack_tracker/sync/__init__.py index 84160ec9..ca36c15d 100644 --- a/cppa_slack_tracker/sync/__init__.py +++ b/cppa_slack_tracker/sync/__init__.py @@ -3,6 +3,7 @@ from cppa_slack_tracker.sync.sync_channel import sync_channels, sync_team from cppa_slack_tracker.sync.sync_channel_user import ( get_channels_to_sync, + get_private_channels_to_sync, sync_channel_users, ) from cppa_slack_tracker.sync.sync_message import sync_messages @@ -10,6 +11,7 @@ __all__ = [ "get_channels_to_sync", + "get_private_channels_to_sync", "sync_channels", "sync_channel_users", "sync_messages", diff --git a/cppa_slack_tracker/sync/sync_channel_user.py b/cppa_slack_tracker/sync/sync_channel_user.py index 24d94199..75f1a9c7 100644 --- a/cppa_slack_tracker/sync/sync_channel_user.py +++ b/cppa_slack_tracker/sync/sync_channel_user.py @@ -3,8 +3,8 @@ Fetches channel member lists via cppa_slack_tracker.fetcher.fetch_channel_user_list and syncs memberships to the database. Use get_channels_to_sync() to get the list -of channels to sync (one or all in a team); run_cppa_slack_tracker uses it to -avoid duplicating channel resolution logic. +of channels to sync (one or all in a team); get_private_channels_to_sync does +the same for SlackChannelPrivate. run_cppa_slack_tracker uses both for message sync. """ from __future__ import annotations @@ -13,7 +13,7 @@ from typing import Optional from cppa_slack_tracker.fetcher import fetch_channel_user_list -from cppa_slack_tracker.models import SlackChannel, SlackTeam +from cppa_slack_tracker.models import SlackChannel, SlackChannelPrivate, SlackTeam from cppa_slack_tracker.services import sync_channel_memberships logger = logging.getLogger(__name__) @@ -43,6 +43,34 @@ def get_channels_to_sync( return list(SlackChannel.objects.filter(team=team).order_by("channel_id")) +def get_private_channels_to_sync( + team: SlackTeam, + *, + channel_id: Optional[str] = None, +) -> list[SlackChannelPrivate]: + """ + Return non-public channels to sync for a team (for message sync and similar). + + If channel_id is set and exists on SlackChannelPrivate, returns that channel; + if not found, logs a warning and returns all private channels in the team. + If channel_id is not set, returns all SlackChannelPrivate rows ordered by channel_id. + """ + if channel_id: + try: + return [ + SlackChannelPrivate.objects.get(team=team, channel_id=channel_id) + ] + except SlackChannelPrivate.DoesNotExist: + logger.warning( + "Private channel %s not found in team %s; syncing all non-public channels.", + channel_id, + team.team_id, + ) + return list( + SlackChannelPrivate.objects.filter(team=team).order_by("channel_id") + ) + + def sync_channel_users( team: SlackTeam, *, diff --git a/cppa_slack_tracker/sync/sync_message.py b/cppa_slack_tracker/sync/sync_message.py index 242b2e11..e976f224 100644 --- a/cppa_slack_tracker/sync/sync_message.py +++ b/cppa_slack_tracker/sync/sync_message.py @@ -1,6 +1,9 @@ """ Sync Slack messages with the database. +Works for both SlackChannel (public) and SlackChannelPrivate (non-public); the +latter persists rows via SlackMessagePrivate (cppa_slack_tracker_slackmessage_private). + Flow when start_date is None: 1. Process any existing workspace JSONs for the channel (old → new), remove them. 2. Determine start_date: same day as last message in DB (to avoid missing same-day @@ -27,8 +30,13 @@ from django.db.models.functions import Coalesce from cppa_slack_tracker.fetcher import fetch_messages -from cppa_slack_tracker.models import SlackChannel, SlackMessage -from cppa_slack_tracker.services import save_slack_message +from cppa_slack_tracker.models import ( + SlackChannel, + SlackChannelPrivate, + SlackMessage, + SlackMessagePrivate, +) +from cppa_slack_tracker.services import save_slack_message, save_slack_message_private from cppa_slack_tracker.workspace import ( get_message_json_path, get_raw_message_json_path, @@ -69,15 +77,21 @@ def _messages_by_day( return dict(by_day) -def _process_message(channel: SlackChannel, msg: dict) -> bool: +def _process_message( + channel: SlackChannel | SlackChannelPrivate, msg: dict +) -> bool: """ - Process one message: save_slack_message. Returns True if saved, False if - skipped (e.g. ignored subtype). Raises on error. + Process one message: save to SlackMessage or SlackMessagePrivate. Returns True + if saved, False if skipped (e.g. ignored subtype). Raises on error. """ + if isinstance(channel, SlackChannelPrivate): + return save_slack_message_private(channel, msg) is not None return save_slack_message(channel, msg) is not None -def _process_workspace_jsons(channel: SlackChannel) -> tuple[int, int]: +def _process_workspace_jsons( + channel: SlackChannel | SlackChannelPrivate, +) -> tuple[int, int]: """ Process all existing workspace JSONs for the channel in date order (old to new). Saves messages to DB and removes each workspace file. @@ -113,11 +127,16 @@ def _process_workspace_jsons(channel: SlackChannel) -> tuple[int, int]: return success_count, error_count -def _last_message_date(channel: SlackChannel) -> Optional[date]: +def _last_message_date( + channel: SlackChannel | SlackChannelPrivate, +) -> Optional[date]: """Return the date (UTC) of the most recently updated (or created) message in DB for this channel, or None.""" + if isinstance(channel, SlackChannelPrivate): + qs = SlackMessagePrivate.objects.filter(channel=channel) + else: + qs = SlackMessage.objects.filter(channel=channel) last_dt = ( - SlackMessage.objects.filter(channel=channel) - .annotate( + qs.annotate( effective=Coalesce( F("slack_message_updated_at"), F("slack_message_created_at") ) @@ -148,7 +167,7 @@ def _merge_messages_by_ts( def sync_messages( - channel: SlackChannel, + channel: SlackChannel | SlackChannelPrivate, start_date: date | datetime | None = None, end_date: date | datetime | None = None, ) -> tuple[int, int]: From 41b4f12abade9bbac794bede4611cc2edbfc2547 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Wed, 8 Apr 2026 12:49:23 -0600 Subject: [PATCH 3/7] Implement some functions to save user im(dm) messages. --- .env.example | 20 ++ .gitignore | 3 + cppa_slack_tracker/admin.py | 37 +++ cppa_slack_tracker/fetcher.py | 34 ++ .../commands/run_cppa_slack_tracker.py | 99 +++++- ...am_team_id_slackchannelprivate_and_more.py | 309 +++++------------- cppa_slack_tracker/models.py | 65 ++++ cppa_slack_tracker/services.py | 137 +++++++- cppa_slack_tracker/slack_oauth_server.py | 17 +- cppa_slack_tracker/sync/sync_channel_user.py | 50 ++- cppa_slack_tracker/sync/sync_message.py | 16 +- cppa_slack_tracker/tests/fixtures.py | 29 ++ cppa_slack_tracker/tests/test_services.py | 112 +++++++ cppa_slack_tracker/user_tokens.py | 67 ++++ requirements-dev.txt | 3 + requirements.txt | 6 + 16 files changed, 751 insertions(+), 253 deletions(-) create mode 100644 cppa_slack_tracker/user_tokens.py diff --git a/.env.example b/.env.example index 468461a2..61baee45 100644 --- a/.env.example +++ b/.env.example @@ -229,6 +229,26 @@ DATABASE_URL=postgres://user:password@localhost:5432/boost_dashboard # SLACK_XOXC_TOKEN=xoxc-... # SLACK_XOXD_TOKEN=... +# --- User OAuth (cppa_slack_tracker/slack_oauth_server.py + IM message sync) --- +# Run the helper: python -m cppa_slack_tracker.slack_oauth_server +# Open https://api.slack.com/apps → your app → OAuth & Permissions: +# - Add User Token Scopes (e.g. im:read, im:history, mpim:read, mpim:history, …). +# - Redirect URLs: must match SLACK_REDIRECT_URI exactly (e.g. https://your-host/slack/oauth/callback). +# After a user visits /slack/connect and authorizes, tokens are stored under credential/slack_user_tokens.json +# (same path Django uses via cppa_slack_tracker.user_tokens for run_cppa_slack_tracker IM sync). +# SLACK_CLIENT_ID=10473611477057.xxxxxxxxx +# SLACK_CLIENT_SECRET=xxxxxxxx +# SLACK_REDIRECT_URI=https://your-public-host.example.com/slack/oauth/callback +# Optional: comma-separated user scopes (defaults in slack_oauth_server match IM + channels + groups + mpim). +# SLACK_USER_SCOPES=channels:history,channels:read,groups:history,groups:read,im:history,im:read,mpim:history,mpim:read +# Optional: override JSON path for user tokens (default: /credential/slack_user_tokens.json). +# SLACK_USER_TOKENS_PATH=/absolute/path/to/slack_user_tokens.json +# Optional: OAuth CSRF state TTL (seconds), server bind, debug. +# OAUTH_STATE_TTL_S=600 +# SLACK_OAUTH_PORT=8000 +# HOST=127.0.0.1 +# SLACK_OAUTH_DEBUG=0 + # ============================================================================= # Selenium / Chrome (optional; for Slack xoxc/xoxd token extraction only) # ============================================================================= diff --git a/.gitignore b/.gitignore index d9be1a38..e9e583a7 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,9 @@ celerybeat-schedule celerybeat-schedule-* celerybeat.pid +# Credentials +credential/ + # Environment .env .env.local diff --git a/cppa_slack_tracker/admin.py b/cppa_slack_tracker/admin.py index a7462b15..07a932d4 100644 --- a/cppa_slack_tracker/admin.py +++ b/cppa_slack_tracker/admin.py @@ -12,6 +12,8 @@ SlackMessagePrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, + SlackChannelMembershipPrivate, + SlackChannelMembershipChangeLogPrivate, ) @@ -151,3 +153,38 @@ class SlackChannelMembershipChangeLogAdmin(admin.ModelAdmin): readonly_fields = ("created_at",) date_hierarchy = "created_at" raw_id_fields = ("channel", "user") + + +@admin.register(SlackChannelMembershipPrivate) +class SlackChannelMembershipPrivateAdmin(admin.ModelAdmin): + list_display = ( + "channel", + "user", + "is_restricted", + "is_deleted", + "created_at", + "updated_at", + ) + list_filter = ("is_restricted", "is_deleted", "created_at", "updated_at") + search_fields = ( + "channel__channel_name", + "user__username", + "user__display_name", + ) + readonly_fields = ("created_at", "updated_at") + date_hierarchy = "created_at" + raw_id_fields = ("channel", "user") + + +@admin.register(SlackChannelMembershipChangeLogPrivate) +class SlackChannelMembershipChangeLogPrivateAdmin(admin.ModelAdmin): + list_display = ("channel", "user", "is_joined", "created_at") + list_filter = ("is_joined", "created_at") + search_fields = ( + "channel__channel_name", + "user__username", + "user__display_name", + ) + readonly_fields = ("created_at",) + date_hierarchy = "created_at" + raw_id_fields = ("channel", "user") diff --git a/cppa_slack_tracker/fetcher.py b/cppa_slack_tracker/fetcher.py index 3f374223..41b344ab 100644 --- a/cppa_slack_tracker/fetcher.py +++ b/cppa_slack_tracker/fetcher.py @@ -137,6 +137,40 @@ def fetch_channel_list( return channels +def fetch_im_channel_list_for_user( + team_id: str, + user_token: str, + *, + exclude_archived: bool = False, +) -> list[dict]: + """ + List IM (direct message) conversations visible to the authorizing user. + + Uses a user OAuth token (``xoxp-``); the bot token cannot see user-to-user DMs. + Paginates ``conversations.list`` with ``types=im`` only. + """ + client = get_slack_client(bot_token=user_token, team_id=team_id) + channels: list[dict] = [] + cursor = None + while True: + data = client.conversations_list( + types="im", + exclude_archived=exclude_archived, + limit=500, + cursor=cursor, + ) + if not data.get("ok"): + logger.warning( + "conversations.list (im) failed: %s", data.get("error", "unknown") + ) + break + channels.extend(data.get("channels", [])) + cursor = (data.get("response_metadata") or {}).get("next_cursor") + if not cursor: + break + return channels + + def _ts_to_utc_date(ts: Optional[str]) -> Optional[date]: """Convert Slack ts string to UTC date, or None if invalid.""" if not ts: diff --git a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py index 6ebbddb1..d1407070 100644 --- a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py +++ b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py @@ -18,7 +18,12 @@ from django.core.management.base import BaseCommand, CommandError from cppa_slack_tracker.models import SlackChannelPrivate, SlackTeam -from cppa_slack_tracker.services import save_slack_message, save_slack_message_private +from cppa_slack_tracker.services import ( + get_or_create_slack_channel, + save_slack_message, + save_slack_message_private, +) +from cppa_slack_tracker.user_tokens import iter_user_tokens_for_team from cppa_slack_tracker.sync import ( get_channels_to_sync, get_private_channels_to_sync, @@ -308,8 +313,12 @@ def sync_messages(self, options, team: SlackTeam): private_channels = get_private_channels_to_sync( team, channel_id=channel_id_opt ) - if not channels and not private_channels: - logger.warning("No channels to sync. Sync channels first.") + has_user_tokens = any(iter_user_tokens_for_team(team.team_id)) + if not channels and not private_channels and not has_user_tokens: + logger.warning( + "No channels to sync and no user OAuth tokens for this team. " + "Sync channels first or authorize users via slack_oauth_server for IMs." + ) return start_date_str = (options.get("start_date") or "").strip() or None @@ -389,6 +398,90 @@ def sync_messages(self, options, team: SlackTeam): e, ) + self._sync_im_messages_user_tokens(team, start_d, end_d, channel_id_opt) + + def _sync_im_messages_user_tokens( + self, + team: SlackTeam, + start_d, + end_d, + channel_id_opt: Optional[str], + ) -> None: + """ + List IMs per authorized user token and sync history (user-to-user DMs + not visible to the bot). + """ + from cppa_slack_tracker.fetcher import fetch_im_channel_list_for_user + from operations.slack_ops.tokens import get_slack_client + + tokens = list(iter_user_tokens_for_team(team.team_id)) + if not tokens: + logger.info( + "No Slack user OAuth tokens for team %s; skipping IM user-token sync.", + team.team_id, + ) + return + + logger.info( + "Syncing IM channels with user tokens (%s authorized user(s))...", + len(tokens), + ) + for user_slack_id, access_token in tokens: + user_client = get_slack_client( + bot_token=access_token, team_id=team.team_id + ) + try: + im_channels = fetch_im_channel_list_for_user( + team.team_id, access_token + ) + except Exception: + logger.exception( + "Failed to list IM channels for user %s", user_slack_id + ) + continue + + if channel_id_opt: + im_channels = [ + c + for c in im_channels + if isinstance(c, dict) and c.get("id") == channel_id_opt + ] + + for ch in im_channels: + if not isinstance(ch, dict) or not ch.get("id"): + continue + try: + pub, priv, _created = get_or_create_slack_channel(ch, team) + ch_obj = pub or priv + if ch_obj is None: + continue + except Exception: + logger.exception( + "Failed to upsert IM channel %s", ch.get("id") + ) + continue + + try: + s, e = sync_messages( + ch_obj, + start_date=start_d, + end_date=end_d, + client=user_client, + ) + logger.info( + " IM #%s (user %s): %s saved, %s errors", + ch_obj.channel_name, + user_slack_id, + s, + e, + ) + except Exception: + logger.exception( + "Failed to sync messages for IM channel %s user %s", + ch.get("id"), + user_slack_id, + ) + def sync_to_pinecone(self, team: SlackTeam): """Sync Slack messages to Pinecone after message sync.""" try: diff --git a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py index ffbb6b95..bd963645 100644 --- a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py +++ b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py @@ -1,260 +1,103 @@ -# Slack private schema, SlackChannelPrivate, SlackMessagePrivate, and PostgreSQL grants. +# Generated by Django 4.2.29 on 2026-04-08 18:36 -from __future__ import annotations - -import logging -import re - -import django.db.models.deletion -from django.conf import settings from django.db import migrations, models - -logger = logging.getLogger(__name__) - -_PG_IDENT = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") -_SCHEMA = "slack_private" -_TABLES = ( - "cppa_slack_tracker_slackchannel_private", - "cppa_slack_tracker_slackmessage_private", -) - - -def _validate_pg_role(name: str) -> str: - if not _PG_IDENT.fullmatch(name): - raise ValueError( - "PRIVATE_ACCESS_USER must be a valid PostgreSQL identifier " - f"(letters, digits, underscore; letter or underscore first): {name!r}" - ) - return name - - -def _pg_qualified_ident(connection, schema: str, name: str) -> str: - """Match Django db_table hack: schema + '.' + relation -> \"schema\".\"name\".""" - return connection.ops.quote_name(f'{schema}"."{name}') - - -def _flush_deferred_ddl(schema_editor) -> None: - """Run deferred SQL now so tables exist before REVOKE/GRANT in RunPython.""" - while schema_editor.deferred_sql: - schema_editor.execute(schema_editor.deferred_sql.pop(0)) - - -def apply_slack_private_schema(_apps, schema_editor) -> None: - connection = schema_editor.connection - if connection.vendor != "postgresql": - logger.info("Not PostgreSQL; skipping CREATE SCHEMA slack_private.") - return - with connection.cursor() as cursor: - cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}") - cursor.execute(f"REVOKE ALL ON SCHEMA {_SCHEMA} FROM PUBLIC") - - -def reverse_slack_private_schema(_apps, schema_editor) -> None: - connection = schema_editor.connection - if connection.vendor != "postgresql": - return - with connection.cursor() as cursor: - cursor.execute(f"DROP SCHEMA IF EXISTS {_SCHEMA} CASCADE") - - -def apply_private_table_grants(_apps, schema_editor) -> None: - role = getattr(settings, "PRIVATE_ACCESS_USER", None) or "" - role = role.strip() - if not role: - logger.warning( - "PRIVATE_ACCESS_USER is unset; skipping private Slack table GRANT/REVOKE " - "(set env and re-run migrate if needed)." - ) - return - - role = _validate_pg_role(role) - connection = schema_editor.connection - if connection.vendor != "postgresql": - logger.info("Not PostgreSQL; skipping private Slack table GRANT/REVOKE.") - return - - _flush_deferred_ddl(schema_editor) - - with connection.cursor() as cursor: - cursor.execute( - f"GRANT USAGE ON SCHEMA {connection.ops.quote_name(_SCHEMA)} TO {role}" - ) - for table in _TABLES: - qtable = _pg_qualified_ident(connection, _SCHEMA, table) - cursor.execute(f"REVOKE ALL ON TABLE {qtable} FROM PUBLIC") - cursor.execute( - f"GRANT SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER " - f"ON TABLE {qtable} TO {role}" - ) - qseq = _pg_qualified_ident(connection, _SCHEMA, f"{table}_id_seq") - cursor.execute(f"REVOKE ALL ON SEQUENCE {qseq} FROM PUBLIC") - cursor.execute(f"GRANT USAGE, SELECT ON SEQUENCE {qseq} TO {role}") - - -def reverse_private_table_grants(_apps, schema_editor) -> None: - role = getattr(settings, "PRIVATE_ACCESS_USER", None) or "" - role = role.strip() - if not role or not _PG_IDENT.fullmatch(role): - return - connection = schema_editor.connection - if connection.vendor != "postgresql": - return - - with connection.cursor() as cursor: - for table in _TABLES: - qtable = _pg_qualified_ident(connection, _SCHEMA, table) - cursor.execute( - f"REVOKE ALL PRIVILEGES ON TABLE {qtable} FROM {role} CASCADE" - ) - qseq = _pg_qualified_ident(connection, _SCHEMA, f"{table}_id_seq") - cursor.execute( - f"REVOKE ALL PRIVILEGES ON SEQUENCE {qseq} FROM {role} CASCADE" - ) - cursor.execute( - f"REVOKE USAGE ON SCHEMA {connection.ops.quote_name(_SCHEMA)} FROM {role}" - ) +import django.db.models.deletion +import django.utils.timezone class Migration(migrations.Migration): dependencies = [ - ("cppa_user_tracker", "0004_alter_slackuser_slack_user_id_and_more"), - ("cppa_slack_tracker", "0003_alter_slackchannel_unique_team_channel_id"), + ('cppa_user_tracker', '0008_alter_youtubespeaker_display_name'), + ('cppa_slack_tracker', '0003_alter_slackchannel_unique_team_channel_id'), ] operations = [ migrations.AlterField( - model_name="slackteam", - name="team_id", + model_name='slackteam', + name='team_id', field=models.CharField(max_length=50, unique=True), ), - migrations.RunPython(apply_slack_private_schema, reverse_slack_private_schema), migrations.CreateModel( - name="SlackChannelPrivate", + name='SlackChannelPrivate', fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("channel_id", models.CharField(db_index=True, max_length=50)), - ("channel_name", models.CharField(db_index=True, max_length=255)), - ( - "channel_type", - models.CharField( - choices=[ - ("private_channel", "Private channel"), - ("mpim", "Multi-party direct message"), - ("im", "Direct message"), - ], - db_index=True, - help_text="Type: private_channel, mpim, or im (not public_channel).", - max_length=50, - ), - ), - ("description", models.TextField(blank=True, null=True)), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "creator", - models.ForeignKey( - blank=True, - db_column="creator_user_id", - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="created_private_channels", - to="cppa_user_tracker.slackuser", - ), - ), - ( - "team", - models.ForeignKey( - db_column="team_id", - on_delete=django.db.models.deletion.CASCADE, - related_name="private_channels", - to="cppa_slack_tracker.slackteam", - ), - ), + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('channel_id', models.CharField(db_index=True, max_length=50)), + ('channel_name', models.CharField(db_index=True, max_length=255)), + ('channel_type', models.CharField(choices=[('private_channel', 'Private channel'), ('mpim', 'Multi-party direct message'), ('im', 'Direct message')], db_index=True, help_text='Type: private_channel, mpim, or im (not public_channel).', max_length=50)), + ('description', models.TextField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('creator', models.ForeignKey(blank=True, db_column='creator_user_id', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='created_private_channels', to='cppa_user_tracker.slackuser')), + ('team', models.ForeignKey(db_column='team_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_channels', to='cppa_slack_tracker.slackteam')), ], options={ - "verbose_name": "Slack Channel (non-public)", - "verbose_name_plural": "Slack Channels (non-public)", - "db_table": 'slack_private"."cppa_slack_tracker_slackchannel_private', + 'verbose_name': 'Slack Channel (non-public)', + 'verbose_name_plural': 'Slack Channels (non-public)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackchannel_private', }, ), - migrations.AddConstraint( - model_name="slackchannelprivate", - constraint=models.UniqueConstraint( - fields=("team", "channel_id"), - name="unique_team_channel_id_private", - ), + migrations.CreateModel( + name='SlackChannelMembershipPrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('is_restricted', models.BooleanField(default=False)), + ('is_deleted', models.BooleanField(default=False)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='memberships', to='cppa_slack_tracker.slackchannelprivate')), + ('user', models.ForeignKey(db_column='slack_user_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_channel_memberships', to='cppa_user_tracker.slackuser')), + ], + options={ + 'verbose_name': 'Slack Channel Membership (non-public)', + 'verbose_name_plural': 'Slack Channel Memberships (non-public)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackchannelmembership_private', + }, ), migrations.CreateModel( - name="SlackMessagePrivate", + name='SlackChannelMembershipChangeLogPrivate', fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "ts", - models.CharField( - db_index=True, - help_text="Slack message timestamp (unique per channel)", - max_length=50, - ), - ), - ("message", models.TextField(blank=True)), - ( - "thread_ts", - models.CharField( - blank=True, - db_index=True, - help_text="Thread timestamp if this is a threaded message", - max_length=50, - null=True, - ), - ), - ("slack_message_created_at", models.DateTimeField(db_index=True)), - ( - "slack_message_updated_at", - models.DateTimeField(blank=True, db_index=True, null=True), - ), - ( - "channel", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="messages", - to="cppa_slack_tracker.slackchannelprivate", - ), - ), - ( - "user", - models.ForeignKey( - blank=True, - db_column="slack_user_id", - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="private_slack_messages", - to="cppa_user_tracker.slackuser", - ), - ), + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('is_joined', models.BooleanField(help_text='True if joined, False if left')), + ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='membership_changes', to='cppa_slack_tracker.slackchannelprivate')), + ('user', models.ForeignKey(db_column='slack_user_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_membership_changes', to='cppa_user_tracker.slackuser')), ], options={ - "verbose_name": "Slack Message (non-public channel)", - "verbose_name_plural": "Slack Messages (non-public channels)", - "db_table": 'slack_private"."cppa_slack_tracker_slackmessage_private', - "unique_together": {("channel", "ts")}, + 'verbose_name': 'Slack Channel Membership Change Log (non-public)', + 'verbose_name_plural': 'Slack Channel Membership Change Logs (non-public)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackchannelmembershipchangelog_private', }, ), - migrations.RunPython(apply_private_table_grants, reverse_private_table_grants), + migrations.CreateModel( + name='SlackMessagePrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('ts', models.CharField(db_index=True, help_text='Slack message timestamp (unique per channel)', max_length=50)), + ('message', models.TextField(blank=True)), + ('thread_ts', models.CharField(blank=True, db_index=True, help_text='Thread timestamp if this is a threaded message', max_length=50, null=True)), + ('slack_message_created_at', models.DateTimeField(db_index=True)), + ('slack_message_updated_at', models.DateTimeField(blank=True, db_index=True, null=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='messages', to='cppa_slack_tracker.slackchannelprivate')), + ('user', models.ForeignKey(blank=True, db_column='slack_user_id', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='private_slack_messages', to='cppa_user_tracker.slackuser')), + ], + options={ + 'verbose_name': 'Slack Message (non-public channel)', + 'verbose_name_plural': 'Slack Messages (non-public channels)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackmessage_private', + 'unique_together': {('channel', 'ts')}, + }, + ), + migrations.AddConstraint( + model_name='slackchannelprivate', + constraint=models.UniqueConstraint(fields=('team', 'channel_id'), name='unique_team_channel_id_private'), + ), + migrations.AlterUniqueTogether( + name='slackchannelmembershipprivate', + unique_together={('channel', 'user')}, + ), + migrations.AlterUniqueTogether( + name='slackchannelmembershipchangelogprivate', + unique_together={('channel', 'user', 'created_at')}, + ), ] diff --git a/cppa_slack_tracker/models.py b/cppa_slack_tracker/models.py index d47faf73..27b9fc57 100644 --- a/cppa_slack_tracker/models.py +++ b/cppa_slack_tracker/models.py @@ -303,3 +303,68 @@ class Meta: def __str__(self): action = "joined" if self.is_joined else "left" return f"{self.user} {action} {self.channel} at {self.created_at}" + + +class SlackChannelMembershipPrivate(models.Model): + """ + Current membership for non-public channels (private_channel, mpim, im). + + Stored in schema ``slack_private`` alongside SlackChannelPrivate. + """ + + channel = models.ForeignKey( + SlackChannelPrivate, + on_delete=models.CASCADE, + related_name="memberships", + ) + user = models.ForeignKey( + SlackUser, + on_delete=models.CASCADE, + related_name="private_channel_memberships", + db_column="slack_user_id", + ) + is_restricted = models.BooleanField(default=False) + is_deleted = models.BooleanField(default=False) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + db_table = 'slack_private"."cppa_slack_tracker_slackchannelmembership_private' + verbose_name = "Slack Channel Membership (non-public)" + verbose_name_plural = "Slack Channel Memberships (non-public)" + unique_together = [["channel", "user"]] + + def __str__(self): + return f"{self.user} in {self.channel}" + + +class SlackChannelMembershipChangeLogPrivate(models.Model): + """ + Join/leave history for non-public channels (private_channel, mpim, im). + """ + + channel = models.ForeignKey( + SlackChannelPrivate, + on_delete=models.CASCADE, + related_name="membership_changes", + ) + user = models.ForeignKey( + SlackUser, + on_delete=models.CASCADE, + related_name="private_membership_changes", + db_column="slack_user_id", + ) + is_joined = models.BooleanField(help_text="True if joined, False if left") + created_at = models.DateTimeField(default=timezone.now, db_index=True) + + class Meta: + db_table = ( + 'slack_private"."cppa_slack_tracker_slackchannelmembershipchangelog_private' + ) + verbose_name = "Slack Channel Membership Change Log (non-public)" + verbose_name_plural = "Slack Channel Membership Change Logs (non-public)" + unique_together = [["channel", "user", "created_at"]] + + def __str__(self): + action = "joined" if self.is_joined else "left" + return f"{self.user} {action} {self.channel} at {self.created_at}" diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py index 199f269d..9790910a 100644 --- a/cppa_slack_tracker/services.py +++ b/cppa_slack_tracker/services.py @@ -23,8 +23,11 @@ from .models import ( SlackChannel, SlackChannelPrivate, + SlackChannelPrivateType, SlackChannelMembership, SlackChannelMembershipChangeLog, + SlackChannelMembershipChangeLogPrivate, + SlackChannelMembershipPrivate, SlackMessage, SlackMessagePrivate, SlackTeam, @@ -32,6 +35,19 @@ logger = logging.getLogger(__name__) +# Non-public channels that store membership in SlackChannelMembership*Private +# (excludes `im`: DMs are not synced here). +_PRIVATE_MEMBERSHIP_CHANNEL_TYPES = frozenset( + { + SlackChannelPrivateType.PRIVATE_CHANNEL, + SlackChannelPrivateType.MPIM, + } +) + + +def _private_channel_tracks_membership(channel: SlackChannelPrivate) -> bool: + return channel.channel_type in _PRIVATE_MEMBERSHIP_CHANNEL_TYPES + # Slack message subtypes to ignore SUBTYPE_IGNORE = [ @@ -275,6 +291,86 @@ def sync_channel_memberships(channel: SlackChannel, member_ids: list[str]) -> No continue +# --- SlackChannelMembershipPrivate (private_channel, mpim only) --- +@transaction.atomic +def add_channel_membership_change_private( + channel: SlackChannelPrivate, + slack_user_id: str, + ts: str, + is_joined: bool, +) -> SlackChannelMembershipChangeLogPrivate: + """Record join/leave for a non-public channel; updates current membership row.""" + if not _private_channel_tracks_membership(channel): + raise ValueError( + "Membership is only tracked for private_channel and mpim conversations" + ) + try: + user = SlackUser.objects.get(slack_user_id=slack_user_id) + except SlackUser.DoesNotExist: + raise ValueError(f"User {slack_user_id} not found") + created_at = _parse_slack_ts_string(ts) + change_log, _ = SlackChannelMembershipChangeLogPrivate.objects.get_or_create( + channel=channel, + user=user, + created_at=created_at, + defaults={"is_joined": is_joined}, + ) + if change_log.is_joined != is_joined: + change_log.is_joined = is_joined + change_log.save(update_fields=["is_joined"]) + if is_joined: + membership, _ = SlackChannelMembershipPrivate.objects.get_or_create( + channel=channel, + user=user, + defaults={"is_deleted": False}, + ) + if membership.is_deleted: + membership.is_deleted = False + membership.save() + else: + SlackChannelMembershipPrivate.objects.filter(channel=channel, user=user).update( + is_deleted=True + ) + return change_log + + +@transaction.atomic +def sync_channel_memberships_private( + channel: SlackChannelPrivate, member_ids: list[str] +) -> None: + """Sync memberships for a private_channel or mpim; no-op if channel type is not supported.""" + if not _private_channel_tracks_membership(channel): + return + existing_memberships = SlackChannelMembershipPrivate.objects.filter( + channel=channel, + is_deleted=False, + ).select_related("user") + existing_user_ids = {m.user.slack_user_id for m in existing_memberships} + new_member_ids = set(member_ids) - existing_user_ids + removed_member_ids = existing_user_ids - set(member_ids) + for user_id in new_member_ids: + try: + user = SlackUser.objects.get(slack_user_id=user_id) + membership, created = SlackChannelMembershipPrivate.objects.get_or_create( + channel=channel, + user=user, + defaults={"is_deleted": False}, + ) + if not created and membership.is_deleted: + membership.is_deleted = False + membership.save() + except SlackUser.DoesNotExist: + continue + for user_id in removed_member_ids: + try: + user = SlackUser.objects.get(slack_user_id=user_id) + SlackChannelMembershipPrivate.objects.filter( + channel=channel, user=user + ).update(is_deleted=True) + except SlackUser.DoesNotExist: + continue + + # --- SlackMessage --- def _message_text_for_subtype( slack_message: dict[str, Any], subtype: str @@ -400,13 +496,48 @@ def save_slack_message_private( Save or update a Slack message for a non-public channel. Same rules as save_slack_message, but persists to SlackMessagePrivate. - channel_join / channel_leave are ignored here: membership for non-public - channels is not modeled on SlackChannelMembership (public SlackChannel only). + For private_channel and mpim, channel_join / channel_leave update + SlackChannelMembershipPrivate / SlackChannelMembershipChangeLogPrivate. + Join/leave on im is ignored (no membership rows for DMs). """ subtype = slack_message.get("subtype") if subtype in SUBTYPE_IGNORE: return None - if subtype in ("channel_join", "channel_leave"): + if subtype == "channel_join": + if not _private_channel_tracks_membership(channel): + return None + event_ts = slack_message.get("ts") + if not event_ts: + logger.warning("Skipping channel_join without ts (private channel)") + return None + if slack_message.get("user"): + user = _get_or_fetch_slack_user( + slack_message["user"], team_id=channel.team.team_id + ) + add_channel_membership_change_private( + channel, + user.slack_user_id, + event_ts, + True, + ) + return None + if subtype == "channel_leave": + if not _private_channel_tracks_membership(channel): + return None + event_ts = slack_message.get("ts") + if not event_ts: + logger.warning("Skipping channel_leave without ts (private channel)") + return None + if slack_message.get("user"): + user = _get_or_fetch_slack_user( + slack_message["user"], team_id=channel.team.team_id + ) + add_channel_membership_change_private( + channel, + user.slack_user_id, + event_ts, + False, + ) return None user: Optional[SlackUser] = None diff --git a/cppa_slack_tracker/slack_oauth_server.py b/cppa_slack_tracker/slack_oauth_server.py index 309cbf73..315e4cdb 100644 --- a/cppa_slack_tracker/slack_oauth_server.py +++ b/cppa_slack_tracker/slack_oauth_server.py @@ -28,13 +28,22 @@ from starlette.responses import HTMLResponse, RedirectResponse from starlette.routing import Route -# Load .env from project root (src/ingestion -> src -> project root) -_ROOT = Path(__file__).resolve().parent.parent.parent -load_dotenv(_ROOT / ".env") +# Repo root: cppa_slack_tracker/slack_oauth_server.py -> parent.parent +_REPO_ROOT = Path(__file__).resolve().parent.parent +load_dotenv(_REPO_ROOT / ".env") logger = logging.getLogger(__name__) -TOKENS_FILE = _ROOT / "credential" / "slack_user_tokens.json" + +def _slack_user_tokens_file() -> Path: + """Path to JSON map of OAuth user tokens; override with SLACK_USER_TOKENS_PATH.""" + override = (os.environ.get("SLACK_USER_TOKENS_PATH") or "").strip() + if override: + return Path(override) + return _REPO_ROOT / "credential" / "slack_user_tokens.json" + + +TOKENS_FILE = _slack_user_tokens_file() # CSRF: one-time OAuth state values (server-side). Values are secrets.token_urlsafe; # each maps to unix expiry time. Prevents replay after TTL via expiry + pop-on-use. diff --git a/cppa_slack_tracker/sync/sync_channel_user.py b/cppa_slack_tracker/sync/sync_channel_user.py index 75f1a9c7..5a773323 100644 --- a/cppa_slack_tracker/sync/sync_channel_user.py +++ b/cppa_slack_tracker/sync/sync_channel_user.py @@ -2,9 +2,10 @@ Sync Slack channel memberships with the database. Fetches channel member lists via cppa_slack_tracker.fetcher.fetch_channel_user_list -and syncs memberships to the database. Use get_channels_to_sync() to get the list -of channels to sync (one or all in a team); get_private_channels_to_sync does -the same for SlackChannelPrivate. run_cppa_slack_tracker uses both for message sync. +and syncs memberships to the database. Public channels use SlackChannelMembership; +private_channel and mpim use SlackChannelMembershipPrivate (im is skipped). + +Use get_channels_to_sync() / get_private_channels_to_sync() for message sync lists. """ from __future__ import annotations @@ -13,8 +14,16 @@ from typing import Optional from cppa_slack_tracker.fetcher import fetch_channel_user_list -from cppa_slack_tracker.models import SlackChannel, SlackChannelPrivate, SlackTeam -from cppa_slack_tracker.services import sync_channel_memberships +from cppa_slack_tracker.models import ( + SlackChannel, + SlackChannelPrivate, + SlackChannelPrivateType, + SlackTeam, +) +from cppa_slack_tracker.services import ( + sync_channel_memberships, + sync_channel_memberships_private, +) logger = logging.getLogger(__name__) @@ -79,9 +88,12 @@ def sync_channel_users( """ Sync channel memberships for all (or one) channels in a team. - Uses get_channels_to_sync(team, channel_id=channel_id) to get the channel - list, then for each channel fetches member IDs from the Slack API and - syncs memberships to the database. Returns (success_count, error_count). + Public channels: get_channels_to_sync → SlackChannelMembership. + + Non-public private_channel and mpim: get_private_channels_to_sync → + SlackChannelMembershipPrivate (skips im). + + Returns (success_count, error_count). """ channels = get_channels_to_sync(team, channel_id=channel_id) success_count = 0 @@ -98,4 +110,26 @@ def sync_channel_users( "Failed to fetch/sync members for %s: %s", channel.channel_id, e ) error_count += 1 + + private_channels = get_private_channels_to_sync(team, channel_id=channel_id) + membership_private = [ + c + for c in private_channels + if c.channel_type + in (SlackChannelPrivateType.PRIVATE_CHANNEL, SlackChannelPrivateType.MPIM) + ] + for channel in membership_private: + try: + member_ids = fetch_channel_user_list( + channel.channel_id, team_id=channel.team.team_id + ) + sync_channel_memberships_private(channel, member_ids) + success_count += 1 + except Exception as e: + logger.exception( + "Failed to fetch/sync members for private/mpim %s: %s", + channel.channel_id, + e, + ) + error_count += 1 return success_count, error_count diff --git a/cppa_slack_tracker/sync/sync_message.py b/cppa_slack_tracker/sync/sync_message.py index e976f224..d29bdab6 100644 --- a/cppa_slack_tracker/sync/sync_message.py +++ b/cppa_slack_tracker/sync/sync_message.py @@ -24,9 +24,12 @@ import logging from collections import defaultdict from datetime import date, datetime, timedelta, timezone -from typing import Optional +from typing import TYPE_CHECKING, Optional from django.db.models import F + +if TYPE_CHECKING: + from operations.slack_ops.client import SlackAPIClient from django.db.models.functions import Coalesce from cppa_slack_tracker.fetcher import fetch_messages @@ -170,6 +173,8 @@ def sync_messages( channel: SlackChannel | SlackChannelPrivate, start_date: date | datetime | None = None, end_date: date | datetime | None = None, + *, + client: Optional["SlackAPIClient"] = None, ) -> tuple[int, int]: """ Sync messages for a channel over a date range (UTC). @@ -185,6 +190,9 @@ def sync_messages( - Write JSON to workspace. Merge into raw file by ts (same ts → update, new ts → add). - Process workspace → save to DB → remove workspace file. + If ``client`` is set (e.g. user OAuth token for IMs), it is used for + ``conversations.history``; otherwise the workspace bot token is used. + Returns (success_count, error_count). """ today = datetime.now(timezone.utc).date() @@ -217,7 +225,11 @@ def sync_messages( # Step 2: fetch messages ([start_date, end_date] or all up to end_date if start_date is None) try: all_messages = fetch_messages( - channel_id, start_date, end_date, team_id=channel.team.team_id + channel_id, + start_date, + end_date, + client=client, + team_id=channel.team.team_id, ) except Exception: logger.exception( diff --git a/cppa_slack_tracker/tests/fixtures.py b/cppa_slack_tracker/tests/fixtures.py index d23c6c6a..62471f25 100644 --- a/cppa_slack_tracker/tests/fixtures.py +++ b/cppa_slack_tracker/tests/fixtures.py @@ -8,6 +8,7 @@ from cppa_slack_tracker.models import ( SlackTeam, SlackChannel, + SlackChannelPrivate, SlackMessage, SlackChannelMembership, ) @@ -78,6 +79,34 @@ def sample_slack_channel(db, sample_slack_team, sample_slack_user): ) +@pytest.fixture +def sample_slack_channel_private(db, sample_slack_team, sample_slack_user): + """Create a sample private Slack channel (private_channel).""" + _ = db + return SlackChannelPrivate.objects.create( + team=sample_slack_team, + channel_id="G012PRIVATE1", + channel_name="private-team", + channel_type="private_channel", + description="Private stuff", + creator=sample_slack_user, + ) + + +@pytest.fixture +def sample_slack_channel_im(db, sample_slack_team, sample_slack_user): + """Create a sample IM channel (membership not tracked).""" + _ = db + return SlackChannelPrivate.objects.create( + team=sample_slack_team, + channel_id="D01IM00001", + channel_name="someone", + channel_type="im", + description="", + creator=sample_slack_user, + ) + + @pytest.fixture def _sample_slack_channel(sample_slack_channel): """Alias for tests that need DB state but do not use the fixture value (silences ARG002).""" diff --git a/cppa_slack_tracker/tests/test_services.py b/cppa_slack_tracker/tests/test_services.py index 28267fbe..d9ad7144 100644 --- a/cppa_slack_tracker/tests/test_services.py +++ b/cppa_slack_tracker/tests/test_services.py @@ -9,14 +9,19 @@ get_or_create_slack_channel, get_or_create_slack_team, add_channel_membership_change, + add_channel_membership_change_private, save_slack_message, + save_slack_message_private, sync_channel_memberships, + sync_channel_memberships_private, _parse_slack_ts_string, ) from cppa_slack_tracker.models import ( SlackChannelPrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, + SlackChannelMembershipPrivate, + SlackChannelMembershipChangeLogPrivate, ) from cppa_user_tracker.models import Email, SlackUser @@ -331,3 +336,110 @@ def test_save_slack_message_unknown_user_fetches_info_and_creates_unknown_withou calls = [c[0][0] for c in mock_fetch.call_args_list] assert "U99999999" in calls assert "-1" not in calls + + def test_add_channel_membership_change_private( + self, sample_slack_channel_private, sample_slack_user + ): + """Join/leave for private_channel updates private membership tables.""" + log = add_channel_membership_change_private( + sample_slack_channel_private, + "U12345678", + "1609459200.123456", + is_joined=True, + ) + assert log.channel == sample_slack_channel_private + assert log.user == sample_slack_user + assert log.is_joined + m = SlackChannelMembershipPrivate.objects.get( + channel=sample_slack_channel_private, + user=sample_slack_user, + ) + assert not m.is_deleted + + def test_add_channel_membership_change_private_rejects_im( + self, sample_slack_channel_im, sample_slack_user + ): + """Membership helpers are not used for im channels.""" + with pytest.raises(ValueError, match="private_channel and mpim"): + add_channel_membership_change_private( + sample_slack_channel_im, + sample_slack_user.slack_user_id, + "1609459200.123456", + is_joined=True, + ) + + def test_save_slack_message_private_channel_join( + self, sample_slack_channel_private, sample_slack_user + ): + """channel_join on private_channel records membership; no SlackMessagePrivate row.""" + message_data = { + "user": "U12345678", + "text": "joined the channel", + "ts": "1609459200.123456", + "subtype": "channel_join", + } + msg = save_slack_message_private( + sample_slack_channel_private, + message_data, + ) + assert msg is None + assert SlackChannelMembershipChangeLogPrivate.objects.filter( + channel=sample_slack_channel_private, + user=sample_slack_user, + is_joined=True, + ).exists() + + def test_save_slack_message_private_channel_join_skipped_for_im( + self, sample_slack_channel_im, sample_slack_user + ): + """channel_join on im does not create private membership rows.""" + message_data = { + "user": "U12345678", + "text": "joined", + "ts": "1609459200.123456", + "subtype": "channel_join", + } + msg = save_slack_message_private(sample_slack_channel_im, message_data) + assert msg is None + assert not SlackChannelMembershipChangeLogPrivate.objects.filter( + channel=sample_slack_channel_im, + user=sample_slack_user, + ).exists() + + def test_sync_channel_memberships_private( + self, sample_slack_channel_private, sample_slack_user, sample_identity + ): + """sync_channel_memberships_private adds/removes rows for private_channel.""" + user2 = SlackUser.objects.create( + identity=sample_identity, + slack_user_id="U22222222", + username="user2", + ) + SlackChannelMembershipPrivate.objects.create( + channel=sample_slack_channel_private, + user=sample_slack_user, + ) + sync_channel_memberships_private( + sample_slack_channel_private, + ["U22222222"], + ) + m1 = SlackChannelMembershipPrivate.objects.get( + channel=sample_slack_channel_private, user=sample_slack_user + ) + assert m1.is_deleted + m2 = SlackChannelMembershipPrivate.objects.get( + channel=sample_slack_channel_private, user=user2 + ) + assert not m2.is_deleted + + def test_sync_channel_memberships_private_noop_for_im( + self, sample_slack_channel_im, sample_slack_user + ): + """Member list sync does not persist rows for im.""" + sync_channel_memberships_private( + sample_slack_channel_im, + [sample_slack_user.slack_user_id], + ) + assert not SlackChannelMembershipPrivate.objects.filter( + channel=sample_slack_channel_im + ).exists() diff --git a/cppa_slack_tracker/user_tokens.py b/cppa_slack_tracker/user_tokens.py new file mode 100644 index 00000000..858ed3bc --- /dev/null +++ b/cppa_slack_tracker/user_tokens.py @@ -0,0 +1,67 @@ +""" +Load Slack user OAuth tokens written by slack_oauth_server (credential file). + +Same path as [slack_oauth_server](slack_oauth_server.py): optional SLACK_USER_TOKENS_PATH, +else BASE_DIR/credential/slack_user_tokens.json. +""" + +from __future__ import annotations + +import json +import logging +import os +from pathlib import Path +from typing import Iterator + +logger = logging.getLogger(__name__) + + +def slack_user_tokens_path() -> Path: + """Absolute path to slack_user_tokens.json.""" + override = (os.environ.get("SLACK_USER_TOKENS_PATH") or "").strip() + if override: + return Path(override) + from django.conf import settings + + return Path(settings.BASE_DIR) / "credential" / "slack_user_tokens.json" + + +def load_slack_user_tokens() -> dict[str, dict]: + """ + Return the raw JSON object: keys are ``team_id:user_id`` (or ``user_id``); + values include user_id, team_id, access_token. + """ + path = slack_user_tokens_path() + if not path.exists(): + return {} + try: + with path.open(encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError) as exc: + logger.warning("Could not load Slack user tokens from %s: %s", path, exc) + return {} + if not isinstance(data, dict): + return {} + return data # type: ignore[return-value] + + +def iter_user_tokens_for_team(team_id: str) -> Iterator[tuple[str, str]]: + """ + Yield (slack_user_id, access_token) for rows whose team_id matches ``team_id``. + + Skips malformed entries or empty tokens. + """ + tid = (team_id or "").strip() + if not tid: + return + for _key, row in load_slack_user_tokens().items(): + if not isinstance(row, dict): + continue + uid = (row.get("user_id") or "").strip() + token = (row.get("access_token") or "").strip() + row_team = (row.get("team_id") or "").strip() + if not uid or not token: + continue + if row_team != tid: + continue + yield (uid, token) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0b2f596c..c12e0507 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,9 @@ # Include production dependencies -r requirements.txt +# slack_oauth_server (python -m cppa_slack_tracker.slack_oauth_server): httpx, starlette, +# uvicorn, python-dotenv are listed in requirements.txt — pip install -r requirements-dev.txt + # Test stack (see docs/testing or project testing summary) pytest>=7.4 pytest-django>=4.5 diff --git a/requirements.txt b/requirements.txt index 34f77e77..989d7e78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,6 +24,12 @@ slack-bolt>=1.18 pytz>=2024.1 selenium>=4.35 +# cppa_slack_tracker slack_oauth_server (user OAuth; python -m cppa_slack_tracker.slack_oauth_server) +httpx>=0.27 +python-dotenv>=1.0 +starlette>=0.37 +uvicorn[standard]>=0.30 + # cppa_youtube_script_tracker app (YouTube Data API v3 + VTT transcript download) google-api-python-client>=2.100 yt-dlp==2026.2.4 From 11302e748efd63fae401ea5cc265c53797116528 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 9 Apr 2026 11:08:05 -0600 Subject: [PATCH 4/7] Update due to CI failure. --- conftest.py | 14 +++++++++++ .../commands/run_cppa_slack_tracker.py | 20 ++++----------- ...am_team_id_slackchannelprivate_and_more.py | 8 ++++-- cppa_slack_tracker/slack_oauth_server.py | 25 +++++++++++++------ cppa_slack_tracker/sync/sync_channel_user.py | 8 ++---- cppa_slack_tracker/sync/sync_message.py | 4 +-- cppa_slack_tracker/tests/test_services.py | 4 +-- 7 files changed, 47 insertions(+), 36 deletions(-) diff --git a/conftest.py b/conftest.py index 6701adc1..140e9057 100644 --- a/conftest.py +++ b/conftest.py @@ -21,8 +21,22 @@ def __copy__(self): BaseContext.__copy__ = __copy__ +def _ensure_slack_private_schema_on_postgres_connect(): + """Create slack_private before table sync when pytest uses --no-migrations.""" + from django.db.backends.signals import connection_created + + def on_connection_created(sender, connection, **kwargs): + if connection.vendor != "postgresql": + return + with connection.cursor() as cursor: + cursor.execute("CREATE SCHEMA IF NOT EXISTS slack_private;") + + connection_created.connect(on_connection_created) + + def pytest_configure(config): # noqa: F841 (pytest hook; name must match spec) _patch_django_context_copy_py314() + _ensure_slack_private_schema_on_postgres_connect() # Load app-level fixture modules so fixtures from each app are available everywhere. diff --git a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py index d1407070..23e7f927 100644 --- a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py +++ b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py @@ -310,9 +310,7 @@ def sync_messages(self, options, team: SlackTeam): """ channel_id_opt = (options.get("channel_id") or "").strip() or None channels = get_channels_to_sync(team, channel_id=channel_id_opt) - private_channels = get_private_channels_to_sync( - team, channel_id=channel_id_opt - ) + private_channels = get_private_channels_to_sync(team, channel_id=channel_id_opt) has_user_tokens = any(iter_user_tokens_for_team(team.team_id)) if not channels and not private_channels and not has_user_tokens: logger.warning( @@ -336,9 +334,7 @@ def sync_messages(self, options, team: SlackTeam): len(all_loaded), ) channel_by_id = {c.channel_id: c for c in channels} - channel_by_id.update( - {c.channel_id: c for c in private_channels} - ) + channel_by_id.update({c.channel_id: c for c in private_channels}) load_failures = 0 for msg in all_loaded: if not isinstance(msg, dict): @@ -427,13 +423,9 @@ def _sync_im_messages_user_tokens( len(tokens), ) for user_slack_id, access_token in tokens: - user_client = get_slack_client( - bot_token=access_token, team_id=team.team_id - ) + user_client = get_slack_client(bot_token=access_token, team_id=team.team_id) try: - im_channels = fetch_im_channel_list_for_user( - team.team_id, access_token - ) + im_channels = fetch_im_channel_list_for_user(team.team_id, access_token) except Exception: logger.exception( "Failed to list IM channels for user %s", user_slack_id @@ -456,9 +448,7 @@ def _sync_im_messages_user_tokens( if ch_obj is None: continue except Exception: - logger.exception( - "Failed to upsert IM channel %s", ch.get("id") - ) + logger.exception("Failed to upsert IM channel %s", ch.get("id")) continue try: diff --git a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py index bd963645..fe1dd08a 100644 --- a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py +++ b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py @@ -8,11 +8,15 @@ class Migration(migrations.Migration): dependencies = [ - ('cppa_user_tracker', '0008_alter_youtubespeaker_display_name'), - ('cppa_slack_tracker', '0003_alter_slackchannel_unique_team_channel_id'), + ("cppa_user_tracker", "0007_youtubespeaker_external_id"), + ("cppa_slack_tracker", "0003_alter_slackchannel_unique_team_channel_id"), ] operations = [ + migrations.RunSQL( + sql='CREATE SCHEMA IF NOT EXISTS slack_private;', + reverse_sql=migrations.RunSQL.noop, + ), migrations.AlterField( model_name='slackteam', name='team_id', diff --git a/cppa_slack_tracker/slack_oauth_server.py b/cppa_slack_tracker/slack_oauth_server.py index 315e4cdb..e1cf6953 100644 --- a/cppa_slack_tracker/slack_oauth_server.py +++ b/cppa_slack_tracker/slack_oauth_server.py @@ -83,7 +83,8 @@ def _consume_oauth_state(state: str | None) -> bool: # Static HTML responses (predefined at import time) # --------------------------------------------------------------------------- -_HTML_INDEX = HTMLResponse(content=""" +_HTML_INDEX = HTMLResponse( + content=""" WG21 Paralegal — Slack OAuth @@ -97,7 +98,8 @@ def _consume_oauth_state(state: str | None) -> bool:

Slack redirects to /slack/oauth/callback after authorization.

- """) + """ +) _HTML_MISSING_CODE = HTMLResponse( content=""" @@ -113,7 +115,8 @@ def _consume_oauth_state(state: str | None) -> bool: status_code=400, ) -_HTML_SUCCESS = HTMLResponse(content=""" +_HTML_SUCCESS = HTMLResponse( + content=""" Slack OAuth @@ -124,13 +127,15 @@ def _consume_oauth_state(state: str | None) -> bool:

Token stored in slack_user_tokens.json.

- """) + """ +) # --------------------------------------------------------------------------- # Dynamic HTML response builders (depend on runtime values) # --------------------------------------------------------------------------- + def _html_auth_error(error: str) -> HTMLResponse: safe_error = escape(error) return HTMLResponse( @@ -175,7 +180,8 @@ def _html_invalid_oauth_state() -> HTMLResponse: def _html_authorized(table_body: str) -> HTMLResponse: - return HTMLResponse(content=f""" + return HTMLResponse( + content=f""" Authorized users @@ -192,13 +198,15 @@ def _html_authorized(table_body: str) -> HTMLResponse:

Add another user

- """) + """ + ) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- + def _get_env(key: str) -> str: val = (os.environ.get(key) or "").strip() # If the value accidentally includes "KEY=" (e.g. pasted full .env line), strip the prefix. @@ -305,7 +313,10 @@ async def _exchange_code(code: str) -> dict: # type: ignore[type-arg] return {"ok": False, "error": f"Invalid response from Slack (not JSON): {exc}"} if not isinstance(data, dict): - return {"ok": False, "error": "Unexpected response from Slack (expected JSON object)."} + return { + "ok": False, + "error": "Unexpected response from Slack (expected JSON object).", + } return data diff --git a/cppa_slack_tracker/sync/sync_channel_user.py b/cppa_slack_tracker/sync/sync_channel_user.py index 5a773323..a1035018 100644 --- a/cppa_slack_tracker/sync/sync_channel_user.py +++ b/cppa_slack_tracker/sync/sync_channel_user.py @@ -66,18 +66,14 @@ def get_private_channels_to_sync( """ if channel_id: try: - return [ - SlackChannelPrivate.objects.get(team=team, channel_id=channel_id) - ] + return [SlackChannelPrivate.objects.get(team=team, channel_id=channel_id)] except SlackChannelPrivate.DoesNotExist: logger.warning( "Private channel %s not found in team %s; syncing all non-public channels.", channel_id, team.team_id, ) - return list( - SlackChannelPrivate.objects.filter(team=team).order_by("channel_id") - ) + return list(SlackChannelPrivate.objects.filter(team=team).order_by("channel_id")) def sync_channel_users( diff --git a/cppa_slack_tracker/sync/sync_message.py b/cppa_slack_tracker/sync/sync_message.py index d29bdab6..4c928f0d 100644 --- a/cppa_slack_tracker/sync/sync_message.py +++ b/cppa_slack_tracker/sync/sync_message.py @@ -80,9 +80,7 @@ def _messages_by_day( return dict(by_day) -def _process_message( - channel: SlackChannel | SlackChannelPrivate, msg: dict -) -> bool: +def _process_message(channel: SlackChannel | SlackChannelPrivate, msg: dict) -> bool: """ Process one message: save to SlackMessage or SlackMessagePrivate. Returns True if saved, False if skipped (e.g. ignored subtype). Raises on error. diff --git a/cppa_slack_tracker/tests/test_services.py b/cppa_slack_tracker/tests/test_services.py index d9ad7144..94c25926 100644 --- a/cppa_slack_tracker/tests/test_services.py +++ b/cppa_slack_tracker/tests/test_services.py @@ -111,9 +111,7 @@ def test_add_slack_channel( assert channel.description == "Random discussions" assert channel.creator == sample_slack_user - def test_add_slack_channel_private( - self, sample_slack_team, sample_slack_user - ): + def test_add_slack_channel_private(self, sample_slack_team, sample_slack_user): """Non-public channels are stored in SlackChannelPrivate.""" data = { "id": "G012PRIVATE1", From e9c5b1c39839e88e42c99042eedd40c1610fa7b2 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 9 Apr 2026 11:23:38 -0600 Subject: [PATCH 5/7] Update again --- conftest.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/conftest.py b/conftest.py index 140e9057..b4791a5c 100644 --- a/conftest.py +++ b/conftest.py @@ -21,17 +21,25 @@ def __copy__(self): BaseContext.__copy__ = __copy__ +def _django_connection_created_create_slack_private_schema( + sender, connection, **kwargs +): + """Create slack_private before table sync when pytest uses --no-migrations. + + Must be a module-level function: Django's Signal.connect() defaults to weak=True, + so a nested handler can be garbage-collected after pytest_configure returns and + never run on CI (e.g. Python 3.13 + full suite). + """ + if connection.vendor != "postgresql": + return + with connection.cursor() as cursor: + cursor.execute("CREATE SCHEMA IF NOT EXISTS slack_private;") + + def _ensure_slack_private_schema_on_postgres_connect(): - """Create slack_private before table sync when pytest uses --no-migrations.""" from django.db.backends.signals import connection_created - def on_connection_created(sender, connection, **kwargs): - if connection.vendor != "postgresql": - return - with connection.cursor() as cursor: - cursor.execute("CREATE SCHEMA IF NOT EXISTS slack_private;") - - connection_created.connect(on_connection_created) + connection_created.connect(_django_connection_created_create_slack_private_schema) def pytest_configure(config): # noqa: F841 (pytest hook; name must match spec) From f58b0529711647ae0c6ad9e5722e380661693c83 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 9 Apr 2026 14:18:43 -0600 Subject: [PATCH 6/7] Update due to the coderabbitai review. --- .env.example | 7 ++--- .../commands/run_cppa_slack_tracker.py | 15 +++++++++++ cppa_slack_tracker/services.py | 6 +++-- cppa_slack_tracker/slack_oauth_server.py | 27 +++++++++++++------ requirements.txt | 2 +- 5 files changed, 43 insertions(+), 14 deletions(-) diff --git a/.env.example b/.env.example index 61baee45..b319f075 100644 --- a/.env.example +++ b/.env.example @@ -234,14 +234,15 @@ DATABASE_URL=postgres://user:password@localhost:5432/boost_dashboard # Open https://api.slack.com/apps → your app → OAuth & Permissions: # - Add User Token Scopes (e.g. im:read, im:history, mpim:read, mpim:history, …). # - Redirect URLs: must match SLACK_REDIRECT_URI exactly (e.g. https://your-host/slack/oauth/callback). -# After a user visits /slack/connect and authorizes, tokens are stored under credential/slack_user_tokens.json -# (same path Django uses via cppa_slack_tracker.user_tokens for run_cppa_slack_tracker IM sync). +# After a user visits /slack/connect and authorizes, tokens are stored under +# /credential/slack_user_tokens.json +# (align SLACK_USER_TOKENS_PATH with Django cppa_slack_tracker.user_tokens / BASE_DIR if they differ). # SLACK_CLIENT_ID=10473611477057.xxxxxxxxx # SLACK_CLIENT_SECRET=xxxxxxxx # SLACK_REDIRECT_URI=https://your-public-host.example.com/slack/oauth/callback # Optional: comma-separated user scopes (defaults in slack_oauth_server match IM + channels + groups + mpim). # SLACK_USER_SCOPES=channels:history,channels:read,groups:history,groups:read,im:history,im:read,mpim:history,mpim:read -# Optional: override JSON path for user tokens (default: /credential/slack_user_tokens.json). +# Optional: override JSON path for user tokens (default: same dir as .env → credential/slack_user_tokens.json). # SLACK_USER_TOKENS_PATH=/absolute/path/to/slack_user_tokens.json # Optional: OAuth CSRF state TTL (seconds), server bind, debug. # OAUTH_STATE_TTL_S=600 diff --git a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py index 23e7f927..4b4f2c15 100644 --- a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py +++ b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py @@ -422,6 +422,9 @@ def _sync_im_messages_user_tokens( "Syncing IM channels with user tokens (%s authorized user(s))...", len(tokens), ) + # Same IM (D…) appears in every participant's conversations.list; sync once + # per channel per run using the first token that lists it. + seen_channel_ids: set[str] = set() for user_slack_id, access_token in tokens: user_client = get_slack_client(bot_token=access_token, team_id=team.team_id) try: @@ -442,13 +445,24 @@ def _sync_im_messages_user_tokens( for ch in im_channels: if not isinstance(ch, dict) or not ch.get("id"): continue + ch_id = ch["id"] + if ch_id in seen_channel_ids: + logger.debug( + "Skipping IM channel %s for user %s (already synced this run)", + ch_id, + user_slack_id, + ) + continue + seen_channel_ids.add(ch_id) try: pub, priv, _created = get_or_create_slack_channel(ch, team) ch_obj = pub or priv if ch_obj is None: + seen_channel_ids.discard(ch_id) continue except Exception: logger.exception("Failed to upsert IM channel %s", ch.get("id")) + seen_channel_ids.discard(ch_id) continue try: @@ -471,6 +485,7 @@ def _sync_im_messages_user_tokens( ch.get("id"), user_slack_id, ) + seen_channel_ids.discard(ch_id) def sync_to_pinecone(self, team: SlackTeam): """Sync Slack messages to Pinecone after message sync.""" diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py index 9790910a..9f98db6c 100644 --- a/cppa_slack_tracker/services.py +++ b/cppa_slack_tracker/services.py @@ -444,7 +444,8 @@ def save_slack_message( if isinstance(comment, dict): text += f"\nComment: {comment.get('comment', '')}" elif subtype: - text = _message_text_for_subtype(slack_message, subtype) or "" + subtype_text = _message_text_for_subtype(slack_message, subtype) + text = subtype_text or slack_message.get("text", "") else: text = slack_message.get("text", "") @@ -551,7 +552,8 @@ def save_slack_message_private( if isinstance(comment, dict): text += f"\nComment: {comment.get('comment', '')}" elif subtype: - text = _message_text_for_subtype(slack_message, subtype) or "" + subtype_text = _message_text_for_subtype(slack_message, subtype) + text = subtype_text or slack_message.get("text", "") else: text = slack_message.get("text", "") diff --git a/cppa_slack_tracker/slack_oauth_server.py b/cppa_slack_tracker/slack_oauth_server.py index e1cf6953..8f323279 100644 --- a/cppa_slack_tracker/slack_oauth_server.py +++ b/cppa_slack_tracker/slack_oauth_server.py @@ -1,7 +1,8 @@ """ Slack user OAuth server -Loads .env from project root. +Loads `.env` from a fixed path beside this package; default token file is under +the same directory as that `.env` (see `_SLACK_OAUTH_DOTENV_PATH`). - GET / → landing page with links (root is not Slack's callback) - GET /slack/connect → Slack authorize URL with CSRF `state` (server-side, TTL) @@ -28,9 +29,10 @@ from starlette.responses import HTMLResponse, RedirectResponse from starlette.routing import Route -# Repo root: cppa_slack_tracker/slack_oauth_server.py -> parent.parent -_REPO_ROOT = Path(__file__).resolve().parent.parent -load_dotenv(_REPO_ROOT / ".env") +# Default .env: cppa_slack_tracker/ -> project root .env +_SLACK_OAUTH_DOTENV_PATH = Path(__file__).resolve().parent.parent / ".env" +load_dotenv(_SLACK_OAUTH_DOTENV_PATH) +_DOTENV_DIR = _SLACK_OAUTH_DOTENV_PATH.parent logger = logging.getLogger(__name__) @@ -40,7 +42,7 @@ def _slack_user_tokens_file() -> Path: override = (os.environ.get("SLACK_USER_TOKENS_PATH") or "").strip() if override: return Path(override) - return _REPO_ROOT / "credential" / "slack_user_tokens.json" + return _DOTENV_DIR / "credential" / "slack_user_tokens.json" TOKENS_FILE = _slack_user_tokens_file() @@ -87,7 +89,7 @@ def _consume_oauth_state(state: str | None) -> bool: content=""" - WG21 Paralegal — Slack OAuth + Boost data collector — Slack OAuth

Slack OAuth helper

Use these links:

@@ -222,8 +224,17 @@ def _get_env(key: str) -> str: def _load_tokens() -> dict[str, dict]: # type: ignore[type-arg] if not TOKENS_FILE.exists(): return {} - with TOKENS_FILE.open(encoding="utf-8") as f: - return json.load(f) # type: ignore[no-any-return] + try: + with TOKENS_FILE.open(encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError) as exc: + logger.warning( + "Could not load Slack user tokens from %s: %s", TOKENS_FILE, exc + ) + return {} + if not isinstance(data, dict): + return {} + return data # type: ignore[no-any-return] def _ensure_tokens_dir() -> None: diff --git a/requirements.txt b/requirements.txt index 989d7e78..5d66be51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ selenium>=4.35 # cppa_slack_tracker slack_oauth_server (user OAuth; python -m cppa_slack_tracker.slack_oauth_server) httpx>=0.27 python-dotenv>=1.0 -starlette>=0.37 +starlette>=0.47.2 uvicorn[standard]>=0.30 # cppa_youtube_script_tracker app (YouTube Data API v3 + VTT transcript download) From c06aabfd369069ea52252831564a4a780cc08cc7 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 9 Apr 2026 14:37:07 -0600 Subject: [PATCH 7/7] Fix lint errors. --- cppa_slack_tracker/slack_oauth_server.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cppa_slack_tracker/slack_oauth_server.py b/cppa_slack_tracker/slack_oauth_server.py index 8f323279..17158356 100644 --- a/cppa_slack_tracker/slack_oauth_server.py +++ b/cppa_slack_tracker/slack_oauth_server.py @@ -228,9 +228,7 @@ def _load_tokens() -> dict[str, dict]: # type: ignore[type-arg] with TOKENS_FILE.open(encoding="utf-8") as f: data = json.load(f) except (OSError, json.JSONDecodeError) as exc: - logger.warning( - "Could not load Slack user tokens from %s: %s", TOKENS_FILE, exc - ) + logger.warning("Could not load Slack user tokens from %s: %s", TOKENS_FILE, exc) return {} if not isinstance(data, dict): return {}