diff --git a/.env.example b/.env.example index 468461a2..b319f075 100644 --- a/.env.example +++ b/.env.example @@ -229,6 +229,27 @@ DATABASE_URL=postgres://user:password@localhost:5432/boost_dashboard # SLACK_XOXC_TOKEN=xoxc-... # SLACK_XOXD_TOKEN=... +# --- User OAuth (cppa_slack_tracker/slack_oauth_server.py + IM message sync) --- +# Run the helper: python -m cppa_slack_tracker.slack_oauth_server +# Open https://api.slack.com/apps → your app → OAuth & Permissions: +# - Add User Token Scopes (e.g. im:read, im:history, mpim:read, mpim:history, …). +# - Redirect URLs: must match SLACK_REDIRECT_URI exactly (e.g. https://your-host/slack/oauth/callback). +# After a user visits /slack/connect and authorizes, tokens are stored under +# /credential/slack_user_tokens.json +# (align SLACK_USER_TOKENS_PATH with Django cppa_slack_tracker.user_tokens / BASE_DIR if they differ). +# SLACK_CLIENT_ID=10473611477057.xxxxxxxxx +# SLACK_CLIENT_SECRET=xxxxxxxx +# SLACK_REDIRECT_URI=https://your-public-host.example.com/slack/oauth/callback +# Optional: comma-separated user scopes (defaults in slack_oauth_server match IM + channels + groups + mpim). +# SLACK_USER_SCOPES=channels:history,channels:read,groups:history,groups:read,im:history,im:read,mpim:history,mpim:read +# Optional: override JSON path for user tokens (default: same dir as .env → credential/slack_user_tokens.json). +# SLACK_USER_TOKENS_PATH=/absolute/path/to/slack_user_tokens.json +# Optional: OAuth CSRF state TTL (seconds), server bind, debug. +# OAUTH_STATE_TTL_S=600 +# SLACK_OAUTH_PORT=8000 +# HOST=127.0.0.1 +# SLACK_OAUTH_DEBUG=0 + # ============================================================================= # Selenium / Chrome (optional; for Slack xoxc/xoxd token extraction only) # ============================================================================= diff --git a/.gitignore b/.gitignore index d9be1a38..e9e583a7 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,9 @@ celerybeat-schedule celerybeat-schedule-* celerybeat.pid +# Credentials +credential/ + # Environment .env .env.local diff --git a/conftest.py b/conftest.py index 6701adc1..b4791a5c 100644 --- a/conftest.py +++ b/conftest.py @@ -21,8 +21,30 @@ def __copy__(self): BaseContext.__copy__ = __copy__ +def _django_connection_created_create_slack_private_schema( + sender, connection, **kwargs +): + """Create slack_private before table sync when pytest uses --no-migrations. + + Must be a module-level function: Django's Signal.connect() defaults to weak=True, + so a nested handler can be garbage-collected after pytest_configure returns and + never run on CI (e.g. Python 3.13 + full suite). + """ + if connection.vendor != "postgresql": + return + with connection.cursor() as cursor: + cursor.execute("CREATE SCHEMA IF NOT EXISTS slack_private;") + + +def _ensure_slack_private_schema_on_postgres_connect(): + from django.db.backends.signals import connection_created + + connection_created.connect(_django_connection_created_create_slack_private_schema) + + def pytest_configure(config): # noqa: F841 (pytest hook; name must match spec) _patch_django_context_copy_py314() + _ensure_slack_private_schema_on_postgres_connect() # Load app-level fixture modules so fixtures from each app are available everywhere. diff --git a/cppa_slack_tracker/admin.py b/cppa_slack_tracker/admin.py index 9770705f..07a932d4 100644 --- a/cppa_slack_tracker/admin.py +++ b/cppa_slack_tracker/admin.py @@ -7,9 +7,13 @@ from .models import ( SlackTeam, SlackChannel, + SlackChannelPrivate, SlackMessage, + SlackMessagePrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, + SlackChannelMembershipPrivate, + SlackChannelMembershipChangeLogPrivate, ) @@ -39,6 +43,23 @@ class SlackChannelAdmin(admin.ModelAdmin): raw_id_fields = ("team", "creator") +@admin.register(SlackChannelPrivate) +class SlackChannelPrivateAdmin(admin.ModelAdmin): + list_display = ( + "channel_id", + "channel_name", + "channel_type", + "team", + "creator", + "created_at", + ) + list_filter = ("channel_type", "created_at", "updated_at") + search_fields = ("channel_id", "channel_name", "description") + readonly_fields = ("created_at", "updated_at") + date_hierarchy = "created_at" + raw_id_fields = ("team", "creator") + + @admin.register(SlackMessage) class SlackMessageAdmin(admin.ModelAdmin): list_display = ( @@ -69,6 +90,36 @@ def message_preview(self, obj): return obj.message[:50] + "..." if len(obj.message) > 50 else obj.message +@admin.register(SlackMessagePrivate) +class SlackMessagePrivateAdmin(admin.ModelAdmin): + list_display = ( + "ts", + "channel", + "user", + "message_preview", + "slack_message_created_at", + ) + list_filter = ("slack_message_created_at", "slack_message_updated_at") + search_fields = ( + "ts", + "message", + "channel__channel_name", + "user__username", + ) + readonly_fields = ( + "ts", + "slack_message_created_at", + "slack_message_updated_at", + ) + date_hierarchy = "slack_message_created_at" + raw_id_fields = ("channel", "user") + + @admin.display(description="Message Preview") + def message_preview(self, obj): + """Return a short preview of the message.""" + return obj.message[:50] + "..." if len(obj.message) > 50 else obj.message + + @admin.register(SlackChannelMembership) class SlackChannelMembershipAdmin(admin.ModelAdmin): list_display = ( @@ -102,3 +153,38 @@ class SlackChannelMembershipChangeLogAdmin(admin.ModelAdmin): readonly_fields = ("created_at",) date_hierarchy = "created_at" raw_id_fields = ("channel", "user") + + +@admin.register(SlackChannelMembershipPrivate) +class SlackChannelMembershipPrivateAdmin(admin.ModelAdmin): + list_display = ( + "channel", + "user", + "is_restricted", + "is_deleted", + "created_at", + "updated_at", + ) + list_filter = ("is_restricted", "is_deleted", "created_at", "updated_at") + search_fields = ( + "channel__channel_name", + "user__username", + "user__display_name", + ) + readonly_fields = ("created_at", "updated_at") + date_hierarchy = "created_at" + raw_id_fields = ("channel", "user") + + +@admin.register(SlackChannelMembershipChangeLogPrivate) +class SlackChannelMembershipChangeLogPrivateAdmin(admin.ModelAdmin): + list_display = ("channel", "user", "is_joined", "created_at") + list_filter = ("is_joined", "created_at") + search_fields = ( + "channel__channel_name", + "user__username", + "user__display_name", + ) + readonly_fields = ("created_at",) + date_hierarchy = "created_at" + raw_id_fields = ("channel", "user") diff --git a/cppa_slack_tracker/fetcher.py b/cppa_slack_tracker/fetcher.py index f3e43319..41b344ab 100644 --- a/cppa_slack_tracker/fetcher.py +++ b/cppa_slack_tracker/fetcher.py @@ -105,13 +105,14 @@ def fetch_team_info( def fetch_channel_list( _team_id: str, *, - types: str = "public_channel", + types: str = "public_channel,private_channel,mpim,im", exclude_archived: bool = False, client=None, ) -> list[dict]: """ Fetch channel list for the workspace (team_id). The bot token is scoped to one workspace. Returns list of channel dicts (id, name, ...). + Default types include public, private, multi-party IM, and IM (see Slack conversations.list). """ if client is None: client = get_slack_client(team_id=_team_id) @@ -136,6 +137,40 @@ def fetch_channel_list( return channels +def fetch_im_channel_list_for_user( + team_id: str, + user_token: str, + *, + exclude_archived: bool = False, +) -> list[dict]: + """ + List IM (direct message) conversations visible to the authorizing user. + + Uses a user OAuth token (``xoxp-``); the bot token cannot see user-to-user DMs. + Paginates ``conversations.list`` with ``types=im`` only. + """ + client = get_slack_client(bot_token=user_token, team_id=team_id) + channels: list[dict] = [] + cursor = None + while True: + data = client.conversations_list( + types="im", + exclude_archived=exclude_archived, + limit=500, + cursor=cursor, + ) + if not data.get("ok"): + logger.warning( + "conversations.list (im) failed: %s", data.get("error", "unknown") + ) + break + channels.extend(data.get("channels", [])) + cursor = (data.get("response_metadata") or {}).get("next_cursor") + if not cursor: + break + return channels + + def _ts_to_utc_date(ts: Optional[str]) -> Optional[date]: """Convert Slack ts string to UTC date, or None if invalid.""" if not ts: diff --git a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py index b44e04fe..4b4f2c15 100644 --- a/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py +++ b/cppa_slack_tracker/management/commands/run_cppa_slack_tracker.py @@ -17,10 +17,16 @@ from django.conf import settings from django.core.management.base import BaseCommand, CommandError -from cppa_slack_tracker.models import SlackTeam -from cppa_slack_tracker.services import save_slack_message +from cppa_slack_tracker.models import SlackChannelPrivate, SlackTeam +from cppa_slack_tracker.services import ( + get_or_create_slack_channel, + save_slack_message, + save_slack_message_private, +) +from cppa_slack_tracker.user_tokens import iter_user_tokens_for_team from cppa_slack_tracker.sync import ( get_channels_to_sync, + get_private_channels_to_sync, sync_channel_users, sync_channels, sync_messages, @@ -302,11 +308,15 @@ def sync_messages(self, options, team: SlackTeam): Sync messages via sync.sync_messages (workspace JSONs, then fetch by day). Optional legacy: load from --messages-json path and save to DB first. """ - channels = get_channels_to_sync( - team, channel_id=(options.get("channel_id") or "").strip() or None - ) - if not channels: - logger.warning("No channels to sync. Sync channels first.") + channel_id_opt = (options.get("channel_id") or "").strip() or None + channels = get_channels_to_sync(team, channel_id=channel_id_opt) + private_channels = get_private_channels_to_sync(team, channel_id=channel_id_opt) + has_user_tokens = any(iter_user_tokens_for_team(team.team_id)) + if not channels and not private_channels and not has_user_tokens: + logger.warning( + "No channels to sync and no user OAuth tokens for this team. " + "Sync channels first or authorize users via slack_oauth_server for IMs." + ) return start_date_str = (options.get("start_date") or "").strip() or None @@ -324,6 +334,7 @@ def sync_messages(self, options, team: SlackTeam): len(all_loaded), ) channel_by_id = {c.channel_id: c for c in channels} + channel_by_id.update({c.channel_id: c for c in private_channels}) load_failures = 0 for msg in all_loaded: if not isinstance(msg, dict): @@ -344,7 +355,10 @@ def sync_messages(self, options, team: SlackTeam): ) continue try: - save_slack_message(channel, msg) + if isinstance(channel, SlackChannelPrivate): + save_slack_message_private(channel, msg) + else: + save_slack_message(channel, msg) except Exception: msg_ts = msg.get("ts", msg.get("client_msg_id", "?")) logger.exception( @@ -371,6 +385,107 @@ def sync_messages(self, options, team: SlackTeam): s, e, ) + for channel in private_channels: + s, e = sync_messages(channel, start_date=start_d, end_date=end_d) + logger.info( + " #%s (non-public): %s saved, %s errors", + channel.channel_name, + s, + e, + ) + + self._sync_im_messages_user_tokens(team, start_d, end_d, channel_id_opt) + + def _sync_im_messages_user_tokens( + self, + team: SlackTeam, + start_d, + end_d, + channel_id_opt: Optional[str], + ) -> None: + """ + List IMs per authorized user token and sync history (user-to-user DMs + not visible to the bot). + """ + from cppa_slack_tracker.fetcher import fetch_im_channel_list_for_user + from operations.slack_ops.tokens import get_slack_client + + tokens = list(iter_user_tokens_for_team(team.team_id)) + if not tokens: + logger.info( + "No Slack user OAuth tokens for team %s; skipping IM user-token sync.", + team.team_id, + ) + return + + logger.info( + "Syncing IM channels with user tokens (%s authorized user(s))...", + len(tokens), + ) + # Same IM (D…) appears in every participant's conversations.list; sync once + # per channel per run using the first token that lists it. + seen_channel_ids: set[str] = set() + for user_slack_id, access_token in tokens: + user_client = get_slack_client(bot_token=access_token, team_id=team.team_id) + try: + im_channels = fetch_im_channel_list_for_user(team.team_id, access_token) + except Exception: + logger.exception( + "Failed to list IM channels for user %s", user_slack_id + ) + continue + + if channel_id_opt: + im_channels = [ + c + for c in im_channels + if isinstance(c, dict) and c.get("id") == channel_id_opt + ] + + for ch in im_channels: + if not isinstance(ch, dict) or not ch.get("id"): + continue + ch_id = ch["id"] + if ch_id in seen_channel_ids: + logger.debug( + "Skipping IM channel %s for user %s (already synced this run)", + ch_id, + user_slack_id, + ) + continue + seen_channel_ids.add(ch_id) + try: + pub, priv, _created = get_or_create_slack_channel(ch, team) + ch_obj = pub or priv + if ch_obj is None: + seen_channel_ids.discard(ch_id) + continue + except Exception: + logger.exception("Failed to upsert IM channel %s", ch.get("id")) + seen_channel_ids.discard(ch_id) + continue + + try: + s, e = sync_messages( + ch_obj, + start_date=start_d, + end_date=end_d, + client=user_client, + ) + logger.info( + " IM #%s (user %s): %s saved, %s errors", + ch_obj.channel_name, + user_slack_id, + s, + e, + ) + except Exception: + logger.exception( + "Failed to sync messages for IM channel %s user %s", + ch.get("id"), + user_slack_id, + ) + seen_channel_ids.discard(ch_id) def sync_to_pinecone(self, team: SlackTeam): """Sync Slack messages to Pinecone after message sync.""" diff --git a/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py new file mode 100644 index 00000000..fe1dd08a --- /dev/null +++ b/cppa_slack_tracker/migrations/0004_alter_slackteam_team_id_slackchannelprivate_and_more.py @@ -0,0 +1,107 @@ +# Generated by Django 4.2.29 on 2026-04-08 18:36 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ("cppa_user_tracker", "0007_youtubespeaker_external_id"), + ("cppa_slack_tracker", "0003_alter_slackchannel_unique_team_channel_id"), + ] + + operations = [ + migrations.RunSQL( + sql='CREATE SCHEMA IF NOT EXISTS slack_private;', + reverse_sql=migrations.RunSQL.noop, + ), + migrations.AlterField( + model_name='slackteam', + name='team_id', + field=models.CharField(max_length=50, unique=True), + ), + migrations.CreateModel( + name='SlackChannelPrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('channel_id', models.CharField(db_index=True, max_length=50)), + ('channel_name', models.CharField(db_index=True, max_length=255)), + ('channel_type', models.CharField(choices=[('private_channel', 'Private channel'), ('mpim', 'Multi-party direct message'), ('im', 'Direct message')], db_index=True, help_text='Type: private_channel, mpim, or im (not public_channel).', max_length=50)), + ('description', models.TextField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('creator', models.ForeignKey(blank=True, db_column='creator_user_id', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='created_private_channels', to='cppa_user_tracker.slackuser')), + ('team', models.ForeignKey(db_column='team_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_channels', to='cppa_slack_tracker.slackteam')), + ], + options={ + 'verbose_name': 'Slack Channel (non-public)', + 'verbose_name_plural': 'Slack Channels (non-public)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackchannel_private', + }, + ), + migrations.CreateModel( + name='SlackChannelMembershipPrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('is_restricted', models.BooleanField(default=False)), + ('is_deleted', models.BooleanField(default=False)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='memberships', to='cppa_slack_tracker.slackchannelprivate')), + ('user', models.ForeignKey(db_column='slack_user_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_channel_memberships', to='cppa_user_tracker.slackuser')), + ], + options={ + 'verbose_name': 'Slack Channel Membership (non-public)', + 'verbose_name_plural': 'Slack Channel Memberships (non-public)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackchannelmembership_private', + }, + ), + migrations.CreateModel( + name='SlackChannelMembershipChangeLogPrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('is_joined', models.BooleanField(help_text='True if joined, False if left')), + ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='membership_changes', to='cppa_slack_tracker.slackchannelprivate')), + ('user', models.ForeignKey(db_column='slack_user_id', on_delete=django.db.models.deletion.CASCADE, related_name='private_membership_changes', to='cppa_user_tracker.slackuser')), + ], + options={ + 'verbose_name': 'Slack Channel Membership Change Log (non-public)', + 'verbose_name_plural': 'Slack Channel Membership Change Logs (non-public)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackchannelmembershipchangelog_private', + }, + ), + migrations.CreateModel( + name='SlackMessagePrivate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('ts', models.CharField(db_index=True, help_text='Slack message timestamp (unique per channel)', max_length=50)), + ('message', models.TextField(blank=True)), + ('thread_ts', models.CharField(blank=True, db_index=True, help_text='Thread timestamp if this is a threaded message', max_length=50, null=True)), + ('slack_message_created_at', models.DateTimeField(db_index=True)), + ('slack_message_updated_at', models.DateTimeField(blank=True, db_index=True, null=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='messages', to='cppa_slack_tracker.slackchannelprivate')), + ('user', models.ForeignKey(blank=True, db_column='slack_user_id', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='private_slack_messages', to='cppa_user_tracker.slackuser')), + ], + options={ + 'verbose_name': 'Slack Message (non-public channel)', + 'verbose_name_plural': 'Slack Messages (non-public channels)', + 'db_table': 'slack_private"."cppa_slack_tracker_slackmessage_private', + 'unique_together': {('channel', 'ts')}, + }, + ), + migrations.AddConstraint( + model_name='slackchannelprivate', + constraint=models.UniqueConstraint(fields=('team', 'channel_id'), name='unique_team_channel_id_private'), + ), + migrations.AlterUniqueTogether( + name='slackchannelmembershipprivate', + unique_together={('channel', 'user')}, + ), + migrations.AlterUniqueTogether( + name='slackchannelmembershipchangelogprivate', + unique_together={('channel', 'user', 'created_at')}, + ), + ] diff --git a/cppa_slack_tracker/models.py b/cppa_slack_tracker/models.py index b8a05756..27b9fc57 100644 --- a/cppa_slack_tracker/models.py +++ b/cppa_slack_tracker/models.py @@ -21,6 +21,14 @@ class SlackChannelType(models.TextChoices): IM = "im", "Direct message" +class SlackChannelPrivateType(models.TextChoices): + """Channel kinds stored in SlackChannelPrivate (non-public only).""" + + PRIVATE_CHANNEL = "private_channel", "Private channel" + MPIM = "mpim", "Multi-party direct message" + IM = "im", "Direct message" + + class SlackTeam(models.Model): """ Slack team (workspace) model. @@ -86,6 +94,56 @@ def __str__(self): return f"#{self.channel_name} ({self.channel_id})" +class SlackChannelPrivate(models.Model): + """ + Non-public Slack channels (private, mpim, im) stored separately from SlackChannel. + + Public channels remain in cppa_slack_tracker_slackchannel; this table holds the rest. + """ + + team = models.ForeignKey( + SlackTeam, + on_delete=models.CASCADE, + related_name="private_channels", + db_column="team_id", + ) + channel_id = models.CharField(max_length=50, db_index=True) + channel_name = models.CharField(max_length=255, db_index=True) + channel_type = models.CharField( + max_length=50, + choices=SlackChannelPrivateType.choices, + db_index=True, + help_text="Type: private_channel, mpim, or im (not public_channel).", + ) + description = models.TextField(null=True, blank=True) + creator = models.ForeignKey( + SlackUser, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="created_private_channels", + db_column="creator_user_id", + ) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + # Django quotes db_table as one identifier unless we inject a closing quote; + # this yields "slack_private"."cppa_slack_tracker_slackchannel_private" in SQL. + db_table = 'slack_private"."cppa_slack_tracker_slackchannel_private' + verbose_name = "Slack Channel (non-public)" + verbose_name_plural = "Slack Channels (non-public)" + constraints = [ + models.UniqueConstraint( + fields=["team", "channel_id"], + name="unique_team_channel_id_private", + ), + ] + + def __str__(self): + return f"#{self.channel_name} ({self.channel_id}) [{self.channel_type}]" + + class SlackMessage(models.Model): """ Slack message model. @@ -134,6 +192,58 @@ def __str__(self): return f"Message by {self.user} in {self.channel}: {message_preview}" +class SlackMessagePrivate(models.Model): + """ + Slack messages for non-public channels (private, mpim, im). + + Stored in cppa_slack_tracker_slackmessage_private; mirrors SlackMessage but + references SlackChannelPrivate. + """ + + channel = models.ForeignKey( + SlackChannelPrivate, + on_delete=models.CASCADE, + related_name="messages", + ) + ts = models.CharField( + max_length=50, + db_index=True, + help_text="Slack message timestamp (unique per channel)", + ) + user = models.ForeignKey( + SlackUser, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="private_slack_messages", + db_column="slack_user_id", + ) + message = models.TextField(blank=True) + thread_ts = models.CharField( + max_length=50, + null=True, + blank=True, + db_index=True, + help_text="Thread timestamp if this is a threaded message", + ) + slack_message_created_at = models.DateTimeField(db_index=True) + slack_message_updated_at = models.DateTimeField( + db_index=True, null=True, blank=True + ) + + class Meta: + db_table = 'slack_private"."cppa_slack_tracker_slackmessage_private' + verbose_name = "Slack Message (non-public channel)" + verbose_name_plural = "Slack Messages (non-public channels)" + unique_together = [["channel", "ts"]] + + def __str__(self): + message_preview = ( + self.message[:50] + "..." if len(self.message) > 50 else self.message + ) + return f"Message by {self.user} in {self.channel}: {message_preview}" + + class SlackChannelMembership(models.Model): """ Current channel membership status. @@ -193,3 +303,68 @@ class Meta: def __str__(self): action = "joined" if self.is_joined else "left" return f"{self.user} {action} {self.channel} at {self.created_at}" + + +class SlackChannelMembershipPrivate(models.Model): + """ + Current membership for non-public channels (private_channel, mpim, im). + + Stored in schema ``slack_private`` alongside SlackChannelPrivate. + """ + + channel = models.ForeignKey( + SlackChannelPrivate, + on_delete=models.CASCADE, + related_name="memberships", + ) + user = models.ForeignKey( + SlackUser, + on_delete=models.CASCADE, + related_name="private_channel_memberships", + db_column="slack_user_id", + ) + is_restricted = models.BooleanField(default=False) + is_deleted = models.BooleanField(default=False) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + db_table = 'slack_private"."cppa_slack_tracker_slackchannelmembership_private' + verbose_name = "Slack Channel Membership (non-public)" + verbose_name_plural = "Slack Channel Memberships (non-public)" + unique_together = [["channel", "user"]] + + def __str__(self): + return f"{self.user} in {self.channel}" + + +class SlackChannelMembershipChangeLogPrivate(models.Model): + """ + Join/leave history for non-public channels (private_channel, mpim, im). + """ + + channel = models.ForeignKey( + SlackChannelPrivate, + on_delete=models.CASCADE, + related_name="membership_changes", + ) + user = models.ForeignKey( + SlackUser, + on_delete=models.CASCADE, + related_name="private_membership_changes", + db_column="slack_user_id", + ) + is_joined = models.BooleanField(help_text="True if joined, False if left") + created_at = models.DateTimeField(default=timezone.now, db_index=True) + + class Meta: + db_table = ( + 'slack_private"."cppa_slack_tracker_slackchannelmembershipchangelog_private' + ) + verbose_name = "Slack Channel Membership Change Log (non-public)" + verbose_name_plural = "Slack Channel Membership Change Logs (non-public)" + unique_together = [["channel", "user", "created_at"]] + + def __str__(self): + action = "joined" if self.is_joined else "left" + return f"{self.user} {action} {self.channel} at {self.created_at}" diff --git a/cppa_slack_tracker/services.py b/cppa_slack_tracker/services.py index 9eab53b6..9f98db6c 100644 --- a/cppa_slack_tracker/services.py +++ b/cppa_slack_tracker/services.py @@ -22,14 +22,32 @@ from .fetcher import fetch_user_info from .models import ( SlackChannel, + SlackChannelPrivate, + SlackChannelPrivateType, SlackChannelMembership, SlackChannelMembershipChangeLog, + SlackChannelMembershipChangeLogPrivate, + SlackChannelMembershipPrivate, SlackMessage, + SlackMessagePrivate, SlackTeam, ) logger = logging.getLogger(__name__) +# Non-public channels that store membership in SlackChannelMembership*Private +# (excludes `im`: DMs are not synced here). +_PRIVATE_MEMBERSHIP_CHANNEL_TYPES = frozenset( + { + SlackChannelPrivateType.PRIVATE_CHANNEL, + SlackChannelPrivateType.MPIM, + } +) + + +def _private_channel_tracks_membership(channel: SlackChannelPrivate) -> bool: + return channel.channel_type in _PRIVATE_MEMBERSHIP_CHANNEL_TYPES + # Slack message subtypes to ignore SUBTYPE_IGNORE = [ @@ -110,15 +128,12 @@ def get_or_create_slack_team( return team, created -# --- SlackChannel --- -@transaction.atomic -def get_or_create_slack_channel( +# --- SlackChannel / SlackChannelPrivate --- +def _slack_channel_payload_fields( slack_channel: dict[str, Any], team: SlackTeam, -) -> tuple[Optional[SlackChannel], bool]: - """Get or create a Slack channel. Returns (channel, created); channel is None when skipped.""" - if not slack_channel.get("id"): - raise ValueError("Slack channel ID is required") +) -> tuple[str, str, str, Optional[SlackUser]]: + """Parse Slack API channel object: channel_type, channel_name, description, creator.""" creator = None creator_user_id = slack_channel.get("creator") if creator_user_id: @@ -141,12 +156,52 @@ def get_or_create_slack_channel( channel_type = "public_channel" else: channel_type = slack_channel.get("type", "public_channel") - if channel_type != "public_channel": - logger.warning(f"Skipping non-public channel: {slack_channel['id']}") - return None, False - channel, created = SlackChannel.objects.get_or_create( + return channel_type, channel_name, description, creator + + +@transaction.atomic +def get_or_create_slack_channel( + slack_channel: dict[str, Any], + team: SlackTeam, +) -> tuple[Optional[SlackChannel], Optional[SlackChannelPrivate], bool]: + """ + Get or create a Slack channel row. + + Public channels use SlackChannel; non-public use SlackChannelPrivate + (table cppa_slack_tracker_slackchannel_private). + + Returns (public_channel, private_channel, created). Exactly one of the first two + is set on success; both None only when the payload is invalid (e.g. missing id). + """ + if not slack_channel.get("id"): + raise ValueError("Slack channel ID is required") + channel_type, channel_name, description, creator = _slack_channel_payload_fields( + slack_channel, team + ) + cid = slack_channel["id"] + if channel_type == "public_channel": + channel, created = SlackChannel.objects.get_or_create( + team=team, + channel_id=cid, + defaults={ + "channel_name": channel_name, + "channel_type": channel_type, + "description": description, + "creator": creator, + }, + ) + if not created: + channel.channel_name = channel_name or channel.channel_name + channel.channel_type = channel_type or channel.channel_type + channel.description = description + if creator is not None: + channel.creator = creator + channel.save() + return channel, None, created + + priv, created = SlackChannelPrivate.objects.get_or_create( team=team, - channel_id=slack_channel["id"], + channel_id=cid, defaults={ "channel_name": channel_name, "channel_type": channel_type, @@ -155,13 +210,13 @@ def get_or_create_slack_channel( }, ) if not created: - channel.channel_name = channel_name or channel.channel_name - channel.channel_type = channel_type or channel.channel_type - channel.description = description + priv.channel_name = channel_name or priv.channel_name + priv.channel_type = channel_type or priv.channel_type + priv.description = description if creator is not None: - channel.creator = creator - channel.save() - return channel, created + priv.creator = creator + priv.save() + return None, priv, created # --- SlackChannelMembership --- @@ -236,6 +291,86 @@ def sync_channel_memberships(channel: SlackChannel, member_ids: list[str]) -> No continue +# --- SlackChannelMembershipPrivate (private_channel, mpim only) --- +@transaction.atomic +def add_channel_membership_change_private( + channel: SlackChannelPrivate, + slack_user_id: str, + ts: str, + is_joined: bool, +) -> SlackChannelMembershipChangeLogPrivate: + """Record join/leave for a non-public channel; updates current membership row.""" + if not _private_channel_tracks_membership(channel): + raise ValueError( + "Membership is only tracked for private_channel and mpim conversations" + ) + try: + user = SlackUser.objects.get(slack_user_id=slack_user_id) + except SlackUser.DoesNotExist: + raise ValueError(f"User {slack_user_id} not found") + created_at = _parse_slack_ts_string(ts) + change_log, _ = SlackChannelMembershipChangeLogPrivate.objects.get_or_create( + channel=channel, + user=user, + created_at=created_at, + defaults={"is_joined": is_joined}, + ) + if change_log.is_joined != is_joined: + change_log.is_joined = is_joined + change_log.save(update_fields=["is_joined"]) + if is_joined: + membership, _ = SlackChannelMembershipPrivate.objects.get_or_create( + channel=channel, + user=user, + defaults={"is_deleted": False}, + ) + if membership.is_deleted: + membership.is_deleted = False + membership.save() + else: + SlackChannelMembershipPrivate.objects.filter(channel=channel, user=user).update( + is_deleted=True + ) + return change_log + + +@transaction.atomic +def sync_channel_memberships_private( + channel: SlackChannelPrivate, member_ids: list[str] +) -> None: + """Sync memberships for a private_channel or mpim; no-op if channel type is not supported.""" + if not _private_channel_tracks_membership(channel): + return + existing_memberships = SlackChannelMembershipPrivate.objects.filter( + channel=channel, + is_deleted=False, + ).select_related("user") + existing_user_ids = {m.user.slack_user_id for m in existing_memberships} + new_member_ids = set(member_ids) - existing_user_ids + removed_member_ids = existing_user_ids - set(member_ids) + for user_id in new_member_ids: + try: + user = SlackUser.objects.get(slack_user_id=user_id) + membership, created = SlackChannelMembershipPrivate.objects.get_or_create( + channel=channel, + user=user, + defaults={"is_deleted": False}, + ) + if not created and membership.is_deleted: + membership.is_deleted = False + membership.save() + except SlackUser.DoesNotExist: + continue + for user_id in removed_member_ids: + try: + user = SlackUser.objects.get(slack_user_id=user_id) + SlackChannelMembershipPrivate.objects.filter( + channel=channel, user=user + ).update(is_deleted=True) + except SlackUser.DoesNotExist: + continue + + # --- SlackMessage --- def _message_text_for_subtype( slack_message: dict[str, Any], subtype: str @@ -309,7 +444,8 @@ def save_slack_message( if isinstance(comment, dict): text += f"\nComment: {comment.get('comment', '')}" elif subtype: - text = _message_text_for_subtype(slack_message, subtype) or "" + subtype_text = _message_text_for_subtype(slack_message, subtype) + text = subtype_text or slack_message.get("text", "") else: text = slack_message.get("text", "") @@ -350,3 +486,111 @@ def save_slack_message( message.slack_message_updated_at = updated_at message.save() return message + + +@transaction.atomic +def save_slack_message_private( + channel: SlackChannelPrivate, + slack_message: dict[str, Any], +) -> Optional[SlackMessagePrivate]: + """ + Save or update a Slack message for a non-public channel. + + Same rules as save_slack_message, but persists to SlackMessagePrivate. + For private_channel and mpim, channel_join / channel_leave update + SlackChannelMembershipPrivate / SlackChannelMembershipChangeLogPrivate. + Join/leave on im is ignored (no membership rows for DMs). + """ + subtype = slack_message.get("subtype") + if subtype in SUBTYPE_IGNORE: + return None + if subtype == "channel_join": + if not _private_channel_tracks_membership(channel): + return None + event_ts = slack_message.get("ts") + if not event_ts: + logger.warning("Skipping channel_join without ts (private channel)") + return None + if slack_message.get("user"): + user = _get_or_fetch_slack_user( + slack_message["user"], team_id=channel.team.team_id + ) + add_channel_membership_change_private( + channel, + user.slack_user_id, + event_ts, + True, + ) + return None + if subtype == "channel_leave": + if not _private_channel_tracks_membership(channel): + return None + event_ts = slack_message.get("ts") + if not event_ts: + logger.warning("Skipping channel_leave without ts (private channel)") + return None + if slack_message.get("user"): + user = _get_or_fetch_slack_user( + slack_message["user"], team_id=channel.team.team_id + ) + add_channel_membership_change_private( + channel, + user.slack_user_id, + event_ts, + False, + ) + return None + + user: Optional[SlackUser] = None + text: str + if subtype == "file_comment": + user = _get_or_fetch_slack_user( + slack_message.get("user", "") or "-1", team_id=channel.team.team_id + ) + text = slack_message.get("text", "") + comment = slack_message.get("comment") + if isinstance(comment, dict): + text += f"\nComment: {comment.get('comment', '')}" + elif subtype: + subtype_text = _message_text_for_subtype(slack_message, subtype) + text = subtype_text or slack_message.get("text", "") + else: + text = slack_message.get("text", "") + + if user is None: + user_id = slack_message.get("user") + if not user_id: + if slack_message.get("text") == "A file was commented on": + return None + raise ValueError("User not found") + user = _get_or_fetch_slack_user(user_id, team_id=channel.team.team_id) + + clean_text = text.replace("\x00", "").replace("\u0000", "") + ts = slack_message.get("ts") + if not ts: + raise ValueError("Message timestamp (ts) is required") + created_at = _parse_slack_ts_string(ts) + edited = slack_message.get("edited") + if not isinstance(edited, dict): + edited = {} + updated_at = _parse_slack_ts_string(edited.get("ts", ts)) if edited else created_at + + message, created = SlackMessagePrivate.objects.get_or_create( + channel=channel, + ts=ts, + defaults={ + "user": user, + "message": clean_text, + "thread_ts": slack_message.get("thread_ts"), + "slack_message_created_at": created_at, + "slack_message_updated_at": updated_at, + }, + ) + if not created: + message.user = user + message.message = clean_text + message.thread_ts = slack_message.get("thread_ts") + message.slack_message_created_at = created_at + message.slack_message_updated_at = updated_at + message.save() + return message diff --git a/cppa_slack_tracker/slack_oauth_server.py b/cppa_slack_tracker/slack_oauth_server.py new file mode 100644 index 00000000..17158356 --- /dev/null +++ b/cppa_slack_tracker/slack_oauth_server.py @@ -0,0 +1,443 @@ +""" +Slack user OAuth server + +Loads `.env` from a fixed path beside this package; default token file is under +the same directory as that `.env` (see `_SLACK_OAUTH_DOTENV_PATH`). + +- GET / → landing page with links (root is not Slack's callback) +- GET /slack/connect → Slack authorize URL with CSRF `state` (server-side, TTL) +- GET /slack/oauth/callback → `?code=...&state=...`; state validated before token exchange +- GET /slack/authorized → list authorized users (user_id, team_id; tokens not shown) +""" + +from __future__ import annotations + +import json +import logging +import os +import secrets +import tempfile +import time +from html import escape +from pathlib import Path +from urllib.parse import urlencode + +import httpx +from dotenv import load_dotenv +from starlette.applications import Starlette +from starlette.requests import Request +from starlette.responses import HTMLResponse, RedirectResponse +from starlette.routing import Route + +# Default .env: cppa_slack_tracker/ -> project root .env +_SLACK_OAUTH_DOTENV_PATH = Path(__file__).resolve().parent.parent / ".env" +load_dotenv(_SLACK_OAUTH_DOTENV_PATH) +_DOTENV_DIR = _SLACK_OAUTH_DOTENV_PATH.parent + +logger = logging.getLogger(__name__) + + +def _slack_user_tokens_file() -> Path: + """Path to JSON map of OAuth user tokens; override with SLACK_USER_TOKENS_PATH.""" + override = (os.environ.get("SLACK_USER_TOKENS_PATH") or "").strip() + if override: + return Path(override) + return _DOTENV_DIR / "credential" / "slack_user_tokens.json" + + +TOKENS_FILE = _slack_user_tokens_file() + +# CSRF: one-time OAuth state values (server-side). Values are secrets.token_urlsafe; +# each maps to unix expiry time. Prevents replay after TTL via expiry + pop-on-use. +_oauth_states: dict[str, float] = {} + + +def _oauth_state_ttl_s() -> int: + return max(60, int(os.environ.get("OAUTH_STATE_TTL_S", "600"))) + + +def _purge_expired_oauth_states() -> None: + now = time.time() + expired = [s for s, until in _oauth_states.items() if until < now] + for s in expired: + del _oauth_states[s] + + +def _register_oauth_state(state: str) -> None: + _purge_expired_oauth_states() + _oauth_states[state] = time.time() + _oauth_state_ttl_s() + + +def _consume_oauth_state(state: str | None) -> bool: + """Validate and consume a state (single use). False if missing, unknown, or expired.""" + if not state: + return False + _purge_expired_oauth_states() + until = _oauth_states.pop(state, None) + if until is None: + return False + if time.time() > until: + return False + return True + + +# --------------------------------------------------------------------------- +# Static HTML responses (predefined at import time) +# --------------------------------------------------------------------------- + +_HTML_INDEX = HTMLResponse( + content=""" + + + Boost data collector — Slack OAuth + +

Slack OAuth helper

+

Use these links:

+ +

Slack redirects to /slack/oauth/callback after authorization.

+ + + """ +) + +_HTML_MISSING_CODE = HTMLResponse( + content=""" + + Slack OAuth + +

Missing code

+

No code in query string. Start from + /slack/connect. +

+ + """, + status_code=400, +) + +_HTML_SUCCESS = HTMLResponse( + content=""" + + + Slack OAuth + +

You're all set

+

This user is now authorized. Your app can receive + "Subscribe to events on behalf of users" events for them.

+

Token stored in slack_user_tokens.json.

+ + + """ +) + + +# --------------------------------------------------------------------------- +# Dynamic HTML response builders (depend on runtime values) +# --------------------------------------------------------------------------- + + +def _html_auth_error(error: str) -> HTMLResponse: + safe_error = escape(error) + return HTMLResponse( + content=f""" + + Slack OAuth +

Authorization failed

Slack returned: {safe_error}

+ """, + status_code=400, + ) + + +def _html_exchange_failed(error: str) -> HTMLResponse: + safe_error = escape(error) + return HTMLResponse( + content=f""" + + Slack OAuth + +

Token exchange failed

+

{safe_error}

+ + """, + status_code=400, + ) + + +def _html_invalid_oauth_state() -> HTMLResponse: + return HTMLResponse( + content=""" + + Slack OAuth + +

Invalid or expired session

+

OAuth state was missing, invalid, or expired. Start again from + /slack/connect. +

+ + """, + status_code=400, + ) + + +def _html_authorized(table_body: str) -> HTMLResponse: + return HTMLResponse( + content=f""" + + + Authorized users + +

Authorized users

+

Users who have completed the "Connect Slack" flow. + Tokens are stored in slack_user_tokens.json (not shown here).

+ + + + {table_body} + +
User IDTeam ID
+

Add another user

+ + + """ + ) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _get_env(key: str) -> str: + val = (os.environ.get(key) or "").strip() + # If the value accidentally includes "KEY=" (e.g. pasted full .env line), strip the prefix. + if val.startswith(f"{key}="): + val = val.split("=", 1)[1].strip() + if not val or val == "your_client_id_here" or val == "your_client_secret_here": + raise RuntimeError( + f"Set {key} in .env (copy from .env.example and fill in your Slack app credentials)" + ) + return val + + +def _load_tokens() -> dict[str, dict]: # type: ignore[type-arg] + if not TOKENS_FILE.exists(): + return {} + try: + with TOKENS_FILE.open(encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError) as exc: + logger.warning("Could not load Slack user tokens from %s: %s", TOKENS_FILE, exc) + return {} + if not isinstance(data, dict): + return {} + return data # type: ignore[no-any-return] + + +def _ensure_tokens_dir() -> None: + """Create credential dir with owner-only perms where possible.""" + # TODO: move token storage to a dedicated secret manager when available. + TOKENS_FILE.parent.mkdir(mode=0o700, parents=True, exist_ok=True) + try: + TOKENS_FILE.parent.chmod(0o700) + except OSError: + # Best effort on non-POSIX filesystems/platforms. + pass + + +def _save_tokens(data: dict[str, dict]) -> None: # type: ignore[type-arg] + _ensure_tokens_dir() + with tempfile.NamedTemporaryFile( + "w", + encoding="utf-8", + dir=TOKENS_FILE.parent, + delete=False, + prefix=f"{TOKENS_FILE.name}.", + suffix=".tmp", + ) as f: + json.dump(data, f, indent=2) + f.flush() + os.fsync(f.fileno()) + tmp_path = Path(f.name) + try: + tmp_path.chmod(0o600) + except OSError: + pass + tmp_path.replace(TOKENS_FILE) + try: + TOKENS_FILE.chmod(0o600) + except OSError: + # Best effort on non-POSIX filesystems/platforms. + pass + + +async def _exchange_code(code: str) -> dict: # type: ignore[type-arg] + """POST to oauth.v2.access. + + Returns Slack's JSON object on success. On transport, HTTP, or parse errors returns + a dict with ok=False and an error string so slack_oauth_callback can call + _html_exchange_failed. + """ + try: + client_id = _get_env("SLACK_CLIENT_ID") + client_secret = _get_env("SLACK_CLIENT_SECRET") + redirect_uri = _get_env("SLACK_REDIRECT_URI") + except RuntimeError as exc: + return {"ok": False, "error": str(exc)} + + try: + async with httpx.AsyncClient() as client: + resp = await client.post( + "https://slack.com/api/oauth.v2.access", + data={ + "client_id": client_id, + "client_secret": client_secret, + "code": code, + "redirect_uri": redirect_uri, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=30.0, + ) + resp.raise_for_status() + except httpx.HTTPStatusError as exc: + snippet = "" + try: + snippet = (exc.response.text or "")[:400] + except Exception: + logger.debug( + "Failed to read HTTP error response body for Slack OAuth token exchange", + exc_info=True, + ) + err = f"HTTP {exc.response.status_code}" + if snippet: + err = f"{err}: {snippet}" + return {"ok": False, "error": err} + except httpx.RequestError as exc: + return {"ok": False, "error": f"Token request failed: {exc}"} + + try: + data = resp.json() + except (json.JSONDecodeError, ValueError) as exc: + return {"ok": False, "error": f"Invalid response from Slack (not JSON): {exc}"} + + if not isinstance(data, dict): + return { + "ok": False, + "error": "Unexpected response from Slack (expected JSON object).", + } + return data + + +def _store_token(data: dict) -> None: # type: ignore[type-arg] + """Persist the authed_user token from an oauth.v2.access response.""" + authed_user = data.get("authed_user") or {} + user_token = authed_user.get("access_token") + user_id = authed_user.get("id") + team_id = (data.get("team") or {}).get("id") + if not (user_token and user_id): + return + tokens = _load_tokens() + key = f"{team_id}:{user_id}" if team_id else user_id + tokens[key] = {"user_id": user_id, "team_id": team_id, "access_token": user_token} + _save_tokens(tokens) + + +async def index(_: Request) -> HTMLResponse: + """Root URL — OAuth entry points live under /slack/…""" + return _HTML_INDEX + + +async def slack_connect(_: Request) -> RedirectResponse: + """Redirect user to Slack authorize URL (entry point for 'Connect Slack').""" + client_id = _get_env("SLACK_CLIENT_ID") + redirect_uri = _get_env("SLACK_REDIRECT_URI") + user_scope = os.environ.get( + "SLACK_USER_SCOPES", + "channels:history,channels:read,groups:history,groups:read," + "im:history,im:read,mpim:history,mpim:read", + ) + state = secrets.token_urlsafe(32) + _register_oauth_state(state) + params = { + "client_id": client_id, + "user_scope": user_scope, + "redirect_uri": redirect_uri, + "state": state, + } + url = "https://slack.com/oauth/v2/authorize?" + urlencode(params) + return RedirectResponse(url=url, status_code=302) + + +async def slack_oauth_callback(request: Request) -> HTMLResponse: + """Handle redirect from Slack: exchange code for user token, store it, show success.""" + code = request.query_params.get("code") + error = request.query_params.get("error") + state = request.query_params.get("state") + + if not _consume_oauth_state(state): + return _html_invalid_oauth_state() + + if error: + return _html_auth_error(error) + + if not code: + return _HTML_MISSING_CODE + + try: + data = await _exchange_code(code) + except Exception as exc: + return _html_exchange_failed(f"Token exchange failed: {exc}") + + if not data.get("ok"): + return _html_exchange_failed(data.get("error", "Unknown error")) + + _store_token(data) + + return _HTML_SUCCESS + + +async def slack_authorized(_: Request) -> HTMLResponse: + """List authorized users (user_id, team_id); tokens are not shown.""" + tokens = _load_tokens() + rows = [ + f"{escape(str(d.get('user_id', '')))}" + f"{escape(str(d.get('team_id', '')))}" + for d in tokens.values() + ] + no_rows = 'No users authorized yet.' + table_body = "\n".join(rows) if rows else no_rows + return _html_authorized(table_body) + + +def _slack_oauth_debug_enabled() -> bool: + """Starlette debug mode — off unless SLACK_OAUTH_DEBUG is truthy (unsafe for production).""" + return os.environ.get("SLACK_OAUTH_DEBUG", "").strip().lower() in ( + "1", + "true", + "yes", + "on", + ) + + +app = Starlette( + debug=_slack_oauth_debug_enabled(), + routes=[ + Route("/", index), + Route("/slack/connect", slack_connect), + Route("/slack/oauth/callback", slack_oauth_callback), + Route("/slack/authorized", slack_authorized), + ], +) + + +def main() -> None: + import uvicorn + + port = int(os.environ.get("SLACK_OAUTH_PORT", "8000")) + host = (os.environ.get("HOST") or "127.0.0.1").strip() or "127.0.0.1" + uvicorn.run(app, host=host, port=port) + + +if __name__ == "__main__": + main() diff --git a/cppa_slack_tracker/sync/__init__.py b/cppa_slack_tracker/sync/__init__.py index 84160ec9..ca36c15d 100644 --- a/cppa_slack_tracker/sync/__init__.py +++ b/cppa_slack_tracker/sync/__init__.py @@ -3,6 +3,7 @@ from cppa_slack_tracker.sync.sync_channel import sync_channels, sync_team from cppa_slack_tracker.sync.sync_channel_user import ( get_channels_to_sync, + get_private_channels_to_sync, sync_channel_users, ) from cppa_slack_tracker.sync.sync_message import sync_messages @@ -10,6 +11,7 @@ __all__ = [ "get_channels_to_sync", + "get_private_channels_to_sync", "sync_channels", "sync_channel_users", "sync_messages", diff --git a/cppa_slack_tracker/sync/sync_channel.py b/cppa_slack_tracker/sync/sync_channel.py index a5738e77..1e63dc36 100644 --- a/cppa_slack_tracker/sync/sync_channel.py +++ b/cppa_slack_tracker/sync/sync_channel.py @@ -47,13 +47,13 @@ def sync_team(team_id: str, team_name: Optional[str] = None) -> SlackTeam: def _process_channel_info(ch: dict, team: SlackTeam) -> bool: """ - Process one channel: get_or_create_slack_channel. Returns True if synced, - False if skipped (missing id or non-public channel). Raises on error. + Process one channel: get_or_create_slack_channel. Returns True if synced + to SlackChannel (public) or SlackChannelPrivate (non-public). Raises on error. """ if not ch.get("id"): return False - channel, _ = get_or_create_slack_channel(ch, team) - return channel is not None + pub, priv, _ = get_or_create_slack_channel(ch, team) + return pub is not None or priv is not None def sync_channels( @@ -61,14 +61,15 @@ def sync_channels( *, channel_id: Optional[str] = None, team_id: Optional[str] = None, - types: str = "public_channel", + types: str = "public_channel,private_channel,mpim,im", exclude_archived: bool = False, ) -> tuple[int, int]: """ Sync channels for a team to the database. If channel_id is set, fetch only that channel (conversations.info). - Otherwise fetch via fetch_channel_list(team_id). Returns (success_count, error_count). + Otherwise fetch via fetch_channel_list(team_id). Default types are all + Slack conversation kinds (public, private, mpim, im). Returns (success_count, error_count). """ success_count = 0 error_count = 0 diff --git a/cppa_slack_tracker/sync/sync_channel_user.py b/cppa_slack_tracker/sync/sync_channel_user.py index 24d94199..a1035018 100644 --- a/cppa_slack_tracker/sync/sync_channel_user.py +++ b/cppa_slack_tracker/sync/sync_channel_user.py @@ -2,9 +2,10 @@ Sync Slack channel memberships with the database. Fetches channel member lists via cppa_slack_tracker.fetcher.fetch_channel_user_list -and syncs memberships to the database. Use get_channels_to_sync() to get the list -of channels to sync (one or all in a team); run_cppa_slack_tracker uses it to -avoid duplicating channel resolution logic. +and syncs memberships to the database. Public channels use SlackChannelMembership; +private_channel and mpim use SlackChannelMembershipPrivate (im is skipped). + +Use get_channels_to_sync() / get_private_channels_to_sync() for message sync lists. """ from __future__ import annotations @@ -13,8 +14,16 @@ from typing import Optional from cppa_slack_tracker.fetcher import fetch_channel_user_list -from cppa_slack_tracker.models import SlackChannel, SlackTeam -from cppa_slack_tracker.services import sync_channel_memberships +from cppa_slack_tracker.models import ( + SlackChannel, + SlackChannelPrivate, + SlackChannelPrivateType, + SlackTeam, +) +from cppa_slack_tracker.services import ( + sync_channel_memberships, + sync_channel_memberships_private, +) logger = logging.getLogger(__name__) @@ -43,6 +52,30 @@ def get_channels_to_sync( return list(SlackChannel.objects.filter(team=team).order_by("channel_id")) +def get_private_channels_to_sync( + team: SlackTeam, + *, + channel_id: Optional[str] = None, +) -> list[SlackChannelPrivate]: + """ + Return non-public channels to sync for a team (for message sync and similar). + + If channel_id is set and exists on SlackChannelPrivate, returns that channel; + if not found, logs a warning and returns all private channels in the team. + If channel_id is not set, returns all SlackChannelPrivate rows ordered by channel_id. + """ + if channel_id: + try: + return [SlackChannelPrivate.objects.get(team=team, channel_id=channel_id)] + except SlackChannelPrivate.DoesNotExist: + logger.warning( + "Private channel %s not found in team %s; syncing all non-public channels.", + channel_id, + team.team_id, + ) + return list(SlackChannelPrivate.objects.filter(team=team).order_by("channel_id")) + + def sync_channel_users( team: SlackTeam, *, @@ -51,9 +84,12 @@ def sync_channel_users( """ Sync channel memberships for all (or one) channels in a team. - Uses get_channels_to_sync(team, channel_id=channel_id) to get the channel - list, then for each channel fetches member IDs from the Slack API and - syncs memberships to the database. Returns (success_count, error_count). + Public channels: get_channels_to_sync → SlackChannelMembership. + + Non-public private_channel and mpim: get_private_channels_to_sync → + SlackChannelMembershipPrivate (skips im). + + Returns (success_count, error_count). """ channels = get_channels_to_sync(team, channel_id=channel_id) success_count = 0 @@ -70,4 +106,26 @@ def sync_channel_users( "Failed to fetch/sync members for %s: %s", channel.channel_id, e ) error_count += 1 + + private_channels = get_private_channels_to_sync(team, channel_id=channel_id) + membership_private = [ + c + for c in private_channels + if c.channel_type + in (SlackChannelPrivateType.PRIVATE_CHANNEL, SlackChannelPrivateType.MPIM) + ] + for channel in membership_private: + try: + member_ids = fetch_channel_user_list( + channel.channel_id, team_id=channel.team.team_id + ) + sync_channel_memberships_private(channel, member_ids) + success_count += 1 + except Exception as e: + logger.exception( + "Failed to fetch/sync members for private/mpim %s: %s", + channel.channel_id, + e, + ) + error_count += 1 return success_count, error_count diff --git a/cppa_slack_tracker/sync/sync_message.py b/cppa_slack_tracker/sync/sync_message.py index 242b2e11..4c928f0d 100644 --- a/cppa_slack_tracker/sync/sync_message.py +++ b/cppa_slack_tracker/sync/sync_message.py @@ -1,6 +1,9 @@ """ Sync Slack messages with the database. +Works for both SlackChannel (public) and SlackChannelPrivate (non-public); the +latter persists rows via SlackMessagePrivate (cppa_slack_tracker_slackmessage_private). + Flow when start_date is None: 1. Process any existing workspace JSONs for the channel (old → new), remove them. 2. Determine start_date: same day as last message in DB (to avoid missing same-day @@ -21,14 +24,22 @@ import logging from collections import defaultdict from datetime import date, datetime, timedelta, timezone -from typing import Optional +from typing import TYPE_CHECKING, Optional from django.db.models import F + +if TYPE_CHECKING: + from operations.slack_ops.client import SlackAPIClient from django.db.models.functions import Coalesce from cppa_slack_tracker.fetcher import fetch_messages -from cppa_slack_tracker.models import SlackChannel, SlackMessage -from cppa_slack_tracker.services import save_slack_message +from cppa_slack_tracker.models import ( + SlackChannel, + SlackChannelPrivate, + SlackMessage, + SlackMessagePrivate, +) +from cppa_slack_tracker.services import save_slack_message, save_slack_message_private from cppa_slack_tracker.workspace import ( get_message_json_path, get_raw_message_json_path, @@ -69,15 +80,19 @@ def _messages_by_day( return dict(by_day) -def _process_message(channel: SlackChannel, msg: dict) -> bool: +def _process_message(channel: SlackChannel | SlackChannelPrivate, msg: dict) -> bool: """ - Process one message: save_slack_message. Returns True if saved, False if - skipped (e.g. ignored subtype). Raises on error. + Process one message: save to SlackMessage or SlackMessagePrivate. Returns True + if saved, False if skipped (e.g. ignored subtype). Raises on error. """ + if isinstance(channel, SlackChannelPrivate): + return save_slack_message_private(channel, msg) is not None return save_slack_message(channel, msg) is not None -def _process_workspace_jsons(channel: SlackChannel) -> tuple[int, int]: +def _process_workspace_jsons( + channel: SlackChannel | SlackChannelPrivate, +) -> tuple[int, int]: """ Process all existing workspace JSONs for the channel in date order (old to new). Saves messages to DB and removes each workspace file. @@ -113,11 +128,16 @@ def _process_workspace_jsons(channel: SlackChannel) -> tuple[int, int]: return success_count, error_count -def _last_message_date(channel: SlackChannel) -> Optional[date]: +def _last_message_date( + channel: SlackChannel | SlackChannelPrivate, +) -> Optional[date]: """Return the date (UTC) of the most recently updated (or created) message in DB for this channel, or None.""" + if isinstance(channel, SlackChannelPrivate): + qs = SlackMessagePrivate.objects.filter(channel=channel) + else: + qs = SlackMessage.objects.filter(channel=channel) last_dt = ( - SlackMessage.objects.filter(channel=channel) - .annotate( + qs.annotate( effective=Coalesce( F("slack_message_updated_at"), F("slack_message_created_at") ) @@ -148,9 +168,11 @@ def _merge_messages_by_ts( def sync_messages( - channel: SlackChannel, + channel: SlackChannel | SlackChannelPrivate, start_date: date | datetime | None = None, end_date: date | datetime | None = None, + *, + client: Optional["SlackAPIClient"] = None, ) -> tuple[int, int]: """ Sync messages for a channel over a date range (UTC). @@ -166,6 +188,9 @@ def sync_messages( - Write JSON to workspace. Merge into raw file by ts (same ts → update, new ts → add). - Process workspace → save to DB → remove workspace file. + If ``client`` is set (e.g. user OAuth token for IMs), it is used for + ``conversations.history``; otherwise the workspace bot token is used. + Returns (success_count, error_count). """ today = datetime.now(timezone.utc).date() @@ -198,7 +223,11 @@ def sync_messages( # Step 2: fetch messages ([start_date, end_date] or all up to end_date if start_date is None) try: all_messages = fetch_messages( - channel_id, start_date, end_date, team_id=channel.team.team_id + channel_id, + start_date, + end_date, + client=client, + team_id=channel.team.team_id, ) except Exception: logger.exception( diff --git a/cppa_slack_tracker/tests/fixtures.py b/cppa_slack_tracker/tests/fixtures.py index d23c6c6a..62471f25 100644 --- a/cppa_slack_tracker/tests/fixtures.py +++ b/cppa_slack_tracker/tests/fixtures.py @@ -8,6 +8,7 @@ from cppa_slack_tracker.models import ( SlackTeam, SlackChannel, + SlackChannelPrivate, SlackMessage, SlackChannelMembership, ) @@ -78,6 +79,34 @@ def sample_slack_channel(db, sample_slack_team, sample_slack_user): ) +@pytest.fixture +def sample_slack_channel_private(db, sample_slack_team, sample_slack_user): + """Create a sample private Slack channel (private_channel).""" + _ = db + return SlackChannelPrivate.objects.create( + team=sample_slack_team, + channel_id="G012PRIVATE1", + channel_name="private-team", + channel_type="private_channel", + description="Private stuff", + creator=sample_slack_user, + ) + + +@pytest.fixture +def sample_slack_channel_im(db, sample_slack_team, sample_slack_user): + """Create a sample IM channel (membership not tracked).""" + _ = db + return SlackChannelPrivate.objects.create( + team=sample_slack_team, + channel_id="D01IM00001", + channel_name="someone", + channel_type="im", + description="", + creator=sample_slack_user, + ) + + @pytest.fixture def _sample_slack_channel(sample_slack_channel): """Alias for tests that need DB state but do not use the fixture value (silences ARG002).""" diff --git a/cppa_slack_tracker/tests/test_services.py b/cppa_slack_tracker/tests/test_services.py index 81cd4ced..94c25926 100644 --- a/cppa_slack_tracker/tests/test_services.py +++ b/cppa_slack_tracker/tests/test_services.py @@ -9,13 +9,19 @@ get_or_create_slack_channel, get_or_create_slack_team, add_channel_membership_change, + add_channel_membership_change_private, save_slack_message, + save_slack_message_private, sync_channel_memberships, + sync_channel_memberships_private, _parse_slack_ts_string, ) from cppa_slack_tracker.models import ( + SlackChannelPrivate, SlackChannelMembership, SlackChannelMembershipChangeLog, + SlackChannelMembershipPrivate, + SlackChannelMembershipChangeLogPrivate, ) from cppa_user_tracker.models import Email, SlackUser @@ -92,17 +98,40 @@ def test_add_slack_channel( self, sample_slack_team, sample_slack_user, sample_slack_channel_data ): """Test adding a Slack channel.""" - channel, _ = get_or_create_slack_channel( + channel, priv, _ = get_or_create_slack_channel( sample_slack_channel_data, sample_slack_team, ) + assert priv is None + assert channel is not None assert channel.channel_id == "C87654321" assert channel.channel_name == "random" assert channel.channel_type == "public_channel" assert channel.description == "Random discussions" assert channel.creator == sample_slack_user + def test_add_slack_channel_private(self, sample_slack_team, sample_slack_user): + """Non-public channels are stored in SlackChannelPrivate.""" + data = { + "id": "G012PRIVATE1", + "name": "private-team", + "is_private": True, + "is_channel": True, + "creator": sample_slack_user.slack_user_id, + "purpose": {"value": "Private stuff"}, + } + pub, priv, created = get_or_create_slack_channel(data, sample_slack_team) + assert pub is None + assert created is True + assert priv is not None + assert priv.channel_id == "G012PRIVATE1" + assert priv.channel_name == "private-team" + assert priv.channel_type == "private_channel" + assert priv.description == "Private stuff" + assert priv.creator == sample_slack_user + assert SlackChannelPrivate.objects.filter(channel_id="G012PRIVATE1").exists() + def test_add_channel_membership_change( self, sample_slack_channel, sample_slack_user ): @@ -305,3 +334,110 @@ def test_save_slack_message_unknown_user_fetches_info_and_creates_unknown_withou calls = [c[0][0] for c in mock_fetch.call_args_list] assert "U99999999" in calls assert "-1" not in calls + + def test_add_channel_membership_change_private( + self, sample_slack_channel_private, sample_slack_user + ): + """Join/leave for private_channel updates private membership tables.""" + log = add_channel_membership_change_private( + sample_slack_channel_private, + "U12345678", + "1609459200.123456", + is_joined=True, + ) + assert log.channel == sample_slack_channel_private + assert log.user == sample_slack_user + assert log.is_joined + m = SlackChannelMembershipPrivate.objects.get( + channel=sample_slack_channel_private, + user=sample_slack_user, + ) + assert not m.is_deleted + + def test_add_channel_membership_change_private_rejects_im( + self, sample_slack_channel_im, sample_slack_user + ): + """Membership helpers are not used for im channels.""" + with pytest.raises(ValueError, match="private_channel and mpim"): + add_channel_membership_change_private( + sample_slack_channel_im, + sample_slack_user.slack_user_id, + "1609459200.123456", + is_joined=True, + ) + + def test_save_slack_message_private_channel_join( + self, sample_slack_channel_private, sample_slack_user + ): + """channel_join on private_channel records membership; no SlackMessagePrivate row.""" + message_data = { + "user": "U12345678", + "text": "joined the channel", + "ts": "1609459200.123456", + "subtype": "channel_join", + } + msg = save_slack_message_private( + sample_slack_channel_private, + message_data, + ) + assert msg is None + assert SlackChannelMembershipChangeLogPrivate.objects.filter( + channel=sample_slack_channel_private, + user=sample_slack_user, + is_joined=True, + ).exists() + + def test_save_slack_message_private_channel_join_skipped_for_im( + self, sample_slack_channel_im, sample_slack_user + ): + """channel_join on im does not create private membership rows.""" + message_data = { + "user": "U12345678", + "text": "joined", + "ts": "1609459200.123456", + "subtype": "channel_join", + } + msg = save_slack_message_private(sample_slack_channel_im, message_data) + assert msg is None + assert not SlackChannelMembershipChangeLogPrivate.objects.filter( + channel=sample_slack_channel_im, + user=sample_slack_user, + ).exists() + + def test_sync_channel_memberships_private( + self, sample_slack_channel_private, sample_slack_user, sample_identity + ): + """sync_channel_memberships_private adds/removes rows for private_channel.""" + user2 = SlackUser.objects.create( + identity=sample_identity, + slack_user_id="U22222222", + username="user2", + ) + SlackChannelMembershipPrivate.objects.create( + channel=sample_slack_channel_private, + user=sample_slack_user, + ) + sync_channel_memberships_private( + sample_slack_channel_private, + ["U22222222"], + ) + m1 = SlackChannelMembershipPrivate.objects.get( + channel=sample_slack_channel_private, user=sample_slack_user + ) + assert m1.is_deleted + m2 = SlackChannelMembershipPrivate.objects.get( + channel=sample_slack_channel_private, user=user2 + ) + assert not m2.is_deleted + + def test_sync_channel_memberships_private_noop_for_im( + self, sample_slack_channel_im, sample_slack_user + ): + """Member list sync does not persist rows for im.""" + sync_channel_memberships_private( + sample_slack_channel_im, + [sample_slack_user.slack_user_id], + ) + assert not SlackChannelMembershipPrivate.objects.filter( + channel=sample_slack_channel_im + ).exists() diff --git a/cppa_slack_tracker/user_tokens.py b/cppa_slack_tracker/user_tokens.py new file mode 100644 index 00000000..858ed3bc --- /dev/null +++ b/cppa_slack_tracker/user_tokens.py @@ -0,0 +1,67 @@ +""" +Load Slack user OAuth tokens written by slack_oauth_server (credential file). + +Same path as [slack_oauth_server](slack_oauth_server.py): optional SLACK_USER_TOKENS_PATH, +else BASE_DIR/credential/slack_user_tokens.json. +""" + +from __future__ import annotations + +import json +import logging +import os +from pathlib import Path +from typing import Iterator + +logger = logging.getLogger(__name__) + + +def slack_user_tokens_path() -> Path: + """Absolute path to slack_user_tokens.json.""" + override = (os.environ.get("SLACK_USER_TOKENS_PATH") or "").strip() + if override: + return Path(override) + from django.conf import settings + + return Path(settings.BASE_DIR) / "credential" / "slack_user_tokens.json" + + +def load_slack_user_tokens() -> dict[str, dict]: + """ + Return the raw JSON object: keys are ``team_id:user_id`` (or ``user_id``); + values include user_id, team_id, access_token. + """ + path = slack_user_tokens_path() + if not path.exists(): + return {} + try: + with path.open(encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError) as exc: + logger.warning("Could not load Slack user tokens from %s: %s", path, exc) + return {} + if not isinstance(data, dict): + return {} + return data # type: ignore[return-value] + + +def iter_user_tokens_for_team(team_id: str) -> Iterator[tuple[str, str]]: + """ + Yield (slack_user_id, access_token) for rows whose team_id matches ``team_id``. + + Skips malformed entries or empty tokens. + """ + tid = (team_id or "").strip() + if not tid: + return + for _key, row in load_slack_user_tokens().items(): + if not isinstance(row, dict): + continue + uid = (row.get("user_id") or "").strip() + token = (row.get("access_token") or "").strip() + row_team = (row.get("team_id") or "").strip() + if not uid or not token: + continue + if row_team != tid: + continue + yield (uid, token) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0b2f596c..c12e0507 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,9 @@ # Include production dependencies -r requirements.txt +# slack_oauth_server (python -m cppa_slack_tracker.slack_oauth_server): httpx, starlette, +# uvicorn, python-dotenv are listed in requirements.txt — pip install -r requirements-dev.txt + # Test stack (see docs/testing or project testing summary) pytest>=7.4 pytest-django>=4.5 diff --git a/requirements.txt b/requirements.txt index 34f77e77..5d66be51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,6 +24,12 @@ slack-bolt>=1.18 pytz>=2024.1 selenium>=4.35 +# cppa_slack_tracker slack_oauth_server (user OAuth; python -m cppa_slack_tracker.slack_oauth_server) +httpx>=0.27 +python-dotenv>=1.0 +starlette>=0.47.2 +uvicorn[standard]>=0.30 + # cppa_youtube_script_tracker app (YouTube Data API v3 + VTT transcript download) google-api-python-client>=2.100 yt-dlp==2026.2.4