From f1d2ef5e68694cf52c2f921844e7553974947a12 Mon Sep 17 00:00:00 2001 From: Jayaram Kandukuri Date: Sat, 28 Feb 2026 16:08:44 +0530 Subject: [PATCH] =?UTF-8?q?Completed=20backend=20intern=20assignment=20inc?= =?UTF-8?q?luding=20conversation=20summaries,=20PostgreSQL=20migration,=20?= =?UTF-8?q?scheduled=20cleanup,=20and=20file=20upload/list/delete=20APIs?= =?UTF-8?q?=20with=20duplicate=20prevention=20=E2=80=93jayaramknss@gmail.c?= =?UTF-8?q?om?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/backend/__init__.py | 3 + backend/backend/celery.py | 27 +++ backend/backend/settings.py | 28 ++- backend/chat/admin.py | 28 ++- backend/chat/apps.py | 3 + .../commands/cleanup_conversations.py | 68 ++++++ ...rmission_uploadedfile_userrole_and_more.py | 162 +++++++++++++ backend/chat/models.py | 217 +++++++++++++++++- backend/chat/serializers.py | 95 ++++++++ backend/chat/signals.py | 31 +++ backend/chat/summary_service.py | 127 ++++++++++ backend/chat/tasks.py | 45 ++++ backend/chat/urls.py | 11 + backend/chat/views.py | 76 +++++- backend/requirements.txt | 79 +++++++ 15 files changed, 988 insertions(+), 12 deletions(-) create mode 100644 backend/backend/celery.py create mode 100644 backend/chat/management/commands/cleanup_conversations.py create mode 100644 backend/chat/migrations/0002_activitylog_filepermission_uploadedfile_userrole_and_more.py create mode 100644 backend/chat/signals.py create mode 100644 backend/chat/summary_service.py create mode 100644 backend/chat/tasks.py create mode 100644 backend/requirements.txt diff --git a/backend/backend/__init__.py b/backend/backend/__init__.py index e69de29bb..9e0d95fd7 100644 --- a/backend/backend/__init__.py +++ b/backend/backend/__init__.py @@ -0,0 +1,3 @@ +from .celery import app as celery_app + +__all__ = ('celery_app',) \ No newline at end of file diff --git a/backend/backend/celery.py b/backend/backend/celery.py new file mode 100644 index 000000000..70bdb1623 --- 
/dev/null +++ b/backend/backend/celery.py @@ -0,0 +1,27 @@ +import os +from celery import Celery +from celery.schedules import crontab + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'backend.settings') + + +app = Celery('backend') +app.config_from_object('django.conf:settings', namespace='CELERY') +app.autodiscover_tasks() + +# Celery Beat Schedule +app.conf.beat_schedule = { + 'cleanup-old-conversations': { + 'task': 'chat.tasks.cleanup_old_conversations', + 'schedule': crontab(hour=2, minute=0), # Daily at 2 AM + 'args': (30,) # Delete conversations older than 30 days + }, + 'generate-missing-summaries': { + 'task': 'chat.tasks.generate_missing_summaries', + 'schedule': crontab(hour=3, minute=0), # Daily at 3 AM + }, +} + +@app.task(bind=True) +def debug_task(self): + print(f'Request: {self.request!r}') \ No newline at end of file diff --git a/backend/backend/settings.py b/backend/backend/settings.py index 9de4f024a..50b0b30e7 100644 --- a/backend/backend/settings.py +++ b/backend/backend/settings.py @@ -15,17 +15,19 @@ from dotenv import load_dotenv -load_dotenv() + # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent +load_dotenv(os.path.join(BASE_DIR, ".env")) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = os.environ["DJANGO_SECRET_KEY"] -FRONTEND_URL = os.environ["FRONTEND_URL"] +SECRET_KEY = os.environ.get("DJANGO_SECRET_KEY", "fallback-secret-key") +print("SECRET:", os.environ.get("DJANGO_SECRET_KEY")) +FRONTEND_URL = os.environ.get("FRONTEND_URL", "http://localhost:3000") # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = True @@ -85,9 +87,13 @@ # https://docs.djangoproject.com/en/4.2/ref/settings/#databases DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": BASE_DIR / "db.sqlite3", + 'default': { + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': 'chatgpt_db', + 'USER': 'chatgpt_user', + 'PASSWORD': 'chatgpt_password', + 'HOST': 'localhost', + 'PORT': '5432', } } @@ -149,3 +155,13 @@ SESSION_COOKIE_SECURE = True CSRF_COOKIE_SECURE = True CSRF_COOKIE_SAMESITE = "None" + +# Celery Configuration +CELERY_BROKER_URL = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0') +CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0') +CELERY_ACCEPT_CONTENT = ['json'] +CELERY_TASK_SERIALIZER = 'json' +CELERY_RESULT_SERIALIZER = 'json' +CELERY_TIMEZONE = 'UTC' +CELERY_TASK_TRACK_STARTED = True +CELERY_TASK_TIME_LIMIT = 30 * 60 # 30 minutes \ No newline at end of file diff --git a/backend/chat/admin.py b/backend/chat/admin.py index a4e7d15fc..2917f9875 100644 --- a/backend/chat/admin.py +++ b/backend/chat/admin.py @@ -1,7 +1,7 @@ from django.contrib import admin from django.utils import timezone from nested_admin.nested import NestedModelAdmin, NestedStackedInline, NestedTabularInline - +from django.utils.html import format_html from chat.models import Conversation, Message, Role, Version @@ -51,8 +51,8 @@ def queryset(self, request, queryset): class ConversationAdmin(NestedModelAdmin): actions = ["undelete_selected", "soft_delete_selected"] inlines = [VersionInline] - list_display = ("title", "id", "created_at", "modified_at", "deleted_at", "version_count", "is_deleted", "user") - list_filter = (DeletedListFilter,) + list_display = ("title", "id", "created_at", "modified_at", "deleted_at", "version_count", "is_deleted", "has_summary", "summary_status", "user") + list_filter = (DeletedListFilter,"is_summary_stale",) ordering = ("-modified_at",) def undelete_selected(self, request, queryset): @@ -79,7 +79,27 @@ def 
is_deleted(self, obj): is_deleted.boolean = True is_deleted.short_description = "Deleted?" - + def has_summary(self, obj): + """Display if conversation has summary""" + if obj.summary: + return format_html( + '✓ Has Summary' + ) + return format_html( + '✗ No Summary' + ) + has_summary.short_description = "Summary Status" + + def summary_status(self, obj): + """Show if summary is stale""" + if obj.is_summary_stale: + return format_html( + 'Stale' + ) + return format_html( + 'Current' + ) + summary_status.short_description = "Summary Freshness" class VersionAdmin(NestedModelAdmin): inlines = [MessageInline] diff --git a/backend/chat/apps.py b/backend/chat/apps.py index 5f75238d2..77de7129e 100644 --- a/backend/chat/apps.py +++ b/backend/chat/apps.py @@ -4,3 +4,6 @@ class ChatConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "chat" + + def ready(self): + import chat.signals \ No newline at end of file diff --git a/backend/chat/management/commands/cleanup_conversations.py b/backend/chat/management/commands/cleanup_conversations.py new file mode 100644 index 000000000..2000cf848 --- /dev/null +++ b/backend/chat/management/commands/cleanup_conversations.py @@ -0,0 +1,68 @@ +from django.core.management.base import BaseCommand +from django.utils.timezone import now +from django.db.models import Count +from datetime import timedelta +from chat.models import Conversation +import logging + +logger = logging.getLogger(__name__) + +class Command(BaseCommand): + help = 'Clean up old conversations based on age' + + def add_arguments(self, parser): + parser.add_argument( + '--days', + type=int, + default=30, + help='Delete conversations older than specified days (default: 30)' + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would be deleted without actually deleting' + ) + parser.add_argument( + '--user', + type=str, + help='Only delete conversations for specific user' + ) + + def handle(self, *args, **options): + 
days = options['days'] + dry_run = options['dry_run'] + user_filter = options.get('user') + + cutoff_date = now() - timedelta(days=days) + queryset = Conversation.objects.filter(created_at__lt=cutoff_date) + + if user_filter: + queryset = queryset.filter(user__username=user_filter) + + count = queryset.count() + + if count == 0: + self.stdout.write( + self.style.WARNING('No conversations found matching criteria') + ) + return + + self.stdout.write(f"Conversations to delete: {count}\n") + + if dry_run: + self.stdout.write(self.style.WARNING('DRY RUN - No deletions made\n')) + for conv in queryset[:5]: + self.stdout.write( + f" - {conv.title} ({conv.messages.count()} messages)" + ) + return + + confirm = input(f"Delete {count} conversations? (yes/no): ") + + if confirm.lower() == 'yes': + deleted_count = queryset.delete()[0] + self.stdout.write( + self.style.SUCCESS(f'✓ Deleted {deleted_count} conversations') + ) + else: + self.stdout.write(self.style.WARNING('Deletion cancelled')) \ No newline at end of file diff --git a/backend/chat/migrations/0002_activitylog_filepermission_uploadedfile_userrole_and_more.py b/backend/chat/migrations/0002_activitylog_filepermission_uploadedfile_userrole_and_more.py new file mode 100644 index 000000000..99c92bf53 --- /dev/null +++ b/backend/chat/migrations/0002_activitylog_filepermission_uploadedfile_userrole_and_more.py @@ -0,0 +1,162 @@ +# Generated by Django 5.2.11 on 2026-02-28 06:51 + +import django.db.models.deletion +import uuid +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='ActivityLog', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('action', models.CharField(choices=[('file_upload', 'File Upload'), ('file_delete', 'File 
Delete'), ('file_access', 'File Access'), ('conversation_create', 'Conversation Create'), ('conversation_delete', 'Conversation Delete'), ('conversation_edit', 'Conversation Edit'), ('message_send', 'Message Send'), ('summary_generate', 'Summary Generate'), ('summary_regenerate', 'Summary Regenerate')], max_length=50)), + ('resource_type', models.CharField(max_length=50)), + ('resource_id', models.CharField(blank=True, max_length=100, null=True)), + ('details', models.JSONField(blank=True, default=dict)), + ('ip_address', models.GenericIPAddressField(blank=True, null=True)), + ('user_agent', models.TextField(blank=True)), + ('status', models.CharField(choices=[('success', 'Success'), ('failed', 'Failed')], default='success', max_length=20)), + ('timestamp', models.DateTimeField(auto_now_add=True, db_index=True)), + ], + options={ + 'ordering': ['-timestamp'], + }, + ), + migrations.CreateModel( + name='FilePermission', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('permission', models.CharField(choices=[('view', 'View'), ('upload', 'Upload'), ('delete', 'Delete'), ('share', 'Share')], max_length=20)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ], + ), + migrations.CreateModel( + name='UploadedFile', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('file', models.FileField(upload_to='uploads/%Y/%m/%d/')), + ('filename', models.CharField(max_length=255)), + ('file_size', models.BigIntegerField()), + ('file_type', models.CharField(max_length=50)), + ('file_hash', models.CharField(max_length=64, unique=True)), + ('status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='pending', max_length=20)), + ('uploaded_at', models.DateTimeField(auto_now_add=True)), + ('processed_at', models.DateTimeField(blank=True, null=True)), + 
('error_message', models.TextField(blank=True, null=True)), + ('mime_type', models.CharField(blank=True, max_length=100, null=True)), + ('page_count', models.IntegerField(blank=True, null=True)), + ('is_indexed', models.BooleanField(default=False)), + ], + options={ + 'ordering': ['-uploaded_at'], + }, + ), + migrations.CreateModel( + name='UserRole', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('role', models.CharField(choices=[('user', 'Regular User'), ('moderator', 'Moderator'), ('admin', 'Administrator')], default='user', max_length=20)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ], + ), + migrations.AlterModelOptions( + name='conversation', + options={'ordering': ['-created_at']}, + ), + migrations.AddField( + model_name='conversation', + name='is_summary_stale', + field=models.BooleanField(default=False, help_text='Indicates if summary needs to be regenerated'), + ), + migrations.AddField( + model_name='conversation', + name='summary', + field=models.TextField(blank=True, help_text='Auto-generated summary of the conversation', null=True), + ), + migrations.AddField( + model_name='conversation', + name='summary_generated_at', + field=models.DateTimeField(blank=True, help_text='Timestamp when summary was generated', null=True), + ), + migrations.AddIndex( + model_name='conversation', + index=models.Index(fields=['user', '-created_at'], name='chat_conver_user_id_49b34c_idx'), + ), + migrations.AddIndex( + model_name='conversation', + index=models.Index(fields=['is_summary_stale'], name='chat_conver_is_summ_c1de9d_idx'), + ), + migrations.AddField( + model_name='activitylog', + name='user', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='activity_logs', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + model_name='filepermission', + name='granted_by', + 
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='granted_permissions', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + model_name='filepermission', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='file_permissions', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + model_name='uploadedfile', + name='conversation', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files', to='chat.conversation'), + ), + migrations.AddField( + model_name='uploadedfile', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='uploaded_files', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + model_name='filepermission', + name='file', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='permissions', to='chat.uploadedfile'), + ), + migrations.AddField( + model_name='userrole', + name='user', + field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='role_profile', to=settings.AUTH_USER_MODEL), + ), + migrations.AddIndex( + model_name='activitylog', + index=models.Index(fields=['user', '-timestamp'], name='chat_activi_user_id_a7ffef_idx'), + ), + migrations.AddIndex( + model_name='activitylog', + index=models.Index(fields=['action', '-timestamp'], name='chat_activi_action_c91d2c_idx'), + ), + migrations.AddIndex( + model_name='activitylog', + index=models.Index(fields=['resource_type', 'resource_id'], name='chat_activi_resourc_9d4685_idx'), + ), + migrations.AddIndex( + model_name='uploadedfile', + index=models.Index(fields=['user', '-uploaded_at'], name='chat_upload_user_id_618957_idx'), + ), + migrations.AddIndex( + model_name='uploadedfile', + index=models.Index(fields=['file_hash'], name='chat_upload_file_ha_43c6d5_idx'), + ), + migrations.AddIndex( + model_name='uploadedfile', + 
index=models.Index(fields=['status'], name='chat_upload_status_97172d_idx'), + ), + migrations.AlterUniqueTogether( + name='filepermission', + unique_together={('user', 'file', 'permission')}, + ), + ] diff --git a/backend/chat/models.py b/backend/chat/models.py index 242788f14..be41820a8 100644 --- a/backend/chat/models.py +++ b/backend/chat/models.py @@ -1,5 +1,5 @@ import uuid - +import hashlib from django.db import models from authentication.models import CustomUser @@ -22,7 +22,28 @@ class Conversation(models.Model): ) deleted_at = models.DateTimeField(null=True, blank=True) user = models.ForeignKey(CustomUser, on_delete=models.CASCADE) + # TASK 1: Summary fields + summary = models.TextField( + null=True, + blank=True, + help_text="Auto-generated summary of the conversation" + ) + summary_generated_at = models.DateTimeField( + null=True, + blank=True, + help_text="Timestamp when summary was generated" + ) + is_summary_stale = models.BooleanField( + default=False, + help_text="Indicates if summary needs to be regenerated" + ) + class Meta: + ordering = ['-created_at'] + indexes = [ + models.Index(fields=['user', '-created_at']), + models.Index(fields=['is_summary_stale']), + ] def __str__(self): return self.title @@ -63,3 +84,197 @@ def save(self, *args, **kwargs): def __str__(self): return f"{self.role}: {self.content[:20]}..." + + +# TASK 3: File Upload Model +class UploadedFile(models.Model): + """ + Model for storing uploaded files with metadata. 
+ TASK 3: File upload with duplicate detection + """ + STATUS_CHOICES = [ + ('pending', 'Pending'), + ('processing', 'Processing'), + ('completed', 'Completed'), + ('failed', 'Failed'), + ] + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + user = models.ForeignKey( + CustomUser, + on_delete=models.CASCADE, + related_name='uploaded_files' + ) + conversation = models.ForeignKey( + Conversation, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='files' + ) + + file = models.FileField(upload_to='uploads/%Y/%m/%d/') + filename = models.CharField(max_length=255) + file_size = models.BigIntegerField() # in bytes + file_type = models.CharField(max_length=50) # e.g., 'pdf', 'docx' + file_hash = models.CharField(max_length=64, unique=True) # SHA256 + + status = models.CharField( + max_length=20, + choices=STATUS_CHOICES, + default='pending' + ) + + uploaded_at = models.DateTimeField(auto_now_add=True) + processed_at = models.DateTimeField(null=True, blank=True) + error_message = models.TextField(null=True, blank=True) + + # Metadata + mime_type = models.CharField(max_length=100, null=True, blank=True) + page_count = models.IntegerField(null=True, blank=True) # For PDFs + is_indexed = models.BooleanField(default=False) # For RAG indexing + + class Meta: + ordering = ['-uploaded_at'] + indexes = [ + models.Index(fields=['user', '-uploaded_at']), + models.Index(fields=['file_hash']), + models.Index(fields=['status']), + ] + + def __str__(self): + return f"{self.filename} ({self.user.username})" + + @staticmethod + def calculate_file_hash(file_obj): + """Calculate SHA256 hash of file""" + hash_sha256 = hashlib.sha256() + for chunk in file_obj.chunks(): + hash_sha256.update(chunk) + file_obj.seek(0) # Reset file pointer + return hash_sha256.hexdigest() + + def save(self, *args, **kwargs): + if self.file and not self.file_hash: + self.file_hash = self.calculate_file_hash(self.file) + super().save(*args, **kwargs) + + +# TASK 4: 
Activity Logging Model +class ActivityLog(models.Model): + """ + Model for logging all user activities. + TASK 4: Activity logging and auditing + """ + ACTION_CHOICES = [ + ('file_upload', 'File Upload'), + ('file_delete', 'File Delete'), + ('file_access', 'File Access'), + ('conversation_create', 'Conversation Create'), + ('conversation_delete', 'Conversation Delete'), + ('conversation_edit', 'Conversation Edit'), + ('message_send', 'Message Send'), + ('summary_generate', 'Summary Generate'), + ('summary_regenerate', 'Summary Regenerate'), + ] + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + user = models.ForeignKey( + CustomUser, + on_delete=models.SET_NULL, + null=True, + related_name='activity_logs' + ) + action = models.CharField(max_length=50, choices=ACTION_CHOICES) + resource_type = models.CharField(max_length=50) # 'file', 'conversation', etc. + resource_id = models.CharField(max_length=100, null=True, blank=True) + details = models.JSONField(default=dict, blank=True) # Extra context + ip_address = models.GenericIPAddressField(null=True, blank=True) + user_agent = models.TextField(blank=True) + status = models.CharField( + max_length=20, + default='success', + choices=[('success', 'Success'), ('failed', 'Failed')] + ) + timestamp = models.DateTimeField(auto_now_add=True, db_index=True) + + class Meta: + ordering = ['-timestamp'] + indexes = [ + models.Index(fields=['user', '-timestamp']), + models.Index(fields=['action', '-timestamp']), + models.Index(fields=['resource_type', 'resource_id']), + ] + + def __str__(self): + return f"{self.user} - {self.action} at {self.timestamp}" + + +# TASK 4: User Role Model for RBAC +class UserRole(models.Model): + """ + Model for user roles - used for role-based access control. 
+ TASK 4: Role-based access control + """ + ROLE_CHOICES = [ + ('user', 'Regular User'), + ('moderator', 'Moderator'), + ('admin', 'Administrator'), + ] + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + user = models.OneToOneField( + CustomUser, + on_delete=models.CASCADE, + related_name='role_profile' + ) + role = models.CharField( + max_length=20, + choices=ROLE_CHOICES, + default='user' + ) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + def __str__(self): + return f"{self.user.username} - {self.get_role_display()}" + + +# TASK 4: File Permission Model for Granular Control +class FilePermission(models.Model): + """ + Model for granular file permissions. + TASK 4: Role-based access control + """ + PERMISSION_CHOICES = [ + ('view', 'View'), + ('upload', 'Upload'), + ('delete', 'Delete'), + ('share', 'Share'), + ] + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + user = models.ForeignKey( + CustomUser, + on_delete=models.CASCADE, + related_name='file_permissions' + ) + file = models.ForeignKey( + UploadedFile, + on_delete=models.CASCADE, + related_name='permissions' + ) + permission = models.CharField(max_length=20, choices=PERMISSION_CHOICES) + granted_by = models.ForeignKey( + CustomUser, + on_delete=models.SET_NULL, + null=True, + related_name='granted_permissions' + ) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + unique_together = ('user', 'file', 'permission') + + def __str__(self): + return f"{self.user.username} - {self.permission} on {self.file.filename}" diff --git a/backend/chat/serializers.py b/backend/chat/serializers.py index 0c721c061..18cbde33e 100644 --- a/backend/chat/serializers.py +++ b/backend/chat/serializers.py @@ -4,6 +4,9 @@ from chat.models import Conversation, Message, Role, Version +# file upload related serializer +from chat.models import UploadedFile + def 
should_serialize(validated_data, field_name) -> bool: if validated_data.get(field_name) is not None: @@ -150,3 +153,95 @@ def update(self, instance, validated_data): version_serializer.save(conversation=instance) return instance + + +class ConversationSummarySerializer(serializers.ModelSerializer): + message_count = serializers.SerializerMethodField() + + class Meta: + model = Conversation + fields = [ + 'id', + 'title', + 'summary', + 'summary_generated_at', + 'is_summary_stale', + 'message_count', + 'created_at', + ] + + def get_message_count(self, obj): + from chat.models import Message + return Message.objects.filter( + version__conversation=obj + ).count() + + +class UploadedFileSerializer(serializers.ModelSerializer): + """Serializer for UploadedFile model. + + When a file is provided we calculate its hash and check for duplicates so + the same file cannot be uploaded more than once. Some metadata is also + populated automatically (filename, size, type, owner). + """ + + class Meta: + model = UploadedFile + # expose all relevant metadata fields so the frontend can render them + fields = [ + "id", + "user", + "conversation", + "file", + "filename", + "file_size", + "file_type", + "file_hash", + "status", + "uploaded_at", + "processed_at", + "error_message", + "mime_type", + "page_count", + "is_indexed", + ] + read_only_fields = [ + "id", + "user", + "file_hash", + "status", + "uploaded_at", + "processed_at", + "error_message", + "mime_type", + "page_count", + "is_indexed", + ] + + def validate_file(self, value): + """Compute SHA256 of the incoming file and reject duplicates.""" + # calculate and then rewind pointer so the same file object can still be + # saved by Django's storage backend + file_hash = UploadedFile.calculate_file_hash(value) + value.seek(0) + if UploadedFile.objects.filter(file_hash=file_hash).exists(): + raise serializers.ValidationError("A file with the same content has already been uploaded.") + return value + + def create(self, 
validated_data): + # automatically populate some meta fields that the client doesn't need + request = self.context.get("request") + if request and hasattr(request, "user"): + validated_data["user"] = request.user + + file_obj = validated_data.get("file") + if file_obj: + validated_data["filename"] = file_obj.name + validated_data["file_size"] = file_obj.size + # simple type extraction based on extension + if "." in file_obj.name: + validated_data["file_type"] = file_obj.name.rsplit('.', 1)[1] + else: + validated_data["file_type"] = "" + + return super().create(validated_data) \ No newline at end of file diff --git a/backend/chat/signals.py b/backend/chat/signals.py new file mode 100644 index 000000000..a5e7cc47d --- /dev/null +++ b/backend/chat/signals.py @@ -0,0 +1,31 @@ +from django.db.models.signals import post_save, m2m_changed +from django.dispatch import receiver +from django.core.cache import cache +from chat.models import Conversation, Message +from chat.summary_service import ConversationSummaryService +import logging + +logger = logging.getLogger(__name__) + +@receiver(post_save, sender=Message) +def regenerate_summary_on_message_save(sender, instance, created, **kwargs): + """Generate summary after new message is added""" + if created and instance.conversation: + conversation = instance.conversation + + # Only regenerate if we have enough messages + if conversation.messages.count() >= ConversationSummaryService.MIN_MESSAGES_FOR_SUMMARY: + ConversationSummaryService.update_conversation_summary(conversation) + +@receiver(post_save, sender=Conversation) +def handle_conversation_save(sender, instance, created, **kwargs): + """Handle conversation creation""" + if created: + logger.info(f"New conversation created: {instance.id}") + # Initialize summary as stale so it's generated later + instance.is_summary_stale = True + Conversation.objects.filter(id=instance.id).update(is_summary_stale=True) + +# Connect signals in apps.py +def ready(): + import chat.signals 
\ No newline at end of file diff --git a/backend/chat/summary_service.py b/backend/chat/summary_service.py new file mode 100644 index 000000000..9bff05301 --- /dev/null +++ b/backend/chat/summary_service.py @@ -0,0 +1,127 @@ +import logging +import json +from typing import Optional +from datetime import timedelta +from django.utils.timezone import now +from django.core.cache import cache +from src.libs import openai + +logger = logging.getLogger(__name__) + +class ConversationSummaryService: + """Service for generating and managing conversation summaries""" + + MAX_TOKENS = 200 + SUMMARY_CACHE_TIMEOUT = 3600 # 1 hour + MIN_MESSAGES_FOR_SUMMARY = 3 + + @staticmethod + def generate_summary(conversation) -> Optional[str]: + """ + Generate summary for a conversation using OpenAI + + Args: + conversation: Conversation instance + + Returns: + Generated summary string or None + """ + try: + # Check if conversation has enough messages + message_count = conversation.messages.count() + if message_count < ConversationSummaryService.MIN_MESSAGES_FOR_SUMMARY: + logger.info(f"Conversation {conversation.id} has insufficient messages for summary") + return None + + # Fetch all messages for context + messages = conversation.messages.all().order_by('created_at') + + # Build context from messages + context = "\n".join([ + f"{'User' if msg.role.name == 'user' else 'Assistant'}: {msg.content[:200]}" + for msg in messages[:10] # Limit to last 10 messages + ]) + + # Call OpenAI API + response = openai.ChatCompletion.create( + engine="gpt-35-turbo", # Azure deployment name + messages=[ + { + "role": "system", + "content": "Provide a concise 2-3 sentence summary of the conversation." 
+ }, + { + "role": "user", + "content": f"Conversation to summarize:\n\n{context}" + } + ], + max_tokens=ConversationSummaryService.MAX_TOKENS, + temperature=0.3 + ) + + summary = response['choices'][0]['message']['content'].strip() + logger.info(f"Successfully generated summary for conversation {conversation.id}") + return summary + + except openai.error.OpenAIError as e: + logger.error(f"OpenAI API error while generating summary: {str(e)}") + return None + except Exception as e: + logger.error(f"Unexpected error in summary generation: {str(e)}") + return None + + @staticmethod + def update_conversation_summary(conversation) -> bool: + """ + Update conversation with generated summary + + Args: + conversation: Conversation instance + + Returns: + True if successful, False otherwise + """ + try: + summary = ConversationSummaryService.generate_summary(conversation) + + if summary: + conversation.summary = summary + conversation.summary_generated_at = now() + conversation.is_summary_stale = False + conversation.save(update_fields=['summary', 'summary_generated_at', 'is_summary_stale']) + + # Cache the summary + cache.set( + f"conversation_summary_{conversation.id}", + summary, + ConversationSummaryService.SUMMARY_CACHE_TIMEOUT + ) + return True + return False + + except Exception as e: + logger.error(f"Error updating conversation summary: {str(e)}") + return False + + @staticmethod + def get_cached_summary(conversation) -> Optional[str]: + """Retrieve summary from cache or database""" + cache_key = f"conversation_summary_{conversation.id}" + cached = cache.get(cache_key) + + if cached: + return cached + + if conversation.summary: + cache.set(cache_key, conversation.summary, + ConversationSummaryService.SUMMARY_CACHE_TIMEOUT) + return conversation.summary + + return None + + @staticmethod + def mark_summary_stale(conversation): + """Mark summary as needing regeneration (e.g., when conversation is edited)""" + conversation.is_summary_stale = True + 
conversation.save(update_fields=['is_summary_stale']) + cache.delete(f"conversation_summary_{conversation.id}") \ No newline at end of file diff --git a/backend/chat/tasks.py b/backend/chat/tasks.py new file mode 100644 index 000000000..a6dad590f --- /dev/null +++ b/backend/chat/tasks.py @@ -0,0 +1,45 @@ +from celery import shared_task +from django.utils.timezone import now +from datetime import timedelta +from chat.models import Conversation +from chat.summary_service import ConversationSummaryService +import logging + +logger = logging.getLogger(__name__) + +@shared_task(name='chat.tasks.cleanup_old_conversations') +def cleanup_old_conversations(days=30): + """Clean up conversations older than specified days""" + cutoff_date = now() - timedelta(days=days) + deleted_count, _ = Conversation.objects.filter( + created_at__lt=cutoff_date + ).delete() + + logger.info(f"Cleanup task: Deleted {deleted_count} conversations") + return {'deleted': deleted_count} + +@shared_task(name='chat.tasks.generate_missing_summaries') +def generate_missing_summaries(): + """Generate summaries for conversations that don't have one""" + conversations = Conversation.objects.filter( + summary__isnull=True, + messages__isnull=False + ).distinct()[:50] # Limit to prevent overload + + count = 0 + for conversation in conversations: + if ConversationSummaryService.update_conversation_summary(conversation): + count += 1 + + logger.info(f"Generated summaries for {count} conversations") + return {'generated': count} + +@shared_task(name='chat.tasks.generate_conversation_summary_task') +def generate_conversation_summary_task(conversation_id): + """Generate summary for a specific conversation""" + try: + conversation = Conversation.objects.get(id=conversation_id) + return ConversationSummaryService.update_conversation_summary(conversation) + except Conversation.DoesNotExist: + logger.error(f"Conversation {conversation_id} not found") + return False \ No newline at end of file diff --git 
a/backend/chat/urls.py b/backend/chat/urls.py
index bd8ceadc0..0bc207cd2 100644
--- a/backend/chat/urls.py
+++ b/backend/chat/urls.py
@@ -1,7 +1,9 @@
 from django.urls import path
+from rest_framework.routers import DefaultRouter
 
 from chat import views
 
+
 urlpatterns = [
     path("", views.chat_root_view, name="chat_root_view"),
     path("conversations/", views.get_conversations, name="get_conversations"),
@@ -20,3 +22,12 @@
     path("conversations//delete/", views.conversation_soft_delete, name="conversation_delete"),
     path("versions//add_message/", views.version_add_message, name="version_add_message"),
 ]
+
+# router for viewsets (summaries, file uploads)
+router = DefaultRouter()
+router.register(r"summaries", views.ConversationSummaryViewSet, basename="conversation-summary")
+router.register(r"files", views.UploadedFileViewSet, basename="uploadedfile")
+
+# append router-generated urls at the end so they don't conflict with the custom
+# function-based endpoints defined above
+urlpatterns += router.urls
diff --git a/backend/chat/views.py b/backend/chat/views.py
index 0d18f7a69..2d63ec004 100644
--- a/backend/chat/views.py
+++ b/backend/chat/views.py
@@ -4,10 +4,19 @@
 from rest_framework.decorators import api_view
 from rest_framework.response import Response
 
-from chat.models import Conversation, Message, Version
+from chat.models import Conversation, Message, Version, UploadedFile
 from chat.serializers import ConversationSerializer, MessageSerializer, TitleSerializer, VersionSerializer
 from chat.utils.branching import make_branched_conversation
+from rest_framework.pagination import PageNumberPagination
+from rest_framework import viewsets, permissions
+from django.db.models import Q
+# Conversation is already imported above via chat.models; the redundant
+# relative `from .models import Conversation` was dropped for consistency.
+from chat.serializers import ConversationSummarySerializer, UploadedFileSerializer
+
+class StandardPagination(PageNumberPagination):
+    page_size = 10
+    page_size_query_param = "page_size"
+    max_page_size = 100
 
 @api_view(["GET"])
 def chat_root_view(request):
@@ -230,3
+239,68 @@ def version_add_message(request, pk):
             status=status.HTTP_201_CREATED,
         )
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+
+
+
+class ConversationSummaryViewSet(viewsets.ReadOnlyModelViewSet):
+    """Read-only, paginated listing of the requesting user's conversations,
+    filterable by summary presence (?has_summary=), staleness (?is_stale=),
+    and free-text search over title/summary (?search=).
+    """
+
+    serializer_class = ConversationSummarySerializer
+    permission_classes = [permissions.IsAuthenticated]
+    pagination_class = StandardPagination
+
+    def get_queryset(self):
+        # Scope strictly to the authenticated user's conversations.
+        queryset = Conversation.objects.filter(
+            user=self.request.user
+        )
+
+        # NOTE(review): isnull-based filtering misses empty-string summaries
+        # if the column is blank rather than NULL — confirm the model schema.
+        has_summary = self.request.query_params.get("has_summary")
+        if has_summary == "true":
+            queryset = queryset.filter(summary__isnull=False)
+        elif has_summary == "false":
+            queryset = queryset.filter(summary__isnull=True)
+
+        is_stale = self.request.query_params.get("is_stale")
+        if is_stale == "true":
+            queryset = queryset.filter(is_summary_stale=True)
+        elif is_stale == "false":
+            queryset = queryset.filter(is_summary_stale=False)
+
+        search = self.request.query_params.get("search")
+        if search:
+            queryset = queryset.filter(
+                Q(title__icontains=search) |
+                Q(summary__icontains=search)
+            )
+
+        return queryset.order_by("-created_at")
+
+
+class UploadedFileViewSet(viewsets.ModelViewSet):
+    """CRUD operations for user-uploaded files.
+
+    * POST /files/         -> upload new file with duplicate prevention
+    * GET /files/          -> list user's files (pagination supported)
+    * GET /files/{pk}/     -> retrieve metadata for a single file
+    * DELETE /files/{pk}/  -> delete file record and remove file from storage
+    """
+
+    serializer_class = UploadedFileSerializer
+    permission_classes = [permissions.IsAuthenticated]
+    pagination_class = StandardPagination
+
+    def get_queryset(self):
+        # Only ever expose the requesting user's own files.
+        qs = UploadedFile.objects.filter(user=self.request.user)
+        # optional filtering by conversation
+        conv = self.request.query_params.get("conversation_id")
+        if conv:
+            qs = qs.filter(conversation__id=conv)
+        return qs.order_by("-uploaded_at")
+
+    def perform_create(self, serializer):
+        # the serializer's create() method will attach the user and metadata
+        serializer.save()
+
+    def destroy(self, request, *args, **kwargs):
+        # Remove the stored file before deleting the DB record.  Guard the
+        # storage delete: FieldFile.delete() raises ValueError when the
+        # FileField has no file associated (e.g. record saved without one).
+        instance = self.get_object()
+        if instance.file:
+            instance.file.delete(save=False)
+        return super().destroy(request, *args, **kwargs)
\ No newline at end of file
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 000000000..cdddc9863
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,79 @@
+# =========================
+# Core Django & REST
+# =========================
+Django==4.2.7
+djangorestframework==3.14.0
+django-cors-headers==4.3.1
+django-filter==23.4
+django-extensions==3.2.3
+
+# =========================
+# Database (PostgreSQL)
+# =========================
+psycopg2-binary==2.9.9
+
+# =========================
+# Caching & Background Tasks
+# =========================
+redis==5.0.1
+django-redis==5.4.0
+celery==5.3.4
+django-celery-beat==2.5.0
+django-celery-results==2.5.1
+
+# =========================
+# Authentication & API Docs
+# =========================
+djangorestframework-simplejwt==5.4.0
+drf-spectacular==0.26.5
+# 2.8.1 is not a published PyJWT release (2.8.x line ends at 2.8.0);
+# the original pin would make `pip install` fail.
+PyJWT==2.8.0
+
+# =========================
+# File Handling
+#
========================= +pillow==10.1.0 + +# ========================= +# AI / Summary Generation +# ========================= +openai==0.27.8 + +# ========================= +# Utilities +# ========================= +python-dotenv==1.0.0 +python-decouple==3.8 +python-dateutil==2.8.2 +requests==2.31.0 + +# ========================= +# Logging & Monitoring +# ========================= +sentry-sdk==1.38.0 + +# ========================= +# Security +# ========================= +cryptography==41.0.7 + +# ========================= +# Testing +# ========================= +pytest==7.4.3 +pytest-django==4.7.0 +pytest-cov==4.1.0 +factory-boy==3.3.0 +faker==21.0.0 + +# ========================= +# Code Quality +# ========================= +black==23.12.0 +flake8==6.1.0 +isort==5.13.2 + +# ========================= +# Production Server +# ========================= +gunicorn==21.2.0 +uvicorn==0.24.0 \ No newline at end of file