# One entry per history relation handled by the migration task:
# (result key, label used in log messages, name of the history relation on
# Journal, name of the Journal method that performs the migration).
_INSTITUTION_HISTORY_MIGRATIONS = (
    (
        "migrated_publishers",
        "publisher",
        "publisher_history",
        "migrate_publisher_history_to_raw",
    ),
    (
        "migrated_owners",
        "owner",
        "owner_history",
        "migrate_owner_history_to_raw",
    ),
    (
        "migrated_sponsors",
        "sponsor",
        "sponsor_history",
        "migrate_sponsor_history_to_raw",
    ),
    (
        "migrated_copyright_holders",
        "copyright holder",
        "copyright_holder_history",
        "migrate_copyright_holder_history_to_raw",
    ),
)


@celery_app.task(bind=True, name="task_migrate_institution_history_to_raw_institution")
def task_migrate_institution_history_to_raw_institution(
    self,
    username=None,
    user_id=None,
    collection_acron_list=None,
    journal_issns=None,
):
    """
    Migrate institution-linked history records of journals to their raw form.

    For every selected journal, the publisher, owner, sponsor and copyright
    holder history relations are inspected. When a relation has at least one
    record whose ``institution`` is set, the matching
    ``Journal.migrate_*_history_to_raw()`` method is called and the number of
    items it returns is added to the statistics. The conversion itself is
    delegated to those ``Journal`` methods.

    An error raised while handling one journal is stored as an
    ``UnexpectedEvent`` and logged; processing then continues with the next
    journal.

    Args:
        username: User name used to resolve the requesting user.
        user_id: User ID used to resolve the requesting user.
        collection_acron_list: Collection acronyms used to filter journals
            (e.g., ['scl', 'mex']). A single string is accepted and treated
            as a one-item list.
        journal_issns: Optional SciELO ISSNs used to filter journals. A single
            string is accepted and treated as a one-item list.

        When neither filter is given, every journal is selected.

    Returns:
        Dict with processing statistics:
            - total_journals: Number of journals processed without error
            - migrated_publishers: Total publisher history records migrated
            - migrated_owners: Total owner history records migrated
            - migrated_sponsors: Total sponsor history records migrated
            - migrated_copyright_holders: Total copyright holder history
              records migrated
            - error_count: Number of journals whose processing raised an error

    Raises:
        Exception: Any error raised outside the per-journal handling is
            recorded as an ``UnexpectedEvent`` and re-raised.
    """
    # Resolved before the try block, as before: a failure here propagates
    # without creating an UnexpectedEvent.
    # NOTE(review): the resolved user is not used below - confirm whether the
    # migrate_* methods should receive it.
    _get_user(self.request, username=username, user_id=user_id)

    # A bare string handed to an ``__in`` lookup is iterated character by
    # character, so wrap it instead of letting the filter silently misbehave.
    acron_filter = (
        [collection_acron_list]
        if isinstance(collection_acron_list, str)
        else collection_acron_list
    )
    issn_filter = [journal_issns] if isinstance(journal_issns, str) else journal_issns

    try:
        # Both conditions go through the same ``scielojournal`` relation and
        # are applied in a single ``filter()`` call.
        params = {}
        if acron_filter:
            params["scielojournal__collection__acron3__in"] = acron_filter
        if issn_filter:
            params["scielojournal__issn_scielo__in"] = issn_filter

        migrated = {key: 0 for key, *_ in _INSTITUTION_HISTORY_MIGRATIONS}
        total_journals = 0
        error_count = 0

        # distinct(): the filters span a to-many relation, so the same journal
        # could otherwise be returned more than once.
        journals = Journal.objects.filter(**params).distinct()

        for journal in journals.iterator():
            try:
                for key, label, relation, method in _INSTITUTION_HISTORY_MIGRATIONS:
                    history = getattr(journal, relation)
                    # Only call the migration when there is something to migrate.
                    if not history.filter(institution__isnull=False).exists():
                        continue
                    migrated_items = getattr(journal, method)()
                    migrated[key] += len(migrated_items)
                    logger.info(
                        f"Migrated {len(migrated_items)} {label} history records "
                        f"for journal {journal.id}"
                    )

                # Counted only when all four relations were handled without error.
                total_journals += 1

            except Exception as e:
                error_count += 1
                UnexpectedEvent.create(
                    exception=e,
                    exc_traceback=sys.exc_info()[2],
                    detail={
                        "task": "task_migrate_institution_history_to_raw_institution",
                        "journal_id": journal.id,
                        "journal_title": journal.title,
                    },
                )
                logger.error(
                    f"Error migrating history for journal {journal.id}: {e}"
                )

        # Key order matches the documented result layout.
        result = {
            "total_journals": total_journals,
            **migrated,
            "error_count": error_count,
        }

        logger.info(
            f"task_migrate_institution_history_to_raw_institution completed: {result}"
        )

        return result

    except Exception as e:
        UnexpectedEvent.create(
            exception=e,
            exc_traceback=sys.exc_info()[2],
            detail={
                "task": "task_migrate_institution_history_to_raw_institution",
                "collection_acron_list": collection_acron_list,
                "journal_issns": journal_issns,
                "user_id": user_id,
                "username": username,
                "task_id": getattr(self.request, "id", None),
            },
        )

        # Re-raise so that Celery can handle the exception properly
        raise
class TaskMigrateInstitutionHistoryTestCase(TestCase):
    """Test cases for task_migrate_institution_history_to_raw_institution task

    The task is declared with ``bind=True``, so Celery supplies the task
    instance as ``self`` when the task object is called. The tests therefore
    call it with keyword arguments only: an extra leading positional argument
    (the task object or a mock) would be received as ``username`` and clash
    with the ``username=`` keyword.
    """

    def setUp(self):
        """Set up test fixtures"""
        from institution.models import (
            Publisher,
            Owner,
            Sponsor,
            CopyrightHolder,
            Institution,
            InstitutionIdentification,
        )
        from location.models import Country, Location

        self.user = User.objects.create_user(username="testuser")

        # Create collection
        self.collection = Collection.objects.create(
            acron3="scl",
            name="SciELO Brazil",
            creator=self.user,
        )

        # Create journal and link it to the collection
        self.journal = self._create_journal(
            title="Test Journal",
            collection=self.collection,
            issn_scielo="1234-5678",
            journal_acron="testj",
        )

        # Create location and institution
        self.country = Country.objects.create(
            name="Brazil",
            acron3="BRA",
        )
        self.location = Location.objects.create(
            country=self.country,
            creator=self.user,
        )

        self.institution_id = InstitutionIdentification.objects.create(
            name="Test University",
            acronym="TU",
            creator=self.user,
        )

        self.institution = Institution.objects.create(
            institution_identification=self.institution_id,
            location=self.location,
            creator=self.user,
        )

        # Create Publisher, Owner, Sponsor, CopyrightHolder instances
        self.publisher = Publisher.objects.create(
            institution=self.institution,
            creator=self.user,
        )
        self.owner = Owner.objects.create(
            institution=self.institution,
            creator=self.user,
        )
        self.sponsor = Sponsor.objects.create(
            institution=self.institution,
            creator=self.user,
        )
        self.copyright_holder = CopyrightHolder.objects.create(
            institution=self.institution,
            creator=self.user,
        )

    def _create_journal(self, title, collection, issn_scielo, journal_acron):
        """Create a Journal and the SciELOJournal linking it to ``collection``."""
        journal = Journal.objects.create(
            title=title,
            creator=self.user,
        )
        SciELOJournal.objects.create(
            journal=journal,
            collection=collection,
            issn_scielo=issn_scielo,
            journal_acron=journal_acron,
            creator=self.user,
        )
        return journal

    def _create_publisher_history(self, journal, institution):
        """Create a PublisherHistory record for ``journal``."""
        from journal.models import PublisherHistory

        return PublisherHistory.objects.create(
            journal=journal,
            institution=institution,
            creator=self.user,
        )

    def _run_task(self, **filters):
        """Run the task synchronously as the test user and return its result."""
        from journal.tasks import task_migrate_institution_history_to_raw_institution

        # No explicit ``self``: Celery binds the task instance automatically.
        return task_migrate_institution_history_to_raw_institution(
            username="testuser",
            **filters,
        )

    def test_task_migrates_all_history_types(self):
        """Test that task migrates all four history types"""
        from journal.models import (
            OwnerHistory,
            SponsorHistory,
            CopyrightHolderHistory,
        )

        # Create history records with institution data
        self._create_publisher_history(self.journal, self.publisher)
        OwnerHistory.objects.create(
            journal=self.journal,
            institution=self.owner,
            creator=self.user,
        )
        SponsorHistory.objects.create(
            journal=self.journal,
            institution=self.sponsor,
            creator=self.user,
        )
        CopyrightHolderHistory.objects.create(
            journal=self.journal,
            institution=self.copyright_holder,
            creator=self.user,
        )

        result = self._run_task(collection_acron_list=["scl"])

        # Verify all history records were migrated
        self.assertEqual(result["total_journals"], 1)
        self.assertEqual(result["migrated_publishers"], 1)
        self.assertEqual(result["migrated_owners"], 1)
        self.assertEqual(result["migrated_sponsors"], 1)
        self.assertEqual(result["migrated_copyright_holders"], 1)
        self.assertEqual(result["error_count"], 0)

        # Verify institution fields are None
        self.assertIsNone(self.journal.publisher_history.first().institution)
        self.assertIsNone(self.journal.owner_history.first().institution)
        self.assertIsNone(self.journal.sponsor_history.first().institution)
        self.assertIsNone(self.journal.copyright_holder_history.first().institution)

        # Verify raw fields are populated
        self.assertEqual(
            self.journal.publisher_history.first().raw_institution_name,
            "Test University",
        )

    def test_task_filters_by_collection(self):
        """Test that task filters journals by collection"""
        # Create another collection and a journal that belongs only to it
        collection2 = Collection.objects.create(
            acron3="mex",
            name="SciELO Mexico",
            creator=self.user,
        )
        journal2 = self._create_journal(
            title="Another Journal",
            collection=collection2,
            issn_scielo="8765-4321",
            journal_acron="testj2",
        )

        # Create history for both journals
        self._create_publisher_history(self.journal, self.publisher)
        self._create_publisher_history(journal2, self.publisher)

        # Run task only for "scl" collection
        result = self._run_task(collection_acron_list=["scl"])

        # Verify only one journal was processed
        self.assertEqual(result["total_journals"], 1)
        self.assertEqual(result["migrated_publishers"], 1)

        # Verify self.journal was migrated but journal2 was not
        self.assertIsNone(self.journal.publisher_history.first().institution)
        self.assertIsNotNone(journal2.publisher_history.first().institution)

    def test_task_filters_by_issn(self):
        """Test that task filters journals by ISSN"""
        # Create another journal in the same collection
        journal2 = self._create_journal(
            title="Another Journal",
            collection=self.collection,
            issn_scielo="8765-4321",
            journal_acron="testj2",
        )

        # Create history for both journals
        self._create_publisher_history(self.journal, self.publisher)
        self._create_publisher_history(journal2, self.publisher)

        # Run task only for specific ISSN
        result = self._run_task(
            collection_acron_list=["scl"],
            journal_issns=["1234-5678"],
        )

        # Verify only one journal was processed
        self.assertEqual(result["total_journals"], 1)
        self.assertEqual(result["migrated_publishers"], 1)

        # Verify self.journal was migrated but journal2 was not
        self.assertIsNone(self.journal.publisher_history.first().institution)
        self.assertIsNotNone(journal2.publisher_history.first().institution)

    def test_task_skips_history_without_institution(self):
        """Test that task only migrates history records with an institution set"""
        # Create history without institution
        self._create_publisher_history(self.journal, None)

        result = self._run_task(collection_acron_list=["scl"])

        # The journal is still processed, but nothing is migrated for it
        self.assertEqual(result["total_journals"], 1)
        self.assertEqual(result["migrated_publishers"], 0)