Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 145 additions & 1 deletion journal/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,4 +633,148 @@ def task_export_journal_to_articlemeta(
)

# Re-raise para que o Celery possa tratar a exceção adequadamente
raise
raise

# One row per Journal history relation that may still point at an Institution:
# (related-manager attribute, Journal migration method, result-dict key,
#  label used in the log message).
_INSTITUTION_HISTORY_MIGRATIONS = (
    (
        "publisher_history",
        "migrate_publisher_history_to_raw",
        "migrated_publishers",
        "publisher",
    ),
    (
        "owner_history",
        "migrate_owner_history_to_raw",
        "migrated_owners",
        "owner",
    ),
    (
        "sponsor_history",
        "migrate_sponsor_history_to_raw",
        "migrated_sponsors",
        "sponsor",
    ),
    (
        "copyright_holder_history",
        "migrate_copyright_holder_history_to_raw",
        "migrated_copyright_holders",
        "copyright holder",
    ),
)


def _migrate_journal_institution_histories(journal, stats):
    """Migrate each history relation of ``journal`` that still has an institution.

    ``stats`` is updated in place so that counts accumulated before a failure
    are kept when a later relation of the same journal raises.
    """
    for relation_name, method_name, stat_key, label in _INSTITUTION_HISTORY_MIGRATIONS:
        # Count the rows that actually need migration *before* migrating.
        # Per the code review on this change, the migrate_*_to_raw() methods
        # return every history item (including ones whose institution is
        # already null), so len() of their result would inflate the stats
        # -- TODO confirm against Journal.
        pending = (
            getattr(journal, relation_name)
            .filter(institution__isnull=False)
            .count()
        )
        if not pending:
            continue

        getattr(journal, method_name)()
        stats[stat_key] += pending
        logger.info(
            f"Migrated {pending} {label} history records "
            f"for journal {journal.id}"
        )


@celery_app.task(bind=True, name="task_migrate_institution_history_to_raw_institution")
def task_migrate_institution_history_to_raw_institution(
    self,
    username=None,
    user_id=None,
    collection_acron_list=None,
    journal_issns=None,
):
    """
    Migrate Institution data to RawOrganization for the selected journals.

    For every matching journal, the publisher, owner, sponsor and copyright
    holder history records that have a non-null ``institution`` are migrated
    through the corresponding ``Journal.migrate_*_history_to_raw()`` method.
    A failure on one journal is recorded as an ``UnexpectedEvent`` and logged,
    and processing continues with the next journal.

    Args:
        username: User name forwarded to ``_get_user``.
        user_id: User ID forwarded to ``_get_user``.
        collection_acron_list: Optional list of collection acronyms used to
            filter journals (e.g., ['scl', 'mex']).
        journal_issns: Optional list of SciELO ISSNs used to filter journals.
            When neither filter is given, all journals are processed.

    Returns:
        Dict with processing statistics:
            - total_journals: Number of journals processed without error
            - migrated_publishers: Publisher history records migrated
            - migrated_owners: Owner history records migrated
            - migrated_sponsors: Sponsor history records migrated
            - migrated_copyright_holders: Copyright holder history records migrated
            - error_count: Number of journals whose migration raised

    Raises:
        Exception: Any error raised outside the per-journal handling is
            recorded as an ``UnexpectedEvent`` and re-raised.
    """
    # The returned user is not needed below; the call itself is kept in case
    # _get_user validates the request/user arguments -- TODO confirm.
    _get_user(self.request, username=username, user_id=user_id)

    try:
        # Build the journal filter only from the arguments that were provided.
        params = {}
        if collection_acron_list:
            params["scielojournal__collection__acron3__in"] = collection_acron_list
        if journal_issns:
            params["scielojournal__issn_scielo__in"] = journal_issns

        stats = {
            "total_journals": 0,
            "migrated_publishers": 0,
            "migrated_owners": 0,
            "migrated_sponsors": 0,
            "migrated_copyright_holders": 0,
            "error_count": 0,
        }

        # distinct(): the filters traverse the scielojournal relation, which
        # can yield the same journal more than once.
        journals = Journal.objects.filter(**params).distinct()

        for journal in journals.iterator():
            try:
                _migrate_journal_institution_histories(journal, stats)
                stats["total_journals"] += 1
            except Exception as e:
                # Best effort: record the failure and move on to the next journal.
                stats["error_count"] += 1
                UnexpectedEvent.create(
                    exception=e,
                    exc_traceback=sys.exc_info()[2],
                    detail={
                        "task": "task_migrate_institution_history_to_raw_institution",
                        "journal_id": journal.id,
                        "journal_title": journal.title,
                    },
                )
                logger.error(
                    f"Error migrating history for journal {journal.id}: {e}"
                )

        logger.info(
            f"task_migrate_institution_history_to_raw_institution completed: {stats}"
        )

        return stats

    except Exception as e:
        UnexpectedEvent.create(
            exception=e,
            exc_traceback=sys.exc_info()[2],
            detail={
                "task": "task_migrate_institution_history_to_raw_institution",
                "collection_acron_list": collection_acron_list,
                "journal_issns": journal_issns,
                "user_id": user_id,
                "username": username,
                "task_id": getattr(self.request, "id", None),
            },
        )

        # Re-raise so that Celery can handle the exception properly.
        raise
Loading
Loading