Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions geonode/harvesting/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from django.db.models.functions import Concat
from django.utils import timezone
from django.conf import settings
from django.db import transaction
from django.db import transaction, IntegrityError

from geonode.resource.models import ExecutionRequest
from geonode.resource.enumerator import ExecutionRequestAction
Expand Down Expand Up @@ -741,16 +741,30 @@ def _update_harvestable_resources_batch(self, refresh_session_id: int, page: int
else:
processed = 0
for remote_resource in found_resources:
resource, created = models.HarvestableResource.objects.get_or_create(
harvester=harvester,
unique_identifier=remote_resource.unique_identifier,
title=remote_resource.title,
defaults={
"should_be_harvested": harvester.harvest_new_resources_by_default,
"remote_resource_type": remote_resource.resource_type,
"last_refreshed": timezone.now(),
},
)
try:
resource, created = models.HarvestableResource.objects.get_or_create(
harvester=harvester,
unique_identifier=remote_resource.unique_identifier,
defaults={
"title": remote_resource.title,
"should_be_harvested": harvester.harvest_new_resources_by_default,
"remote_resource_type": remote_resource.resource_type,
"last_refreshed": timezone.now(),
},
)
except IntegrityError:
# RACE CONDITION: Another worker created this between our SELECT and INSERT.
# We catch the error and simply fetch the one they created.
resource = models.HarvestableResource.objects.get(
harvester=harvester, unique_identifier=remote_resource.unique_identifier
)
created = False
# If the resource wasn't just created, check if the title changed
# (e.g. from 'copy title' to '28409') and update it.
if not created:
resource.title = remote_resource.title
resource.remote_resource_type = remote_resource.resource_type

processed += 1
# NOTE: make sure to save the resource because we need to have its
# `last_updated` property be refreshed - this is done in order to be able
Expand Down
39 changes: 39 additions & 0 deletions geonode/harvesting/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,45 @@ def test_update_harvestable_resources_sends_batched_requests(
# Assert chord was called with expiration
mock_chord.return_value.apply_async.assert_called_once_with(args=(), expires=123)

@mock.patch("geonode.harvesting.models.Harvester.get_harvester_worker")
def test_update_batch_corrects_title_mismatch(self, mock_get_worker):
"""
Verify that if a remote resource title changes, the existing
HarvestableResource is updated instead of causing an IntegrityError.
"""
# Setup the session to be "ON_GOING" so the task doesn't skip it
self.harvesting_session.status = models.AsynchronousHarvestingSession.STATUS_ON_GOING
self.harvesting_session.save()

# Pick one of the resources created in setUpTestData
# Let's say we target 'fake-identifier-0' which currently has 'fake-title-0'
target_uid = "fake-identifier-0"
new_remote_title = "COMPLETELY_NEW_TITLE_FROM_WMS"

# Mock the worker to return the resource with the new title
mock_remote_resource = mock.MagicMock()
mock_remote_resource.unique_identifier = target_uid
mock_remote_resource.title = new_remote_title
mock_remote_resource.resource_type = "fake-remote-resource-type"

mock_worker = mock.MagicMock()
mock_worker.list_resources.return_value = [mock_remote_resource]
mock_get_worker.return_value = mock_worker

# Run the batch task using the session ID from setUpTestData
# We process page 0 with page_size 1 to just handle our mocked resource
tasks._update_harvestable_resources_batch(self.harvesting_session.pk, page=0, page_size=1)

# ASSERTIONS
# Fetch the resource from the DB to see if it updated
resource = models.HarvestableResource.objects.get(unique_identifier=target_uid)

# Verify the title was updated correctly
self.assertEqual(resource.title, new_remote_title)

# Verify no new records were created (count should still be 3 from setUpTestData)
self.assertEqual(models.HarvestableResource.objects.count(), 3)

def test_harvesting_scheduler(self):
mock_harvester = mock.MagicMock(spec=models.Harvester).return_value
mock_harvester.scheduling_enabled = True
Expand Down