From b7ce91ef0ced785b848d2bad8ff89a2252c0c6e2 Mon Sep 17 00:00:00 2001 From: Minhajuddin Mohammed Date: Wed, 10 Dec 2025 11:18:19 -0500 Subject: [PATCH] fixed the auto view creation when datastore resubmit process runs nightly --- ckanext/datapusher_plus/config.py | 10 +++++ ckanext/datapusher_plus/logic/action.py | 57 ++++++++++++++++++++----- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/ckanext/datapusher_plus/config.py b/ckanext/datapusher_plus/config.py index a8077bca..f97940eb 100644 --- a/ckanext/datapusher_plus/config.py +++ b/ckanext/datapusher_plus/config.py @@ -183,3 +183,13 @@ AUTO_CREATE_ZIP_MANIFEST = tk.asbool( tk.config.get("ckanext.datapusher_plus.auto_create_zip_manifest", True) ) + +# Time threshold (in seconds) for skipping view recreation on resubmission. +# If a resource is older than this threshold when datapusher completes, +# views will NOT be automatically created (assumes they were intentionally deleted). +# Default is 300 seconds (5 minutes). The check uses the resource's age only, so a +# first-time push that finishes after the threshold also gets no default views. +# Set to 0 (or a negative value) to always create views (old behavior). +SKIP_VIEW_RECREATION_THRESHOLD = tk.asint( + tk.config.get("ckanext.datapusher_plus.skip_view_recreation_threshold", "300") +) diff --git a/ckanext/datapusher_plus/logic/action.py b/ckanext/datapusher_plus/logic/action.py index 6e5fdf7f..6d93c079 100644 --- a/ckanext/datapusher_plus/logic/action.py +++ b/ckanext/datapusher_plus/logic/action.py @@ -21,6 +21,7 @@ import ckanext.datapusher_plus.interfaces as interfaces import ckanext.datapusher_plus.jobs as jobs import ckanext.datapusher_plus.utils as utils +import ckanext.datapusher_plus.config as dpconfig from ckanext.datapusher_plus.model import get_job_details @@ -255,17 +256,51 @@ def datapusher_hook(context: Context, data_dict: dict[str, Any]): cast("dict[str, Any]", context), resource_dict, dataset_dict ) - try: - logic.get_action("resource_create_default_resource_views")( 
context, - { - "resource": resource_dict, - "package": dataset_dict, - "create_datastore_views": True, - }, - ) - except Exception as e: - log.error("Error creating default views for resource %s: %s", res_id, e) + # Check if we should create default views based on resource age + # This prevents recreating views that were intentionally deleted + should_create_views = True + skip_threshold = dpconfig.SKIP_VIEW_RECREATION_THRESHOLD + + if skip_threshold > 0: + # Check if resource has a created timestamp + resource_created = resource_dict.get("created") + if resource_created: + try: + created_datetime = parse_date(resource_created) + current_datetime = datetime.datetime.utcnow() + # Make created_datetime timezone-naive if it has timezone info + if created_datetime.tzinfo is not None: + created_datetime = created_datetime.replace(tzinfo=None) + + age_seconds = (current_datetime - created_datetime).total_seconds() + + if age_seconds > skip_threshold: + should_create_views = False + log.info( + "Skipping view creation for resource %s: " + "Resource is %.1f seconds old (threshold: %d seconds). " + "Views were likely intentionally removed.", + res_id, age_seconds, skip_threshold + ) + except (ValueError, AttributeError) as e: + log.warning( + "Could not parse resource created timestamp for %s: %s. " + "Will create views anyway.", + res_id, e + ) + + if should_create_views: + try: + logic.get_action("resource_create_default_resource_views")( + context, + { + "resource": resource_dict, + "package": dataset_dict, + "create_datastore_views": True, + }, + ) + except Exception as e: + log.error("Error creating default views for resource %s: %s", res_id, e) # Check if the uploaded file has been modified in the meantime if resource_dict.get("last_modified") and metadata.get("task_created"):