From fafb37565d3eecc4f8f5b359c89be4c09f5647e4 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Wed, 21 Jan 2026 16:51:34 +0100 Subject: [PATCH 01/13] First test tabular dataset --- geonode/upload/handlers/csv/handler.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 8170670e450..447ad9f123d 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -29,6 +29,7 @@ from dynamic_models.models import ModelSchema from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING +from geonode.upload.orchestrator import orchestrator logger = logging.getLogger("importer") @@ -52,7 +53,7 @@ def supported_file_extension_config(self): { "label": "CSV", "required_ext": ["csv"], - "optional_ext": ["sld", "xml"], + "optional_ext": ["sld", "xml", "json"], } ], "actions": list(self.TASKS.keys()), @@ -116,6 +117,11 @@ def is_valid(files, user, **kwargs): ) if not geom_is_in_schema and not has_lat and not has_long: + if 'csv_file' in files and 'json_file' in files: + exec_obj = orchestrator.get_execution_object(kwargs.get("execution_id")) + exec_obj.input_params.update({"is_tabular": True}) + exec_obj.save() + return True raise InvalidCSVException(f"Not enough geometry field are set. The possibilities are: {','.join(fields)}") return True @@ -135,7 +141,7 @@ def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, ** f"{base_command} -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME={BaseVectorFileHandler().default_geometry_column_name} " + additional_option ) - + def create_dynamic_model_fields( self, layer: str, @@ -147,10 +153,13 @@ def create_dynamic_model_fields( ): # retrieving the field schema from ogr2ogr and converting the type to Django Types layer_schema = [{"name": x.name.lower(), "class_name": self._get_type(x), "null": True} for x in layer.schema] + exec_obj = orchestrator.get_execution_object(execution_id) + if ( layer.GetGeometryColumn() or self.default_geometry_column_name and ogr.GeometryTypeToName(layer.GetGeomType()) not in ["Geometry Collection", "Unknown (any)"] + and not exec_obj.input_params.get("is_tabular") ): # the geometry colum is not returned rom the layer.schema, so we need to extract it manually # checking if the geometry has been wrogly read as string From 649b0ee5939c4077fa9bb62ed12f2a85944aa0c0 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Wed, 21 Jan 2026 17:04:52 +0100 Subject: [PATCH 02/13] First test tabular dataset --- geonode/upload/handlers/csv/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 447ad9f123d..139fa414879 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -139,7 +139,7 @@ def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, ** additional_option = ' -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*"' return ( f"{base_command} -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME={BaseVectorFileHandler().default_geometry_column_name} " - + additional_option + + additional_option + " -oo AUTODETECT_TYPE=YES" ) def create_dynamic_model_fields( From eef368a92dd7498047c1ac8c746135dd22b9453c Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Wed, 21 Jan 2026 18:40:44 +0100 Subject: [PATCH 03/13] First test tabular dataset --- geonode/layers/api/serializers.py | 9 ++++++ geonode/layers/models.py | 4 +++ geonode/upload/handlers/csv/handler.py | 39 ++++++++++++++++++++------ 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/geonode/layers/api/serializers.py b/geonode/layers/api/serializers.py index 5a95554c978..2d11428679d 100644 --- a/geonode/layers/api/serializers.py +++ b/geonode/layers/api/serializers.py @@ -161,6 +161,8 @@ class DatasetSerializer(ResourceBaseSerializer): attribute_set = DynamicRelationField(AttributeSerializer, embed=True, many=True, read_only=True) featureinfo_custom_template = FeatureInfoTemplateField() + is_tabular = serializers.SerializerMethodField(read_only=True) + class Meta: model = Dataset name = "dataset" @@ -189,10 +191,17 @@ class Meta: "store", "subtype", "ptype", + "is_tabular", ) ) ) + def get_is_tabular(self, instance): + """ + Returns the permissions for the resource instance using Django cache. + """ + return instance.is_tabular + class DatasetListSerializer(DatasetSerializer): class Meta(DatasetSerializer.Meta): diff --git a/geonode/layers/models.py b/geonode/layers/models.py index f4f406dddd9..a5be49c4f65 100644 --- a/geonode/layers/models.py +++ b/geonode/layers/models.py @@ -164,6 +164,10 @@ def is_vector(self): def is_raster(self): return self.subtype == "raster" + @property + def is_tabular(self): + return self.subtype == "tabular" + @property def supports_time(self): valid_attributes = self.get_choices diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 139fa414879..90eeaaf187d 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -17,7 +17,10 @@ # ######################################################################### import logging +import os +from geonode.geoserver.createlayer.utils import BBOX +from geonode.layers.models import Dataset from geonode.resource.enumerator import ExecutionRequestAction as exa from geonode.upload.api.exceptions import UploadParallelismLimitException from geonode.upload.utils import UploadLimitValidator @@ -30,6 +33,7 @@ from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING from geonode.upload.orchestrator import orchestrator +from django.contrib.gis.geos import Polygon logger = logging.getLogger("importer") @@ -117,12 +121,10 @@ def is_valid(files, user, **kwargs): ) if not geom_is_in_schema and not has_lat and not has_long: - if 'csv_file' in files and 'json_file' in files: - exec_obj = orchestrator.get_execution_object(kwargs.get("execution_id")) - exec_obj.input_params.update({"is_tabular": True}) - exec_obj.save() - return True - raise InvalidCSVException(f"Not enough geometry field are set. The possibilities are: {','.join(fields)}") + exec_obj = orchestrator.get_execution_object(kwargs.get("execution_id")) + exec_obj.input_params.update({"is_tabular": True}) + exec_obj.save() + return True return True @@ -139,9 +141,10 @@ def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, ** additional_option = ' -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*"' return ( f"{base_command} -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME={BaseVectorFileHandler().default_geometry_column_name} " - + additional_option + " -oo AUTODETECT_TYPE=YES" + + additional_option + + " -oo AUTODETECT_TYPE=YES" ) - + def create_dynamic_model_fields( self, layer: str, @@ -227,3 +230,23 @@ def identify_authority(self, layer): return authority_code except Exception: return "EPSG:4326" + + def create_geonode_resource( + self, layer_name, alternate, execution_id, resource_type: Dataset = Dataset, asset=None + ): + res = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset) + res.set_bbox_polygon(BBOX) + return res + + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace): + return dict( + name=alternate, + workspace=workspace, + store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), + subtype="tabular", + alternate=f"{workspace}:{alternate}", + dirty_state=True, + title=layer_name, + owner=_exec.user, + asset=asset, + ) From 3fa2c96cef8a6d4fd50dd831b70fcebf0fd6fd8e Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Thu, 22 Jan 2026 12:19:44 +0100 Subject: [PATCH 04/13] Switching strategy from ogr.open to gdal.Open --- .env_dev | 2 +- geonode/base/models.py | 21 +++++++++++++++++++-- geonode/geoserver/helpers.py | 8 +++----- geonode/geoserver/tasks.py | 3 +++ geonode/resource/manager.py | 2 +- geonode/upload/handlers/common/vector.py | 24 ++++++++++++++++++++---- geonode/upload/handlers/csv/handler.py | 15 ++++++++++----- 7 files changed, 57 insertions(+), 18 deletions(-) diff --git a/.env_dev b/.env_dev index 0cfa9dad6c7..77f98477b67 100644 --- a/.env_dev +++ b/.env_dev @@ -36,7 +36,7 @@ GEONODE_DB_CONN_TOUT=5 DEFAULT_BACKEND_DATASTORE=datastore BROKER_URL=redis://localhost:6379/0 CELERY_BEAT_SCHEDULER=celery.beat:PersistentScheduler -ASYNC_SIGNALS=False +ASYNC_SIGNALS=True # Harvesting Monitoring configuration HARVESTING_MONITOR_ENABLED=False diff --git a/geonode/base/models.py b/geonode/base/models.py index dd4ba42c4c5..1bc45e591ff 100644 --- a/geonode/base/models.py +++ b/geonode/base/models.py @@ -955,15 +955,23 @@ def raw_abstract(self): @property def can_be_downloaded(self): - return self.subtype in {"vector", "raster", "vector_time"} + return self.subtype in {"vector", "raster", "vector_time", "tabular"} @property def can_have_wfs_links(self): return self.subtype == "vector" + @property + def can_have_thumbnail(self): + return self.subtype != "tabular" + @property def can_have_wps_links(self): - return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time"} + return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time", "tabular"} + + @property + def should_create_style(self): + return self.subtype != "tabular" @property def can_have_style(self): @@ -989,6 +997,15 @@ def raw_data_quality_statement(self): def detail_url(self): return self.get_absolute_url() + def fixup_store_type(self, keys, values): + from geonode.geoserver.helpers import get_dataset_storetype + + if self.subtype == "tabular": + return self + for key in keys: + setattr(self, key, get_dataset_storetype(values[key])) + return self + def clean(self): if self.title: self.title = self.title.replace(",", "_") diff --git a/geonode/geoserver/helpers.py b/geonode/geoserver/helpers.py index da2b5eb4d4e..c1df92cb21a 100755 --- a/geonode/geoserver/helpers.py +++ b/geonode/geoserver/helpers.py @@ -99,6 +99,7 @@ ("application/wfs-collection-1.1", "vector"), ("application/zip", "vector"), ("text/csv", "vector"), + ("text/csv", "tabular"), ] DEFAULT_STYLE_NAME = ["generic", "line", "point", "polygon", "raster"] @@ -1945,10 +1946,7 @@ def sync_instance_with_geoserver(instance_id, *args, **kwargs): instance.gs_resource = gs_resource # Iterate over values from geoserver. - for key in ["alternate", "store", "subtype"]: - # attr_name = key if 'typename' not in key else 'alternate' - # print attr_name - setattr(instance, key, get_dataset_storetype(values[key])) + instance = instance.fixup_store_type(["alternate", "store", "subtype"], values) if updatemetadata: # Get metadata links @@ -2055,7 +2053,7 @@ def sync_instance_with_geoserver(instance_id, *args, **kwargs): # Refresh from DB instance.refresh_from_db() - if updatemetadata: + if updatemetadata and instance.should_create_style: # Save dataset styles logger.debug(f"... Refresh Legend links for Dataset {instance.title}") try: diff --git a/geonode/geoserver/tasks.py b/geonode/geoserver/tasks.py index 6cd6a4a31a6..e319f7819f9 100644 --- a/geonode/geoserver/tasks.py +++ b/geonode/geoserver/tasks.py @@ -154,6 +154,9 @@ def geoserver_create_style(self, instance_id, name, sld_file, tempdir): logger.debug(f"Dataset id {instance_id} does not exist yet!") raise + if not instance.should_create_style: + return + lock_id = f"{self.request.id}" if self.request.id else instance.name log_lock.debug(f"geoserver_create_style: Creating lock {lock_id} for {instance.name}") with AcquireLock(lock_id) as lock: diff --git a/geonode/resource/manager.py b/geonode/resource/manager.py index 04c860884df..e5405652a19 100644 --- a/geonode/resource/manager.py +++ b/geonode/resource/manager.py @@ -866,7 +866,7 @@ def set_thumbnail( thumbnail_algorithm=ThumbnailAlgorithms.fit, ) -> bool: _resource = instance or ResourceManager._get_instance(uuid) - if _resource: + if _resource and _resource.can_have_thumbnail: try: with transaction.atomic(): if thumbnail: diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 007bfb54cc9..2b7150af707 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -78,7 +78,6 @@ from geonode.upload.registry import feature_validators_registry from django.core.exceptions import ValidationError - logger = logging.getLogger("importer") @@ -420,6 +419,8 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw ] def identify_authority(self, layer): + if not isinstance(layer, ogr.Layer): + layer = layer.GetLayer() try: layer_wkt = layer.GetSpatialRef().ExportToWkt() _name = "EPSG" @@ -446,14 +447,17 @@ def get_ogr2ogr_driver(self): """ return None + def _gdal_open_options(self): + return {} + def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: """ Main function to import the resource. Internally will call the steps required to import the data inside the geonode_data database """ - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) - layers = self._select_valid_layers(all_layers) + gdal_proxy = self.open_source_file(files) + layers = self._select_valid_layers(gdal_proxy) # for the moment we skip the dyanamic model creation layer_count = len(layers) logger.info(f"Total number of layers available: {layer_count}") @@ -473,7 +477,8 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: # start looping on the layers available - for index, layer in enumerate(layers, start=1): + for index, gdal_layer in enumerate(layers, start=1): + layer = gdal_layer.GetLayer() layer_name = self.fixup_name(layer.GetName()) should_be_overwritten = _exec.input_params.get("overwrite_existing_layer") @@ -554,6 +559,17 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: raise e return layer_names, alternates, execution_id + def open_source_file(self, files): + from osgeo import gdal + + gdal_proxy = gdal.OpenEx( + files.get("base_file"), + nOpenFlags=gdal.OF_VECTOR, + allowed_drivers=[self.get_ogr2ogr_driver().name], + **self._gdal_open_options(), + ) + return [gdal_proxy] + def _select_valid_layers(self, all_layers): layers = [] for layer in all_layers: diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 90eeaaf187d..0c134e52823 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -33,7 +33,6 @@ from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING from geonode.upload.orchestrator import orchestrator -from django.contrib.gis.geos import Polygon logger = logging.getLogger("importer") @@ -109,7 +108,6 @@ def is_valid(files, user, **kwargs): has_lat = any(x in CSVFileHandler().possible_lat_column for x in schema_keys) has_long = any(x in CSVFileHandler().possible_long_column for x in schema_keys) - fields = CSVFileHandler().possible_geometry_column_name + CSVFileHandler().possible_latlong_column if has_lat and not has_long: raise InvalidCSVException( f"Longitude is missing. Supported names: {', '.join(CSVFileHandler().possible_long_column)}" @@ -131,6 +129,9 @@ def is_valid(files, user, **kwargs): def get_ogr2ogr_driver(self): return ogr.GetDriverByName("CSV") + def _gdal_open_options(self): + return {"open_options": ["AUTODETECT_TYPE=YES"]} + @staticmethod def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, **kwargs): """ @@ -212,7 +213,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw } ] - layers = self.get_ogr2ogr_driver().Open(files.get("base_file"), 0) + layers = self.open_source_file(files) if not layers: return [] return [ @@ -221,12 +222,16 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "crs": (self.identify_authority(_l)), } for _l in layers - if self.fixup_name(_l.GetName()) == layer_name + if self.fixup_name(_l.GetLayer().GetName()) == layer_name ] def identify_authority(self, layer): + if not isinstance(layer, ogr.Layer): + layer = layer.GetLayer() try: authority_code = super().identify_authority(layer=layer) + if authority_code == ogr.wkbNone: + logger.warning("For tabular CSV, we set by default EPSG:4326") return authority_code except Exception: return "EPSG:4326" @@ -235,7 +240,7 @@ def create_geonode_resource( self, layer_name, alternate, execution_id, resource_type: Dataset = Dataset, asset=None ): res = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset) - res.set_bbox_polygon(BBOX) + res.set_bbox_polygon(BBOX, res.srid) return res def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace): From 0d503cfabbafb987e4e72fe9ef8d1530f6c13991 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Thu, 22 Jan 2026 15:02:19 +0100 Subject: [PATCH 05/13] merge with master --- geonode/base/models.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/geonode/base/models.py b/geonode/base/models.py index f2b80c1ea7e..dde30727f7f 100644 --- a/geonode/base/models.py +++ b/geonode/base/models.py @@ -961,10 +961,6 @@ def can_be_downloaded(self): def can_have_wfs_links(self): return self.subtype == "vector" - @property - def can_have_thumbnail(self): - return self.subtype != "tabular" - @property def can_have_wps_links(self): return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time", "tabular"} @@ -979,7 +975,7 @@ def can_have_style(self): @property def can_have_thumbnail(self): - return True + return self.subtype != "tabular" @property def raw_purpose(self): From 5bf9829b6ba290c9310209a16a3b35dc3bc43f52 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Thu, 22 Jan 2026 18:20:29 +0100 Subject: [PATCH 06/13] Add support for replace tabular data --- .env_dev | 2 +- geonode/layers/api/serializers.py | 6 --- geonode/upload/handlers/common/raster.py | 2 +- geonode/upload/handlers/common/vector.py | 21 +++++---- geonode/upload/handlers/csv/handler.py | 2 +- geonode/upload/handlers/csv/tests.py | 27 +++++++++--- geonode/upload/tests/end2end/test_end2end.py | 45 +++++++++++++++++++- 7 files changed, 80 insertions(+), 25 deletions(-) diff --git a/.env_dev b/.env_dev index 77f98477b67..0cfa9dad6c7 100644 --- a/.env_dev +++ b/.env_dev @@ -36,7 +36,7 @@ GEONODE_DB_CONN_TOUT=5 DEFAULT_BACKEND_DATASTORE=datastore BROKER_URL=redis://localhost:6379/0 CELERY_BEAT_SCHEDULER=celery.beat:PersistentScheduler -ASYNC_SIGNALS=True +ASYNC_SIGNALS=False # Harvesting Monitoring configuration HARVESTING_MONITOR_ENABLED=False diff --git a/geonode/layers/api/serializers.py b/geonode/layers/api/serializers.py index 2d11428679d..6b6cb405797 100644 --- a/geonode/layers/api/serializers.py +++ b/geonode/layers/api/serializers.py @@ -196,12 +196,6 @@ class Meta: ) ) - def get_is_tabular(self, instance): - """ - Returns the permissions for the resource instance using Django cache. - """ - return instance.is_tabular - class DatasetListSerializer(DatasetSerializer): class Meta(DatasetSerializer.Meta): diff --git a/geonode/upload/handlers/common/raster.py b/geonode/upload/handlers/common/raster.py index 6612a36295a..798bc378565 100644 --- a/geonode/upload/handlers/common/raster.py +++ b/geonode/upload/handlers/common/raster.py @@ -569,7 +569,7 @@ def _publish_resource_rollback(self, exec_id, instance_name=None, *args, **kwarg publisher = DataPublisher(handler_module_path=handler_module_path) publisher.delete_resource(instance_name) - def fixup_dynamic_model_fields(self, _exec, files): + def fixup_dynamic_model_fields(self, _exec, files, **kwargs): """ Raster dataset does not have the dynamic model, so this can be skept """ diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 2b7150af707..59aab3e29ec 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -114,6 +114,8 @@ class BaseVectorFileHandler(BaseHandler): ira.UPSERT.value: ("start_import", "geonode.upload.upsert_data", "geonode.upload.refresh_geonode_resource"), } + SUBTYPE = "vector" + @property def have_table(self): return True @@ -584,14 +586,17 @@ def _select_valid_layers(self, all_layers): pass return layers + def can_overwrite(self, _exec_obj, dataset): + is_tabular = _exec_obj.input_params.get("is_tabular", None) + return dataset.is_vector() if not is_tabular else is_tabular + def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten): if _exec_obj.input_params.get("resource_pk"): dataset = Dataset.objects.filter(pk=_exec_obj.input_params.get("resource_pk")).first() if not dataset: raise ImportException("The dataset selected for the ovewrite does not exists") - if should_be_overwritten: - if not dataset.is_vector(): - raise Exception("Cannot override a raster dataset with a vector one") + if should_be_overwritten and not self.can_overwrite(_exec_obj, dataset): + raise Exception("Cannot override a raster dataset with a vector one") alternate = dataset.alternate.split(":") return alternate[-1] @@ -599,9 +604,8 @@ def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten dataset_available = Dataset.objects.filter(alternate__iexact=f"{workspace.name}:{layer_name}") dataset_exists = dataset_available.exists() - if should_be_overwritten: - if not dataset_available.is_vector(): - raise Exception("Cannot override a raster dataset with a vector one") + if should_be_overwritten and not self.can_overwrite(_exec_obj, dataset): + raise Exception("Cannot override a raster dataset with a vector one") if dataset_exists and should_be_overwritten: alternate = dataset_available.first().alternate.split(":")[-1] @@ -1180,8 +1184,7 @@ def __get_new_and_original_schema(self, files, execution_id): # Will generate the same schema as the target_resource_schema new_file_schema_fields = self.create_dynamic_model_fields( - layer, - return_celery_group=False, + layer, return_celery_group=False, execution_id=execution_id ) return target_schema_fields, new_file_schema_fields @@ -1532,7 +1535,7 @@ def refresh_geonode_resource(self, execution_id, asset=None, dataset=None, creat self.__fixup_primary_key(dataset) return dataset - def fixup_dynamic_model_fields(self, _exec, files): + def fixup_dynamic_model_fields(self, _exec, files, **kwargs): """ Utility needed during the replace workflow, it will sync all the FieldSchema along with the current resource uploaded. diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 0c134e52823..4edfa07127d 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -248,7 +248,7 @@ def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspa name=alternate, workspace=workspace, store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), - subtype="tabular", + subtype="tabular" if _exec.input_params.get("is_tabular") else "vector", alternate=f"{workspace}:{alternate}", dirty_state=True, title=layer_name, diff --git a/geonode/upload/handlers/csv/tests.py b/geonode/upload/handlers/csv/tests.py index 3ce2b48f586..b2ff4c18def 100644 --- a/geonode/upload/handlers/csv/tests.py +++ b/geonode/upload/handlers/csv/tests.py @@ -31,6 +31,8 @@ from geonode.upload.handlers.csv.handler import CSVFileHandler from osgeo import ogr +from geonode.upload.utils import ExecutionRequest + class TestCSVHandler(TestCase): databases = ("default", "datastore") @@ -78,12 +80,25 @@ def test_is_valid_should_raise_exception_if_the_csv_is_invalid(self): self.assertIsNotNone(_exc) self.assertTrue("The CSV provided is invalid, no layers found" in str(_exc.exception.detail)) - def test_is_valid_should_raise_exception_if_the_csv_missing_geom(self): - with self.assertRaises(InvalidCSVException) as _exc: - self.handler.is_valid(files={"base_file": self.missing_geom}, user=self.user) + def test_is_valid_should_set_as_tabular_if_the_csv_miss_geom(self): + exec_obj = ExecutionRequest.objects.create( + user=self.user, + func_name="test", + geonode_resource=self.layer, + input_params={ + "uuid": self.layer.uuid, + "owner": self.layer.owner.username, + "resource_type": self.layer.resource_type, + "defaults": f'{{"owner":"{self.layer.owner.username}"}}', + }, + ) + self.handler.is_valid( + files={"base_file": self.missing_geom}, user=self.user, execution_id=str(exec_obj.exec_id) + ) - self.assertIsNotNone(_exc) - self.assertTrue("Not enough geometry field are set" in str(_exc.exception.detail)) + exec_obj.refresh_from_db() + + self.assertTrue(exec_obj.input_params.get("is_tabular", False)) def test_is_valid_should_raise_exception_if_the_csv_missing_lat(self): with self.assertRaises(InvalidCSVException) as _exc: @@ -176,7 +191,7 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(self, + '\' " "' + self.valid_csv + '" -lco FID=fid' - + ' -nln alternate "dataset" -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME=geom -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*"', # noqa + + ' -nln alternate "dataset" -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME=geom -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*" -oo AUTODETECT_TYPE=YES', # noqa stdout=-1, stderr=-1, shell=True, # noqa diff --git a/geonode/upload/tests/end2end/test_end2end.py b/geonode/upload/tests/end2end/test_end2end.py index 4447c306bdf..54a18cb3d06 100644 --- a/geonode/upload/tests/end2end/test_end2end.py +++ b/geonode/upload/tests/end2end/test_end2end.py @@ -67,6 +67,7 @@ def setUpClass(cls) -> None: cls.valid_tif = f"{project_dir}/tests/fixture/test_raster.tif" cls.valid_csv = f"{project_dir}/tests/fixture/valid.csv" + cls.missing_geom_csv = f"{project_dir}/tests/fixture/missing_geom.csv" cls.url = reverse("importer_upload") ogc_server_settings = OGC_Servers_Handler(settings.OGC_SERVER)["default"] @@ -297,7 +298,7 @@ def test_import_geojson_overwrite(self): class ImporterGCSVImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") - def test_import_geojson(self): + def test_import_csv(self): self._cleanup_layers(name="valid") payload = {"base_file": open(self.valid_csv, "rb"), "action": "upload"} @@ -321,6 +322,48 @@ def test_import_csv_overwrite(self): self._cleanup_layers(name="valid") +class ImporterGCSVTabularImportTest(BaseImporterEndToEndTest): + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") + def test_import_tabular_csv(self): + self._cleanup_layers(name="missing_geom") + + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + self._assertimport( + payload, + initial_name, + assert_payload={ + "subtype": "tabular", + "resource_type": "dataset", + }, + ) + self._cleanup_layers(name="missing_geom") + + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") + def test_import_tabular_csv_overwrite(self): + self._cleanup_layers(name="missing_geom") + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + prev_dataset = self._assertimport( + payload, + initial_name, + keep_resource=True, + assert_payload={ + "subtype": "tabular", + "resource_type": "dataset", + }, + ) + + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + payload["overwrite_existing_layer"] = True + payload["resource_pk"] = prev_dataset.pk + self._assertimport(payload, initial_name, overwrite=True, last_update=prev_dataset.last_updated) + self._cleanup_layers(name="missing_geom") + + class ImporterKMLImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") From 9f6d1044c4be9899abd320f6bc465a6ea3b51fd9 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Fri, 23 Jan 2026 11:14:18 +0100 Subject: [PATCH 07/13] Add support for replace, upsert for tabular data --- geonode/layers/api/serializers.py | 2 - geonode/upload/handlers/common/raster.py | 4 +- geonode/upload/handlers/common/vector.py | 58 +++++++++++++----------- geonode/upload/handlers/csv/handler.py | 10 ++-- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/geonode/layers/api/serializers.py b/geonode/layers/api/serializers.py index 6b6cb405797..8eae5a79fd6 100644 --- a/geonode/layers/api/serializers.py +++ b/geonode/layers/api/serializers.py @@ -161,8 +161,6 @@ class DatasetSerializer(ResourceBaseSerializer): attribute_set = DynamicRelationField(AttributeSerializer, embed=True, many=True, read_only=True) featureinfo_custom_template = FeatureInfoTemplateField() - is_tabular = serializers.SerializerMethodField(read_only=True) - class Meta: model = Dataset name = "dataset" diff --git a/geonode/upload/handlers/common/raster.py b/geonode/upload/handlers/common/raster.py index 798bc378565..9094fefe71e 100644 --- a/geonode/upload/handlers/common/raster.py +++ b/geonode/upload/handlers/common/raster.py @@ -240,9 +240,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter( - Q(alternate__icontains=layer_name) | Q(title__icontains=layer_name) - ) + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")) .first() .srid, "raster_path": raster_path, diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 59aab3e29ec..f5a0137bb9b 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -114,8 +114,6 @@ class BaseVectorFileHandler(BaseHandler): ira.UPSERT.value: ("start_import", "geonode.upload.upsert_data", "geonode.upload.refresh_geonode_resource"), } - SUBTYPE = "vector" - @property def have_table(self): return True @@ -400,15 +398,13 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter( - Q(alternate__icontains=layer_name) | Q(title__icontains=layer_name) - ) + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")) .first() .srid, } ] - layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + layers = self.open_source_file(files) if not layers: return [] return [ @@ -421,8 +417,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw ] def identify_authority(self, layer): - if not isinstance(layer, ogr.Layer): - layer = layer.GetLayer() + layer = self._extract_layer(layer) try: layer_wkt = layer.GetSpatialRef().ExportToWkt() _name = "EPSG" @@ -480,7 +475,7 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: # start looping on the layers available for index, gdal_layer in enumerate(layers, start=1): - layer = gdal_layer.GetLayer() + layer = self._extract_layer(gdal_layer) layer_name = self.fixup_name(layer.GetName()) should_be_overwritten = _exec.input_params.get("overwrite_existing_layer") @@ -576,6 +571,7 @@ def _select_valid_layers(self, all_layers): layers = [] for layer in all_layers: try: + layer = self._extract_layer(layer) self.identify_authority(layer) layers.append(layer) except Exception as e: @@ -589,7 +585,7 @@ def _select_valid_layers(self, all_layers): def can_overwrite(self, _exec_obj, dataset): is_tabular = _exec_obj.input_params.get("is_tabular", None) return dataset.is_vector() if not is_tabular else is_tabular - + def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten): if _exec_obj.input_params.get("resource_pk"): dataset = Dataset.objects.filter(pk=_exec_obj.input_params.get("resource_pk")).first() @@ -632,7 +628,7 @@ def setup_dynamic_model( - celery_group -> the celery group of the field creation """ - layer_name = self.fixup_name(layer.GetName() if isinstance(layer, ogr.Layer) else layer) + layer_name = self.fixup_name(self._extract_layer(layer).GetName()) _exec_obj = orchestrator.get_execution_object(execution_id) is_dynamic_model_managed = _exec_obj.input_params.get("is_dynamic_model_managed", False) @@ -1141,7 +1137,7 @@ def upsert_validation(self, files, execution_id, **kwargs: dict) -> Tuple[bool, return True, None except Exception as e: - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) if layers := self._select_valid_layers(all_layers): _errors = e.args[0] if isinstance(e, UpsertException) else [str(e)] if isinstance(_errors, str): @@ -1153,6 +1149,11 @@ def upsert_validation(self, files, execution_id, **kwargs: dict) -> Tuple[bool, "User does not have enough permissions to perform this action on the selected resource" ) + def _extract_layer(self, layer): + if not isinstance(layer, ogr.Layer): + layer = layer.GetLayer() + return layer + def __get_new_and_original_schema(self, files, execution_id): # check if the execution_id is passed and if the geonode resource exists exec_id = orchestrator.get_execution_object(execution_id) @@ -1165,19 +1166,23 @@ def __get_new_and_original_schema(self, files, execution_id): target_schema_fields = FieldSchema.objects.filter(model_schema__name=target_resource.alternate.split(":")[-1]) # use ogr2ogr to read the uploaded files for the upsert - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) layers = self._select_valid_layers(all_layers) if not layers: raise UpsertException("No valid layers found in the provided file for upsert.") - layer = layers[0] + layer = self._extract_layer(layers[0]) # evaluate if some of the fid entry is null. if is null we stop the workflow # the user should provide the completed list with the fid set - sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME}" IS NULL' + sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME.lower()}" IS NULL' - # Execute the SQL query to the layer - result = all_layers.ExecuteSQL(sql_query) - if not result or (result and result.GetFeatureCount() > 0): + # Execute the SQL query to the layer via the gdal proxy object + result = all_layers[0].ExecuteSQL(sql_query) + if ( + not (result and DEFAULT_PK_COLUMN_NAME in (x.name.lower() for x in result.schema)) + or (result and result.GetFeatureCount() > 0) + or not result + ): raise UpsertException( f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" ) @@ -1255,7 +1260,7 @@ def upsert_data(self, files, execution_id, **kwargs): # if for any reason the key is not present, better to raise an error raise UpsertException("Was not possible to find the upsert key, upsert is aborted") # use ogr2ogr to read the uploaded files values for the upsert - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) valid_create = 0 valid_update = 0 layers = self._select_valid_layers(all_layers) @@ -1360,8 +1365,8 @@ def _create_error_log(self, exec_obj, layers, errors): "Error found during the upsert process, no update/create will be perfomed. The error log is going to be created..." ) errors_to_print = errors[: settings.UPSERT_LIMIT_ERROR_LOG] - - log_name = f'error_{layers[0].GetName()}_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.csv' + layer = self._extract_layer(layers[0]) + log_name = f'error_{layer.GetName()}_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.csv' with tempfile.TemporaryDirectory() as temp_dir_str: temp_dir = Path(temp_dir_str) @@ -1441,11 +1446,12 @@ def _save_feature(self, data_chunk, model_obj, model_instance, upsert_key, valid # need to simulate the "promote to multi" used by the upload process. # here we cannot rely on ogr2ogr so we need to do it manually geom = feature.GetGeometryRef() - code = geom.GetSpatialReference().GetAuthorityCode(None) - feature_as_dict.update( - {self.default_geometry_column_name: f"SRID={code};{self.promote_geom_to_multi(geom).ExportToWkt()}"} - ) - to_process.append(feature_as_dict) + if geom: + code = geom.GetSpatialReference().GetAuthorityCode(None) + feature_as_dict.update( + {self.default_geometry_column_name: f"SRID={code};{self.promote_geom_to_multi(geom).ExportToWkt()}"} + ) + to_process.append(feature_as_dict) for feature_as_dict in to_process: if feature_as_dict.get(upsert_key) in value_in_db: diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 4edfa07127d..0149f919d91 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -33,6 +33,7 @@ from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING from geonode.upload.orchestrator import orchestrator +from django.db.models import Q logger = logging.getLogger("importer") @@ -205,11 +206,13 @@ def create_dynamic_model_fields( return dynamic_model_schema, celery_group def extract_resource_to_publish(self, files, action, layer_name, alternate, **kwargs): - if action == exa.COPY.value: + if action == exa.COPY.value: return [ { "name": alternate, - "crs": ResourceBase.objects.filter(alternate__istartswith=layer_name).first().srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")) + .first() + .srid, } ] @@ -226,8 +229,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw ] def identify_authority(self, layer): - if not isinstance(layer, ogr.Layer): - layer = layer.GetLayer() + layer = self._extract_layer(layer) try: authority_code = super().identify_authority(layer=layer) if authority_code == ogr.wkbNone: From 6ae7a650f8931f8b461e9fdc8b6831236919fbf4 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Fri, 23 Jan 2026 12:03:42 +0100 Subject: [PATCH 08/13] Add support for copy for tabular dataset --- geonode/upload/handlers/README.md | 2 +- geonode/upload/handlers/common/raster.py | 12 ++--------- geonode/upload/handlers/common/remote.py | 1 + geonode/upload/handlers/common/vector.py | 25 +++++++++++----------- geonode/upload/handlers/csv/handler.py | 16 ++++++-------- geonode/upload/handlers/remote/tiles3d.py | 3 ++- geonode/upload/handlers/remote/wms.py | 9 ++------ geonode/upload/handlers/tiles3d/handler.py | 9 ++------ 8 files changed, 29 insertions(+), 48 deletions(-) diff --git a/geonode/upload/handlers/README.md b/geonode/upload/handlers/README.md index 7d67ca55cb3..5c1416d3d4b 100644 --- a/geonode/upload/handlers/README.md +++ b/geonode/upload/handlers/README.md @@ -149,7 +149,7 @@ class BaseVectorFileHandler(BaseHandler): return def create_geonode_resource( - self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify diff --git a/geonode/upload/handlers/common/raster.py b/geonode/upload/handlers/common/raster.py index 9094fefe71e..fcd9d17abea 100644 --- a/geonode/upload/handlers/common/raster.py +++ b/geonode/upload/handlers/common/raster.py @@ -26,7 +26,6 @@ from typing import List from django.conf import settings -from django.db.models import Q from geonode.base.models import ResourceBase from geonode.layers.models import Dataset from geonode.resource.enumerator import ExecutionRequestAction as exa @@ -240,9 +239,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, "raster_path": raster_path, } ] @@ -349,12 +346,7 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: return def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify diff --git a/geonode/upload/handlers/common/remote.py b/geonode/upload/handlers/common/remote.py index 36699f20bb1..98f3515ba5f 100755 --- a/geonode/upload/handlers/common/remote.py +++ b/geonode/upload/handlers/common/remote.py @@ -204,6 +204,7 @@ def create_geonode_resource( execution_id: str, resource_type: ResourceBase = ResourceBase, asset=None, + **kwargs, ): """ Creating geonode base resource diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index f5a0137bb9b..409ae7519ed 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -398,9 +398,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, } ] @@ -794,12 +792,7 @@ def promote_geom_to_multi(self, geom): return geom def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify @@ -826,7 +819,7 @@ def create_geonode_resource( saved_dataset = resource_manager.create( None, resource_type=resource_type, - defaults=self.generate_resource_payload(layer_name, alternate, asset, _exec, workspace), + defaults=self.generate_resource_payload(layer_name, alternate, asset, _exec, workspace, **kwargs), ) saved_dataset.refresh_from_db() @@ -843,12 +836,12 @@ def create_geonode_resource( return saved_dataset - def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace): + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs): return dict( name=alternate, workspace=workspace, store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), - subtype="vector", + subtype=kwargs.pop("subtype", None) or "vector", alternate=f"{workspace}:{alternate}", dirty_state=True, title=layer_name, @@ -956,11 +949,17 @@ def copy_geonode_resource( new_alternate: str, **kwargs, ): - + subtype = None + previous_resource = ResourceBase.objects.filter( + alternate__contains=kwargs.get("kwargs", {}).get("original_dataset_alternate") + ).first() + if previous_resource: + subtype = previous_resource.subtype new_resource = self.create_geonode_resource( layer_name=data_to_update.get("title"), alternate=new_alternate, execution_id=str(_exec.exec_id), + subtype=subtype, ) copy_assets_and_links(resource, target=new_resource) diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 0149f919d91..21bfb0536a2 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -33,7 +33,6 @@ from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING from geonode.upload.orchestrator import orchestrator -from django.db.models import Q logger = logging.getLogger("importer") @@ -206,13 +205,11 @@ def create_dynamic_model_fields( return dynamic_model_schema, celery_group def extract_resource_to_publish(self, files, action, layer_name, alternate, **kwargs): - if action == exa.COPY.value: + if action == exa.COPY.value: return [ { "name": alternate, - "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, } ] @@ -239,18 +236,19 @@ def identify_authority(self, layer): return "EPSG:4326" def create_geonode_resource( - self, layer_name, alternate, execution_id, resource_type: Dataset = Dataset, asset=None + self, layer_name, alternate, execution_id, resource_type: Dataset = Dataset, asset=None, **kwargs ): - res = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset) + res = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset, **kwargs) res.set_bbox_polygon(BBOX, res.srid) return res - def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace): + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs): + subtype = kwargs.pop("subtype", None) return dict( name=alternate, workspace=workspace, store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), - subtype="tabular" if _exec.input_params.get("is_tabular") else "vector", + subtype=subtype or ("tabular" if _exec.input_params.get("is_tabular") else "vector"), alternate=f"{workspace}:{alternate}", dirty_state=True, title=layer_name, diff --git a/geonode/upload/handlers/remote/tiles3d.py b/geonode/upload/handlers/remote/tiles3d.py index 01f14c7f037..586218c7426 100644 --- a/geonode/upload/handlers/remote/tiles3d.py +++ b/geonode/upload/handlers/remote/tiles3d.py @@ -85,8 +85,9 @@ def create_geonode_resource( execution_id: str, resource_type: Dataset = ResourceBase, asset=None, + **kwargs, ): - resource = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset, **kwargs) _exec = orchestrator.get_execution_object(exec_id=execution_id) try: js_file = requests.get(_exec.input_params.get("url"), timeout=10).json() diff --git a/geonode/upload/handlers/remote/wms.py b/geonode/upload/handlers/remote/wms.py index 69bb444992a..aec93994090 100644 --- a/geonode/upload/handlers/remote/wms.py +++ b/geonode/upload/handlers/remote/wms.py @@ -120,18 +120,13 @@ def generate_alternate( return layer_name, payload_alternate def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = ..., - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = ..., asset=None, **kwargs ): """ Use the default RemoteResourceHandler to create the geonode resource after that, we assign the bbox and re-generate the thumbnail """ - resource = super().create_geonode_resource(layer_name, alternate, execution_id, Dataset, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, Dataset, asset, **kwargs) _exec = orchestrator.get_execution_object(execution_id) remote_bbox = _exec.input_params.get("bbox") if remote_bbox: diff --git a/geonode/upload/handlers/tiles3d/handler.py b/geonode/upload/handlers/tiles3d/handler.py index 3affb738f01..7de235b8e7c 100755 --- a/geonode/upload/handlers/tiles3d/handler.py +++ b/geonode/upload/handlers/tiles3d/handler.py @@ -240,16 +240,11 @@ def pre_processing(self, files, execution_id, **kwargs): return _data, execution_id def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = ..., - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = ..., asset=None, **kwargs ): exec_obj = orchestrator.get_execution_object(execution_id) - resource = super().create_geonode_resource(layer_name, alternate, execution_id, ResourceBase, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, ResourceBase, asset, **kwargs) asset = self.create_asset_and_link( resource, files=exec_obj.input_params["files"], From f90e202edeb27e2785dd6ddc8c84604e9ee6c91e Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Fri, 23 Jan 2026 12:27:40 +0100 Subject: [PATCH 09/13] fix tests --- geonode/upload/handlers/common/vector.py | 27 ++++++++++++------------ geonode/upload/handlers/csv/handler.py | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 409ae7519ed..15553108ed5 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -410,7 +410,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "name": alternate or layer_name, "crs": self.identify_authority(_l) if _l.GetSpatialRef() else None, } - for _l in layers + for _l in (self._extract_layer(_l) for _l in layers) if self.fixup_name(_l.GetName()) == layer_name ] @@ -1173,18 +1173,19 @@ def __get_new_and_original_schema(self, files, execution_id): layer = self._extract_layer(layers[0]) # evaluate if some of the fid entry is null. if is null we stop the workflow # the user should provide the completed list with the fid set - sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME.lower()}" IS NULL' - - # Execute the SQL query to the layer via the gdal proxy object - result = all_layers[0].ExecuteSQL(sql_query) - if ( - not (result and DEFAULT_PK_COLUMN_NAME in (x.name.lower() for x in result.schema)) - or (result and result.GetFeatureCount() > 0) - or not result - ): - raise UpsertException( - f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" - ) + if exec_id.action == ira.UPSERT.value: + sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME.lower()}" IS NULL' + + # Execute the SQL query to the layer via the gdal proxy object + result = all_layers[0].ExecuteSQL(sql_query) + if ( + not (result and DEFAULT_PK_COLUMN_NAME in (x.name.lower() for x in result.schema)) + or (result and result.GetFeatureCount() > 0) + or not result + ): + raise UpsertException( + f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" + ) # Will generate the same schema as the target_resource_schema new_file_schema_fields = self.create_dynamic_model_fields( diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 21bfb0536a2..e8db0efab3e 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -222,7 +222,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "crs": (self.identify_authority(_l)), } for _l in layers - if self.fixup_name(_l.GetLayer().GetName()) == layer_name + if self.fixup_name(self._extract_layer(_l).GetName()) == layer_name ] def identify_authority(self, layer): From 6b78d5a3e1d65f354675f0188b261cc0c10359ed Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Fri, 23 Jan 2026 13:14:56 +0100 Subject: [PATCH 10/13] add docstring --- geonode/upload/handlers/common/vector.py | 6 ++++++ geonode/upload/handlers/csv/handler.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 15553108ed5..69200072a50 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -555,6 +555,12 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: return layer_names, alternates, execution_id def open_source_file(self, files): + """ + The importer switched from ogr to gdal library + This is required so we can rely on the options in GDAL, + while the ogr library does not allow that. + For example we can call the AUTODETECT_TYPE even in python + """ from osgeo import gdal gdal_proxy = gdal.OpenEx( diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index e8db0efab3e..4cbd4f085c4 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -56,7 +56,7 @@ def supported_file_extension_config(self): { "label": "CSV", "required_ext": ["csv"], - "optional_ext": ["sld", "xml", "json"], + "optional_ext": ["sld", "xml"], } ], "actions": list(self.TASKS.keys()), From 77dac63fd8f192f8c51e2a041956e22b8621f74a Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Fri, 23 Jan 2026 15:14:41 +0100 Subject: [PATCH 11/13] fix tests --- geonode/upload/handlers/common/tests_vector.py | 7 +++++-- geonode/upload/handlers/common/vector.py | 4 +--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/geonode/upload/handlers/common/tests_vector.py b/geonode/upload/handlers/common/tests_vector.py index 4e28704a380..da80460c432 100644 --- a/geonode/upload/handlers/common/tests_vector.py +++ b/geonode/upload/handlers/common/tests_vector.py @@ -297,6 +297,9 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv If the resource exists and should be skept, the celery task is not going to be called and the layer is skipped """ + mocked_obj = MagicMock() + mocked_obj.name = "CSV" + ogr2ogr_driver.return_value = mocked_obj exec_id = None try: # create the executionId @@ -311,9 +314,9 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv # start the resource import self.handler.import_resource(files=self.valid_files, execution_id=str(exec_id)) self.assertIn( - "No valid layers found", + "not recognized as a supported file format.", exception.exception.args[0], - "No valid layers found.", + "not recognized as a supported file format.", ) celery_chord.assert_not_called() diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 69200072a50..331fcb80954 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -58,7 +58,7 @@ ) from geonode.resource.manager import resource_manager from geonode.resource.models import ExecutionRequest -from osgeo import ogr +from osgeo import ogr, gdal from geonode.upload.api.exceptions import ImportException, UpsertException from geonode.upload.celery_app import importer_app from geonode.assets.utils import copy_assets_and_links, get_default_asset @@ -561,8 +561,6 @@ def open_source_file(self, files): while the ogr library does not allow that. For example we can call the AUTODETECT_TYPE even in python """ - from osgeo import gdal - gdal_proxy = gdal.OpenEx( files.get("base_file"), nOpenFlags=gdal.OF_VECTOR, From 2ebb8de49d068528c36e97c1f1c66090fd8af110 Mon Sep 17 00:00:00 2001 From: Mattia Giupponi Date: Fri, 23 Jan 2026 16:01:51 +0100 Subject: [PATCH 12/13] fix tests --- geonode/upload/tests/unit/test_publisher.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/geonode/upload/tests/unit/test_publisher.py b/geonode/upload/tests/unit/test_publisher.py index 9d7f7e06e10..5890d26875f 100644 --- a/geonode/upload/tests/unit/test_publisher.py +++ b/geonode/upload/tests/unit/test_publisher.py @@ -39,7 +39,7 @@ def setUp(self): layer = self.publisher.cat.get_resources("stazioni_metropolitana", workspaces="geonode") print("delete layer") if layer: - res = self.publisher.cat.delete(layer.resource, purge="all", recurse=True) + res = self.publisher.cat.delete(layer[0], purge="all", recurse=True) print(res.status_code) print(res.json) @@ -66,20 +66,6 @@ def test_extract_resource_name_and_crs(self): expected = {"crs": "EPSG:32632", "name": "stazioni_metropolitana"} self.assertDictEqual(expected, values_found[0]) - def test_extract_resource_name_and_crs_return_empty_if_the_file_does_not_exists( - self, - ): - """ - Given a layer and the original file, should extract the crs and the name - to let it publish in Geoserver - """ - values_found = self.publisher.extract_resource_to_publish( - files={"base_file": "/wrong/path/file.gpkg"}, - action="upload", - layer_name="stazioni_metropolitana", - ) - self.assertListEqual([], values_found) - @patch("geonode.upload.publisher.create_geoserver_db_featurestore") def test_get_or_create_store_creation_should_called(self, datastore): with patch.dict(os.environ, {"GEONODE_GEODATABASE": "not_existsing_db"}, clear=True): From 68bdd7ca88819387fd562417f2ae1f1b4287623d Mon Sep 17 00:00:00 2001 From: "G.Allegri" Date: Thu, 5 Feb 2026 16:45:43 +0100 Subject: [PATCH 13/13] aligned identify_autothority with master --- geonode/upload/handlers/common/vector.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index 0e9d96b917b..d5d752279c5 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -413,28 +413,6 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw if self.fixup_name(_l.GetName()) == layer_name ] - def identify_authority(self, layer): - layer = self._extract_layer(layer) - try: - layer_wkt = layer.GetSpatialRef().ExportToWkt() - _name = "EPSG" - _code = pyproj.CRS(layer_wkt).to_epsg(min_confidence=20) - if _code is None: - layer_proj4 = layer.GetSpatialRef().ExportToProj4() - _code = pyproj.CRS(layer_proj4).to_epsg(min_confidence=20) - if _code is None: - raise Exception("CRS authority code not found, fallback to default behaviour") - except Exception: - spatial_ref = layer.GetSpatialRef() - spatial_ref.AutoIdentifyEPSG() - _name = spatial_ref.GetAuthorityName(None) or spatial_ref.GetAttrValue("AUTHORITY", 0) - _code = ( - spatial_ref.GetAuthorityCode("PROJCS") - or spatial_ref.GetAuthorityCode("GEOGCS") - or spatial_ref.GetAttrValue("AUTHORITY", 1) - ) - return f"{_name}:{_code}" - def get_ogr2ogr_driver(self): """ Should return the Driver object that is used to open the layers via OGR2OGR