diff --git a/geonode/base/models.py b/geonode/base/models.py index 6ead4558734..0416262163d 100644 --- a/geonode/base/models.py +++ b/geonode/base/models.py @@ -955,7 +955,7 @@ def raw_abstract(self): @property def can_be_downloaded(self): - return self.subtype in {"vector", "raster", "vector_time"} + return self.subtype in {"vector", "raster", "vector_time", "tabular"} @property def can_have_wfs_links(self): @@ -963,7 +963,11 @@ def can_have_wfs_links(self): @property def can_have_wps_links(self): - return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time"} + return self.subtype in {"vector", "tileStore", "remote", "wmsStore", "vector_time", "tabular"} + + @property + def should_create_style(self): + return self.subtype != "tabular" @property def can_have_style(self): @@ -971,7 +975,7 @@ def can_have_style(self): @property def can_have_thumbnail(self): - return self.subtype not in {"3dtiles", "cog", "flatgeobuf"} + return self.subtype not in {"tabular", "3dtiles", "cog", "flatgeobuf"} @property def raw_purpose(self): @@ -993,6 +997,15 @@ def raw_data_quality_statement(self): def detail_url(self): return self.get_absolute_url() + def fixup_store_type(self, keys, values): + from geonode.geoserver.helpers import get_dataset_storetype + + if self.subtype == "tabular": + return self + for key in keys: + setattr(self, key, get_dataset_storetype(values[key])) + return self + def clean(self): if self.title: self.title = self.title.replace(",", "_") diff --git a/geonode/geoserver/helpers.py b/geonode/geoserver/helpers.py index 0b6a598a929..c42c970e394 100755 --- a/geonode/geoserver/helpers.py +++ b/geonode/geoserver/helpers.py @@ -100,6 +100,7 @@ ("application/wfs-collection-1.1", "vector"), ("application/zip", "vector"), ("text/csv", "vector"), + ("text/csv", "tabular"), ] DEFAULT_STYLE_NAME = ["generic", "line", "point", "polygon", "raster"] @@ -1973,10 +1974,7 @@ def sync_instance_with_geoserver(instance_id, *args, **kwargs): instance.gs_resource = gs_resource # Iterate over values from geoserver. - for key in ["alternate", "store", "subtype"]: - # attr_name = key if 'typename' not in key else 'alternate' - # print attr_name - setattr(instance, key, get_dataset_storetype(values[key])) + instance = instance.fixup_store_type(["alternate", "store", "subtype"], values) if updatemetadata: _sync_geoserver_keywords_to_instance(instance, gs_resource.keywords) @@ -2085,7 +2083,7 @@ def sync_instance_with_geoserver(instance_id, *args, **kwargs): # Refresh from DB instance.refresh_from_db() - if updatemetadata: + if updatemetadata and instance.should_create_style: # Save dataset styles logger.debug(f"... Refresh Legend links for Dataset {instance.title}") try: diff --git a/geonode/geoserver/tasks.py b/geonode/geoserver/tasks.py index 6cd6a4a31a6..e319f7819f9 100644 --- a/geonode/geoserver/tasks.py +++ b/geonode/geoserver/tasks.py @@ -154,6 +154,9 @@ def geoserver_create_style(self, instance_id, name, sld_file, tempdir): logger.debug(f"Dataset id {instance_id} does not exist yet!") raise + if not instance.should_create_style: + return + lock_id = f"{self.request.id}" if self.request.id else instance.name log_lock.debug(f"geoserver_create_style: Creating lock {lock_id} for {instance.name}") with AcquireLock(lock_id) as lock: diff --git a/geonode/layers/api/serializers.py b/geonode/layers/api/serializers.py index 5a95554c978..8eae5a79fd6 100644 --- a/geonode/layers/api/serializers.py +++ b/geonode/layers/api/serializers.py @@ -189,6 +189,7 @@ class Meta: "store", "subtype", "ptype", + "is_tabular", ) ) ) diff --git a/geonode/layers/models.py b/geonode/layers/models.py index f4f406dddd9..a5be49c4f65 100644 --- a/geonode/layers/models.py +++ b/geonode/layers/models.py @@ -164,6 +164,10 @@ def is_vector(self): def is_raster(self): return self.subtype == "raster" + @property + def is_tabular(self): + return self.subtype == "tabular" + @property def supports_time(self): valid_attributes = self.get_choices diff --git a/geonode/upload/handlers/README.md b/geonode/upload/handlers/README.md index 7d67ca55cb3..5c1416d3d4b 100644 --- a/geonode/upload/handlers/README.md +++ b/geonode/upload/handlers/README.md @@ -149,7 +149,7 @@ class BaseVectorFileHandler(BaseHandler): return def create_geonode_resource( - self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, files=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify diff --git a/geonode/upload/handlers/common/raster.py b/geonode/upload/handlers/common/raster.py index b8fdfa3de78..e89e6e06e20 100644 --- a/geonode/upload/handlers/common/raster.py +++ b/geonode/upload/handlers/common/raster.py @@ -25,7 +25,6 @@ from typing import List from django.conf import settings -from django.db.models import Q from geonode.base.models import ResourceBase from geonode.layers.models import Dataset from geonode.resource.enumerator import ExecutionRequestAction as exa @@ -239,11 +238,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter( - Q(alternate__icontains=layer_name) | Q(title__icontains=layer_name) - ) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, "raster_path": raster_path, } ] @@ -329,12 +324,7 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: return def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify @@ -547,7 +537,7 @@ def _publish_resource_rollback(self, exec_id, instance_name=None, *args, **kwarg publisher = DataPublisher(handler_module_path=handler_module_path) publisher.delete_resource(instance_name) - def fixup_dynamic_model_fields(self, _exec, files): + def fixup_dynamic_model_fields(self, _exec, files, **kwargs): """ Raster dataset does not have the dynamic model, so this can be skept """ diff --git a/geonode/upload/handlers/common/remote.py b/geonode/upload/handlers/common/remote.py index 97113fbc6dc..68485c0b04a 100755 --- a/geonode/upload/handlers/common/remote.py +++ b/geonode/upload/handlers/common/remote.py @@ -205,6 +205,7 @@ def create_geonode_resource( execution_id: str, resource_type: ResourceBase = ResourceBase, asset=None, + **kwargs, ): """ Creating geonode base resource diff --git a/geonode/upload/handlers/common/tests_vector.py b/geonode/upload/handlers/common/tests_vector.py index 4e28704a380..da80460c432 100644 --- a/geonode/upload/handlers/common/tests_vector.py +++ b/geonode/upload/handlers/common/tests_vector.py @@ -297,6 +297,9 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv If the resource exists and should be skept, the celery task is not going to be called and the layer is skipped """ + mocked_obj = MagicMock() + mocked_obj.name = "CSV" + ogr2ogr_driver.return_value = mocked_obj exec_id = None try: # create the executionId @@ -311,9 +314,9 @@ def test_import_resource_should_not_be_imported(self, celery_chord, ogr2ogr_driv # start the resource import self.handler.import_resource(files=self.valid_files, execution_id=str(exec_id)) self.assertIn( - "No valid layers found", + "not recognized as a supported file format.", exception.exception.args[0], - "No valid layers found.", + "not recognized as a supported file format.", ) celery_chord.assert_not_called() diff --git a/geonode/upload/handlers/common/vector.py b/geonode/upload/handlers/common/vector.py index d4db7d746ca..d5d752279c5 100644 --- a/geonode/upload/handlers/common/vector.py +++ b/geonode/upload/handlers/common/vector.py @@ -58,7 +58,7 @@ ) from geonode.resource.manager import resource_manager from geonode.resource.models import ExecutionRequest -from osgeo import ogr +from osgeo import ogr, gdal from geonode.upload.api.exceptions import ImportException, UpsertException from geonode.upload.celery_app import importer_app from geonode.assets.utils import copy_assets_and_links, get_default_asset @@ -77,7 +77,6 @@ from geonode.upload.registry import feature_validators_registry from django.core.exceptions import ValidationError - logger = logging.getLogger("importer") @@ -398,15 +397,11 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter( - Q(alternate__icontains=layer_name) | Q(title__icontains=layer_name) - ) - .first() - .srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, } ] - layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + layers = self.open_source_file(files) if not layers: return [] return [ @@ -414,7 +409,7 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "name": alternate or layer_name, "crs": self.identify_authority(_l) if _l.GetSpatialRef() else None, } - for _l in layers + for _l in (self._extract_layer(_l) for _l in layers) if self.fixup_name(_l.GetName()) == layer_name ] @@ -424,14 +419,17 @@ def get_ogr2ogr_driver(self): """ return None + def _gdal_open_options(self): + return {} + def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: """ Main function to import the resource. Internally will call the steps required to import the data inside the geonode_data database """ - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) - layers = self._select_valid_layers(all_layers) + gdal_proxy = self.open_source_file(files) + layers = self._select_valid_layers(gdal_proxy) # for the moment we skip the dyanamic model creation layer_count = len(layers) logger.info(f"Total number of layers available: {layer_count}") @@ -451,7 +449,8 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: # start looping on the layers available - for index, layer in enumerate(layers, start=1): + for index, gdal_layer in enumerate(layers, start=1): + layer = self._extract_layer(gdal_layer) layer_name = self.fixup_name(layer.GetName()) should_be_overwritten = _exec.input_params.get("overwrite_existing_layer") @@ -532,10 +531,26 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str: raise e return layer_names, alternates, execution_id + def open_source_file(self, files): + """ + The importer switched from ogr to gdal library + This is required so we can rely on the options in GDAL, + while the ogr library does not allow that. + For example we can call the AUTODETECT_TYPE even in python + """ + gdal_proxy = gdal.OpenEx( + files.get("base_file"), + nOpenFlags=gdal.OF_VECTOR, + allowed_drivers=[self.get_ogr2ogr_driver().name], + **self._gdal_open_options(), + ) + return [gdal_proxy] + def _select_valid_layers(self, all_layers): layers = [] for layer in all_layers: try: + layer = self._extract_layer(layer) self.identify_authority(layer) layers.append(layer) except Exception as e: @@ -546,14 +561,17 @@ def _select_valid_layers(self, all_layers): pass return layers + def can_overwrite(self, _exec_obj, dataset): + is_tabular = _exec_obj.input_params.get("is_tabular", None) + return dataset.is_vector() if not is_tabular else is_tabular + def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten): if _exec_obj.input_params.get("resource_pk"): dataset = Dataset.objects.filter(pk=_exec_obj.input_params.get("resource_pk")).first() if not dataset: raise ImportException("The dataset selected for the ovewrite does not exists") - if should_be_overwritten: - if not dataset.is_vector(): - raise Exception("Cannot override a raster dataset with a vector one") + if should_be_overwritten and not self.can_overwrite(_exec_obj, dataset): + raise Exception("Cannot override a raster dataset with a vector one") alternate = dataset.alternate.split(":") return alternate[-1] @@ -561,9 +579,8 @@ def find_alternate_by_dataset(self, _exec_obj, layer_name, should_be_overwritten dataset_available = Dataset.objects.filter(alternate__iexact=f"{workspace.name}:{layer_name}") dataset_exists = dataset_available.exists() - if should_be_overwritten: - if not dataset_available.is_vector(): - raise Exception("Cannot override a raster dataset with a vector one") + if should_be_overwritten and not self.can_overwrite(_exec_obj, dataset): + raise Exception("Cannot override a raster dataset with a vector one") if dataset_exists and should_be_overwritten: alternate = dataset_available.first().alternate.split(":")[-1] @@ -590,7 +607,7 @@ def setup_dynamic_model( - celery_group -> the celery group of the field creation """ - layer_name = self.fixup_name(layer.GetName() if isinstance(layer, ogr.Layer) else layer) + layer_name = self.fixup_name(self._extract_layer(layer).GetName()) _exec_obj = orchestrator.get_execution_object(execution_id) is_dynamic_model_managed = _exec_obj.input_params.get("is_dynamic_model_managed", False) @@ -756,12 +773,7 @@ def promote_geom_to_multi(self, geom): return geom def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = Dataset, - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = Dataset, asset=None, **kwargs ): """ Base function to create the resource into geonode. Each handler can specify @@ -788,7 +800,7 @@ def create_geonode_resource( saved_dataset = resource_manager.create( None, resource_type=resource_type, - defaults=self.generate_resource_payload(layer_name, alternate, asset, _exec, workspace), + defaults=self.generate_resource_payload(layer_name, alternate, asset, _exec, workspace, **kwargs), ) saved_dataset.refresh_from_db() @@ -805,12 +817,12 @@ def create_geonode_resource( return saved_dataset - def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace): + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs): return dict( name=alternate, workspace=workspace, store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), - subtype="vector", + subtype=kwargs.pop("subtype", None) or "vector", alternate=f"{workspace}:{alternate}", dirty_state=True, title=layer_name, @@ -918,11 +930,17 @@ def copy_geonode_resource( new_alternate: str, **kwargs, ): - + subtype = None + previous_resource = ResourceBase.objects.filter( + alternate__contains=kwargs.get("kwargs", {}).get("original_dataset_alternate") + ).first() + if previous_resource: + subtype = previous_resource.subtype new_resource = self.create_geonode_resource( layer_name=data_to_update.get("title"), alternate=new_alternate, execution_id=str(_exec.exec_id), + subtype=subtype, ) copy_assets_and_links(resource, target=new_resource) @@ -1099,7 +1117,7 @@ def upsert_validation(self, files, execution_id, **kwargs: dict) -> Tuple[bool, return True, None except Exception as e: - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) if layers := self._select_valid_layers(all_layers): _errors = e.args[0] if isinstance(e, UpsertException) else [str(e)] if isinstance(_errors, str): @@ -1111,6 +1129,11 @@ def upsert_validation(self, files, execution_id, **kwargs: dict) -> Tuple[bool, "User does not have enough permissions to perform this action on the selected resource" ) + def _extract_layer(self, layer): + if not isinstance(layer, ogr.Layer): + layer = layer.GetLayer() + return layer + def __get_new_and_original_schema(self, files, execution_id): # check if the execution_id is passed and if the geonode resource exists exec_id = orchestrator.get_execution_object(execution_id) @@ -1123,27 +1146,31 @@ def __get_new_and_original_schema(self, files, execution_id): target_schema_fields = FieldSchema.objects.filter(model_schema__name=target_resource.alternate.split(":")[-1]) # use ogr2ogr to read the uploaded files for the upsert - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) layers = self._select_valid_layers(all_layers) if not layers: raise UpsertException("No valid layers found in the provided file for upsert.") - layer = layers[0] + layer = self._extract_layer(layers[0]) # evaluate if some of the fid entry is null. if is null we stop the workflow # the user should provide the completed list with the fid set - sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME}" IS NULL' - - # Execute the SQL query to the layer - result = all_layers.ExecuteSQL(sql_query) - if not result or (result and result.GetFeatureCount() > 0): - raise UpsertException( - f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" - ) + if exec_id.action == ira.UPSERT.value: + sql_query = f'SELECT * FROM "{layer.GetName()}" WHERE "{DEFAULT_PK_COLUMN_NAME.lower()}" IS NULL' + + # Execute the SQL query to the layer via the gdal proxy object + result = all_layers[0].ExecuteSQL(sql_query) + if ( + not (result and DEFAULT_PK_COLUMN_NAME in (x.name.lower() for x in result.schema)) + or (result and result.GetFeatureCount() > 0) + or not result + ): + raise UpsertException( + f"All the feature in the file must have the fid field correctly populated. Number of None value: {result.GetFeatureCount() if result else 'all'}" + ) # Will generate the same schema as the target_resource_schema new_file_schema_fields = self.create_dynamic_model_fields( - layer, - return_celery_group=False, + layer, return_celery_group=False, execution_id=execution_id ) return target_schema_fields, new_file_schema_fields @@ -1214,7 +1241,7 @@ def upsert_data(self, files, execution_id, **kwargs): # if for any reason the key is not present, better to raise an error raise UpsertException("Was not possible to find the upsert key, upsert is aborted") # use ogr2ogr to read the uploaded files values for the upsert - all_layers = self.get_ogr2ogr_driver().Open(files.get("base_file")) + all_layers = self.open_source_file(files) valid_create = 0 valid_update = 0 layers = self._select_valid_layers(all_layers) @@ -1319,8 +1346,8 @@ def _create_error_log(self, exec_obj, layers, errors): "Error found during the upsert process, no update/create will be perfomed. The error log is going to be created..." ) errors_to_print = errors[: settings.UPSERT_LIMIT_ERROR_LOG] - - log_name = f'error_{layers[0].GetName()}_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.csv' + layer = self._extract_layer(layers[0]) + log_name = f'error_{layer.GetName()}_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.csv' with tempfile.TemporaryDirectory() as temp_dir_str: temp_dir = Path(temp_dir_str) @@ -1400,11 +1427,12 @@ def _save_feature(self, data_chunk, model_obj, model_instance, upsert_key, valid # need to simulate the "promote to multi" used by the upload process. # here we cannot rely on ogr2ogr so we need to do it manually geom = feature.GetGeometryRef() - code = geom.GetSpatialReference().GetAuthorityCode(None) - feature_as_dict.update( - {self.default_geometry_column_name: f"SRID={code};{self.promote_geom_to_multi(geom).ExportToWkt()}"} - ) - to_process.append(feature_as_dict) + if geom: + code = geom.GetSpatialReference().GetAuthorityCode(None) + feature_as_dict.update( + {self.default_geometry_column_name: f"SRID={code};{self.promote_geom_to_multi(geom).ExportToWkt()}"} + ) + to_process.append(feature_as_dict) for feature_as_dict in to_process: if feature_as_dict.get(upsert_key) in value_in_db: @@ -1494,7 +1522,7 @@ def refresh_geonode_resource(self, execution_id, asset=None, dataset=None, creat self.__fixup_primary_key(dataset) return dataset - def fixup_dynamic_model_fields(self, _exec, files): + def fixup_dynamic_model_fields(self, _exec, files, **kwargs): """ Utility needed during the replace workflow, it will sync all the FieldSchema along with the current resource uploaded. diff --git a/geonode/upload/handlers/csv/handler.py b/geonode/upload/handlers/csv/handler.py index 8170670e450..4cbd4f085c4 100644 --- a/geonode/upload/handlers/csv/handler.py +++ b/geonode/upload/handlers/csv/handler.py @@ -17,7 +17,10 @@ # ######################################################################### import logging +import os +from geonode.geoserver.createlayer.utils import BBOX +from geonode.layers.models import Dataset from geonode.resource.enumerator import ExecutionRequestAction as exa from geonode.upload.api.exceptions import UploadParallelismLimitException from geonode.upload.utils import UploadLimitValidator @@ -29,6 +32,7 @@ from dynamic_models.models import ModelSchema from geonode.upload.handlers.common.vector import BaseVectorFileHandler from geonode.upload.handlers.utils import GEOM_TYPE_MAPPING +from geonode.upload.orchestrator import orchestrator logger = logging.getLogger("importer") @@ -104,7 +108,6 @@ def is_valid(files, user, **kwargs): has_lat = any(x in CSVFileHandler().possible_lat_column for x in schema_keys) has_long = any(x in CSVFileHandler().possible_long_column for x in schema_keys) - fields = CSVFileHandler().possible_geometry_column_name + CSVFileHandler().possible_latlong_column if has_lat and not has_long: raise InvalidCSVException( f"Longitude is missing. Supported names: {', '.join(CSVFileHandler().possible_long_column)}" @@ -116,13 +119,19 @@ def is_valid(files, user, **kwargs): ) if not geom_is_in_schema and not has_lat and not has_long: - raise InvalidCSVException(f"Not enough geometry field are set. The possibilities are: {','.join(fields)}") + exec_obj = orchestrator.get_execution_object(kwargs.get("execution_id")) + exec_obj.input_params.update({"is_tabular": True}) + exec_obj.save() + return True return True def get_ogr2ogr_driver(self): return ogr.GetDriverByName("CSV") + def _gdal_open_options(self): + return {"open_options": ["AUTODETECT_TYPE=YES"]} + @staticmethod def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, **kwargs): """ @@ -134,6 +143,7 @@ def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate, ** return ( f"{base_command} -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME={BaseVectorFileHandler().default_geometry_column_name} " + additional_option + + " -oo AUTODETECT_TYPE=YES" ) def create_dynamic_model_fields( @@ -147,10 +157,13 @@ def create_dynamic_model_fields( ): # retrieving the field schema from ogr2ogr and converting the type to Django Types layer_schema = [{"name": x.name.lower(), "class_name": self._get_type(x), "null": True} for x in layer.schema] + exec_obj = orchestrator.get_execution_object(execution_id) + if ( layer.GetGeometryColumn() or self.default_geometry_column_name and ogr.GeometryTypeToName(layer.GetGeomType()) not in ["Geometry Collection", "Unknown (any)"] + and not exec_obj.input_params.get("is_tabular") ): # the geometry colum is not returned rom the layer.schema, so we need to extract it manually # checking if the geometry has been wrogly read as string @@ -196,11 +209,11 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw return [ { "name": alternate, - "crs": ResourceBase.objects.filter(alternate__istartswith=layer_name).first().srid, + "crs": ResourceBase.objects.filter(alternate=kwargs.get("original_dataset_alternate")).first().srid, } ] - layers = self.get_ogr2ogr_driver().Open(files.get("base_file"), 0) + layers = self.open_source_file(files) if not layers: return [] return [ @@ -209,12 +222,36 @@ def extract_resource_to_publish(self, files, action, layer_name, alternate, **kw "crs": (self.identify_authority(_l)), } for _l in layers - if self.fixup_name(_l.GetName()) == layer_name + if self.fixup_name(self._extract_layer(_l).GetName()) == layer_name ] def identify_authority(self, layer): + layer = self._extract_layer(layer) try: authority_code = super().identify_authority(layer=layer) + if authority_code == ogr.wkbNone: + logger.warning("For tabular CSV, we set by default EPSG:4326") return authority_code except Exception: return "EPSG:4326" + + def create_geonode_resource( + self, layer_name, alternate, execution_id, resource_type: Dataset = Dataset, asset=None, **kwargs + ): + res = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset, **kwargs) + res.set_bbox_polygon(BBOX, res.srid) + return res + + def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs): + subtype = kwargs.pop("subtype", None) + return dict( + name=alternate, + workspace=workspace, + store=os.environ.get("GEONODE_GEODATABASE", "geonode_data"), + subtype=subtype or ("tabular" if _exec.input_params.get("is_tabular") else "vector"), + alternate=f"{workspace}:{alternate}", + dirty_state=True, + title=layer_name, + owner=_exec.user, + asset=asset, + ) diff --git a/geonode/upload/handlers/csv/tests.py b/geonode/upload/handlers/csv/tests.py index 3ce2b48f586..b2ff4c18def 100644 --- a/geonode/upload/handlers/csv/tests.py +++ b/geonode/upload/handlers/csv/tests.py @@ -31,6 +31,8 @@ from geonode.upload.handlers.csv.handler import CSVFileHandler from osgeo import ogr +from geonode.upload.utils import ExecutionRequest + class TestCSVHandler(TestCase): databases = ("default", "datastore") @@ -78,12 +80,25 @@ def test_is_valid_should_raise_exception_if_the_csv_is_invalid(self): self.assertIsNotNone(_exc) self.assertTrue("The CSV provided is invalid, no layers found" in str(_exc.exception.detail)) - def test_is_valid_should_raise_exception_if_the_csv_missing_geom(self): - with self.assertRaises(InvalidCSVException) as _exc: - self.handler.is_valid(files={"base_file": self.missing_geom}, user=self.user) + def test_is_valid_should_set_as_tabular_if_the_csv_miss_geom(self): + exec_obj = ExecutionRequest.objects.create( + user=self.user, + func_name="test", + geonode_resource=self.layer, + input_params={ + "uuid": self.layer.uuid, + "owner": self.layer.owner.username, + "resource_type": self.layer.resource_type, + "defaults": f'{{"owner":"{self.layer.owner.username}"}}', + }, + ) + self.handler.is_valid( + files={"base_file": self.missing_geom}, user=self.user, execution_id=str(exec_obj.exec_id) + ) - self.assertIsNotNone(_exc) - self.assertTrue("Not enough geometry field are set" in str(_exc.exception.detail)) + exec_obj.refresh_from_db() + + self.assertTrue(exec_obj.input_params.get("is_tabular", False)) def test_is_valid_should_raise_exception_if_the_csv_missing_lat(self): with self.assertRaises(InvalidCSVException) as _exc: @@ -176,7 +191,7 @@ def test_import_with_ogr2ogr_without_errors_should_call_the_right_command(self, + '\' " "' + self.valid_csv + '" -lco FID=fid' - + ' -nln alternate "dataset" -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME=geom -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*"', # noqa + + ' -nln alternate "dataset" -oo KEEP_GEOM_COLUMNS=NO -lco GEOMETRY_NAME=geom -oo "GEOM_POSSIBLE_NAMES=geom*,the_geom*,wkt_geom" -oo "X_POSSIBLE_NAMES=x,long*" -oo "Y_POSSIBLE_NAMES=y,lat*" -oo AUTODETECT_TYPE=YES', # noqa stdout=-1, stderr=-1, shell=True, # noqa diff --git a/geonode/upload/handlers/remote/tiles3d.py b/geonode/upload/handlers/remote/tiles3d.py index 01f14c7f037..586218c7426 100644 --- a/geonode/upload/handlers/remote/tiles3d.py +++ b/geonode/upload/handlers/remote/tiles3d.py @@ -85,8 +85,9 @@ def create_geonode_resource( execution_id: str, resource_type: Dataset = ResourceBase, asset=None, + **kwargs, ): - resource = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, resource_type, asset, **kwargs) _exec = orchestrator.get_execution_object(exec_id=execution_id) try: js_file = requests.get(_exec.input_params.get("url"), timeout=10).json() diff --git a/geonode/upload/handlers/remote/wms.py b/geonode/upload/handlers/remote/wms.py index 69bb444992a..aec93994090 100644 --- a/geonode/upload/handlers/remote/wms.py +++ b/geonode/upload/handlers/remote/wms.py @@ -120,18 +120,13 @@ def generate_alternate( return layer_name, payload_alternate def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = ..., - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = ..., asset=None, **kwargs ): """ Use the default RemoteResourceHandler to create the geonode resource after that, we assign the bbox and re-generate the thumbnail """ - resource = super().create_geonode_resource(layer_name, alternate, execution_id, Dataset, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, Dataset, asset, **kwargs) _exec = orchestrator.get_execution_object(execution_id) remote_bbox = _exec.input_params.get("bbox") if remote_bbox: diff --git a/geonode/upload/handlers/tiles3d/handler.py b/geonode/upload/handlers/tiles3d/handler.py index 3affb738f01..7de235b8e7c 100755 --- a/geonode/upload/handlers/tiles3d/handler.py +++ b/geonode/upload/handlers/tiles3d/handler.py @@ -240,16 +240,11 @@ def pre_processing(self, files, execution_id, **kwargs): return _data, execution_id def create_geonode_resource( - self, - layer_name: str, - alternate: str, - execution_id: str, - resource_type: Dataset = ..., - asset=None, + self, layer_name: str, alternate: str, execution_id: str, resource_type: Dataset = ..., asset=None, **kwargs ): exec_obj = orchestrator.get_execution_object(execution_id) - resource = super().create_geonode_resource(layer_name, alternate, execution_id, ResourceBase, asset) + resource = super().create_geonode_resource(layer_name, alternate, execution_id, ResourceBase, asset, **kwargs) asset = self.create_asset_and_link( resource, files=exec_obj.input_params["files"], diff --git a/geonode/upload/tests/end2end/test_end2end.py b/geonode/upload/tests/end2end/test_end2end.py index 4447c306bdf..54a18cb3d06 100644 --- a/geonode/upload/tests/end2end/test_end2end.py +++ b/geonode/upload/tests/end2end/test_end2end.py @@ -67,6 +67,7 @@ def setUpClass(cls) -> None: cls.valid_tif = f"{project_dir}/tests/fixture/test_raster.tif" cls.valid_csv = f"{project_dir}/tests/fixture/valid.csv" + cls.missing_geom_csv = f"{project_dir}/tests/fixture/missing_geom.csv" cls.url = reverse("importer_upload") ogc_server_settings = OGC_Servers_Handler(settings.OGC_SERVER)["default"] @@ -297,7 +298,7 @@ def test_import_geojson_overwrite(self): class ImporterGCSVImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") - def test_import_geojson(self): + def test_import_csv(self): self._cleanup_layers(name="valid") payload = {"base_file": open(self.valid_csv, "rb"), "action": "upload"} @@ -321,6 +322,48 @@ def test_import_csv_overwrite(self): self._cleanup_layers(name="valid") +class ImporterGCSVTabularImportTest(BaseImporterEndToEndTest): + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") + def test_import_tabular_csv(self): + self._cleanup_layers(name="missing_geom") + + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + self._assertimport( + payload, + initial_name, + assert_payload={ + "subtype": "tabular", + "resource_type": "dataset", + }, + ) + self._cleanup_layers(name="missing_geom") + + @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) + @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") + def test_import_tabular_csv_overwrite(self): + self._cleanup_layers(name="missing_geom") + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + prev_dataset = self._assertimport( + payload, + initial_name, + keep_resource=True, + assert_payload={ + "subtype": "tabular", + "resource_type": "dataset", + }, + ) + + payload = {"base_file": open(self.missing_geom_csv, "rb"), "action": "upload"} + initial_name = "missing_geom" + payload["overwrite_existing_layer"] = True + payload["resource_pk"] = prev_dataset.pk + self._assertimport(payload, initial_name, overwrite=True, last_update=prev_dataset.last_updated) + self._cleanup_layers(name="missing_geom") + + class ImporterKMLImportTest(BaseImporterEndToEndTest): @mock.patch.dict(os.environ, {"GEONODE_GEODATABASE": "test_geonode_data"}) @override_settings(GEODATABASE_URL=f"{geourl.split('/geonode_data')[0]}/test_geonode_data") diff --git a/geonode/upload/tests/unit/test_publisher.py b/geonode/upload/tests/unit/test_publisher.py index 9d7f7e06e10..5890d26875f 100644 --- a/geonode/upload/tests/unit/test_publisher.py +++ b/geonode/upload/tests/unit/test_publisher.py @@ -39,7 +39,7 @@ def setUp(self): layer = self.publisher.cat.get_resources("stazioni_metropolitana", workspaces="geonode") print("delete layer") if layer: - res = self.publisher.cat.delete(layer.resource, purge="all", recurse=True) + res = self.publisher.cat.delete(layer[0], purge="all", recurse=True) print(res.status_code) print(res.json) @@ -66,20 +66,6 @@ def test_extract_resource_name_and_crs(self): expected = {"crs": "EPSG:32632", "name": "stazioni_metropolitana"} self.assertDictEqual(expected, values_found[0]) - def test_extract_resource_name_and_crs_return_empty_if_the_file_does_not_exists( - self, - ): - """ - Given a layer and the original file, should extract the crs and the name - to let it publish in Geoserver - """ - values_found = self.publisher.extract_resource_to_publish( - files={"base_file": "/wrong/path/file.gpkg"}, - action="upload", - layer_name="stazioni_metropolitana", - ) - self.assertListEqual([], values_found) - @patch("geonode.upload.publisher.create_geoserver_db_featurestore") def test_get_or_create_store_creation_should_called(self, datastore): with patch.dict(os.environ, {"GEONODE_GEODATABASE": "not_existsing_db"}, clear=True):