diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3e7c2268..a010a076 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,7 +28,7 @@ jobs: matrix: os: [ubuntu-latest] dev: [false] - python: ["3.10", "3.11", "3.12"] + python: ["3.10", "3.11", "3.12", "3.13"] env: ["latest"] # Use openblas instead of mkl saves 600 MB. Linux OK, 50% slower on Windows and OSX! extra: ["nomkl"] @@ -44,7 +44,7 @@ jobs: - env: latest os: windows-latest dev: false - python: "3.12" + python: "3.13" steps: - uses: actions/checkout@v6 diff --git a/CHANGELOG.md b/CHANGELOG.md index b6605f2a..c5e63175 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ - Add first version of a cover/bare soil marker (#168, #200) - Avoid high committed memory for zonal stats calculation (#197) - General small improvements, e.g. save randomforest models compressed,.. (#144) +- Use Exactextract as default engine for zonalstats calculation (#181) ### Bugs fixed diff --git a/cropclassification/__init__.py b/cropclassification/__init__.py index cce4f0c2..afa44509 100644 --- a/cropclassification/__init__.py +++ b/cropclassification/__init__.py @@ -1 +1,7 @@ """Package with functionalities to support agricultural parcel monitoring.""" + +import logging + +# Disable info logging of botocore.credentials +logger_botocore_credentials = logging.getLogger("botocore.credentials") +logger_botocore_credentials.setLevel(logging.WARNING) diff --git a/cropclassification/calc_cropclass.py b/cropclassification/calc_cropclass.py index 4c774f15..68e2dcec 100644 --- a/cropclassification/calc_cropclass.py +++ b/cropclassification/calc_cropclass.py @@ -189,6 +189,7 @@ def run_cropclass( parceldata_aggregations_to_use = conf.marker.getlist( "parceldata_aggregations_to_use" ) + ts.calc_timeseries_data( input_parcel_path=imagedata_input_parcel_path, roi_bounds=tuple(conf.roi.getlistfloat("roi_bounds")), diff --git a/cropclassification/general.ini 
b/cropclassification/general.ini index 9888e7ab..7708c4cb 100644 --- a/cropclassification/general.ini +++ b/cropclassification/general.ini @@ -138,14 +138,30 @@ on_missing_image = calculate_raise # Configuration on how/which periodic images/timeseries statistics should be generated. [timeseries] +# Engine to use for the timeseries calculation. +# Possible values are pyqgis, rasterstats and exactextract. +engine = exactextract + +# Stats to calculate for the timeseries. The following stats are available: +# - "rasterstats": documentation: https://pythonhosted.org/rasterstats/manual.html#statistics +# - "pyqgis": "count", "sum", "mean", "median", "std", "min", "max", "range", "minority", "majority" and "variance". +# - "exactextract": documentation: https://isciences.github.io/exactextract/operations.html +stats = [ + "count(min_coverage_frac=1,coverage_weight=none)", + "mean(min_coverage_frac=1,coverage_weight=none)", + "median(min_coverage_frac=1,coverage_weight=none)", + "stdev(min_coverage_frac=1,coverage_weight=none)", + "min(min_coverage_frac=1,coverage_weight=none)", + "max(min_coverage_frac=1,coverage_weight=none)" + ] # Negative buffer to apply to input parcels to account for mixels. -buffer = 5 +buffer = 0 # The maximum percentage cloudcover an (S2) image can have to be used. max_cloudcover_pct = 15 # The min percentage of parcels that need to have valid data for a time+sensor to use it -min_parcels_with_data_pct = 80 +min_parcels_with_data_pct = 75 # Configuration specific to the marker being calculated. 
[marker] diff --git a/cropclassification/helpers/config_helper.py b/cropclassification/helpers/config_helper.py index 1fefeb10..1f534654 100644 --- a/cropclassification/helpers/config_helper.py +++ b/cropclassification/helpers/config_helper.py @@ -146,6 +146,7 @@ def read_config( "list": lambda x: [i.strip() for i in x.split(",")], "listint": lambda x: [int(i.strip()) for i in x.split(",")], "listfloat": lambda x: [float(i.strip()) for i in x.split(",")], + "jsonlist": lambda x: None if x is None else json.loads(x), "dict": lambda x: None if x is None else json.loads(x), "path": lambda x: None if x is None else Path(x), }, diff --git a/cropclassification/preprocess/_timeseries_calc_openeo.py b/cropclassification/preprocess/_timeseries_calc_openeo.py index 88a3cd3e..766a3c9e 100644 --- a/cropclassification/preprocess/_timeseries_calc_openeo.py +++ b/cropclassification/preprocess/_timeseries_calc_openeo.py @@ -25,6 +25,8 @@ def calculate_periodic_timeseries( imageprofiles: dict[str, ImageProfile], images_periodic_dir: Path, timeseries_periodic_dir: Path, + engine: str, + stats: list[str], nb_parallel: int, on_missing_image: str, force: bool = False, @@ -44,6 +46,16 @@ def calculate_periodic_timeseries( images_periodic_dir (Path): directory where the images are stored. timeseries_periodic_dir (Path): directory where the timeseries data will be saved. + engine (str): the engine to use for the calculation. Options are + "exactextract", "rasterstats" and "pyqgis". + stats (list[str]): statistics to calculate. Available statistics and + special options are dependent on the `engine` specified: + + - "rasterstats": `rasterstats documentation `_ + - "pyqgis": "count", "sum", "mean", "median", "std", "min", "max", + "range", "minority", "majority" and "variance". + - "exactextract": `exactextract documentation `_ + nb_parallel (int): number of parallel processes to use. on_missing_image (str): what to do when an image is missing. 
Options are: @@ -88,14 +100,13 @@ def calculate_periodic_timeseries( if temp_dir == "None": temp_dir = Path(tempfile.gettempdir()) - logger.info(f"Calculating timeseries for {len(images_bands)} images") zonal_stats_bulk.zonal_stats( vector_path=input_parcel_path, id_column=conf.columns["id"], rasters_bands=images_bands, output_dir=timeseries_periodic_dir, - stats=["count", "mean", "median", "std", "min", "max"], - engine="pyqgis", + stats=stats, + engine=engine, nb_parallel=nb_parallel, force=force, ) diff --git a/cropclassification/preprocess/timeseries.py b/cropclassification/preprocess/timeseries.py index 8fc19cd9..ae6f3793 100644 --- a/cropclassification/preprocess/timeseries.py +++ b/cropclassification/preprocess/timeseries.py @@ -64,6 +64,8 @@ def calc_timeseries_data( imageprofiles=conf.image_profiles, images_periodic_dir=conf.paths.getpath("images_periodic_dir"), timeseries_periodic_dir=timeseries_periodic_dir, + engine=conf.timeseries.get("engine"), + stats=conf.timeseries.getjsonlist("stats"), nb_parallel=conf.general.getint("nb_parallel", -1), on_missing_image=conf.images.get("on_missing_image", "calculate_raise"), force=force, diff --git a/cropclassification/util/zonal_stats_bulk/__init__.py b/cropclassification/util/zonal_stats_bulk/__init__.py index 58d399da..d214cf38 100644 --- a/cropclassification/util/zonal_stats_bulk/__init__.py +++ b/cropclassification/util/zonal_stats_bulk/__init__.py @@ -1,5 +1,6 @@ """Calculate zonal statistics for a vector file with many raster files.""" +import logging from pathlib import Path from . 
import ( @@ -9,16 +10,18 @@ ) from ._raster_helper import * # noqa: F403 +logger = logging.getLogger(__name__) + def zonal_stats( vector_path: Path, id_column: str, rasters_bands: list[tuple[Path, list[str]]], output_dir: Path, + engine: str, stats: list[str] | str | None = None, cloud_filter_band: str | None = None, calc_bands_parallel: bool = True, - engine: str = "rasterstats", nb_parallel: int = -1, force: bool = False, ) -> None: @@ -31,21 +34,19 @@ def zonal_stats( rasters_bands (List[Tuple[Path, List[str]]]): List of tuples with the path to the raster files and the bands to calculate the zonal statistics on. output_dir (Path): directory to write the results to. + engine (str): the engine to use for the calculation. Options are + "exactextract", "rasterstats" and "pyqgis". stats (List[str]): statistics to calculate. Default to ["count", "median"]. Available statistics and special options are dependent on the `engine` specified: - - "rasterstats": `rasterstats documentation `_ - "pyqgis": "count", "sum", "mean", "median", "std", "min", "max", "range", "minority", "majority" and "variance". - "exactextract": `exactextract documentation `_ - cloud_filter_band (str, optional): the band to use as a cloud filter. Only supported for engine "rasterstats". Defaults to None. calc_bands_parallel (bool, optional): True to calculate the bands in parallel. Only supported for engine "rasterstats". Defaults to True. - engine (str, optional): the engine to use for the calculation. Options are - "exactextract", "rasterstats" and "pyqgis". Defaults to "rasterstats". nb_parallel (int, optional): the number of parallel processes to use. Defaults to -1: use all available processors. force (bool, optional): False to skip calculating existing output files. 
True to diff --git a/cropclassification/util/zonal_stats_bulk/_zonal_stats_bulk_exactextract.py b/cropclassification/util/zonal_stats_bulk/_zonal_stats_bulk_exactextract.py index c77536da..41e1be36 100644 --- a/cropclassification/util/zonal_stats_bulk/_zonal_stats_bulk_exactextract.py +++ b/cropclassification/util/zonal_stats_bulk/_zonal_stats_bulk_exactextract.py @@ -137,6 +137,7 @@ def zonal_stats( tmp_dir=tmp_dir, include_cols=columns, output_paths=output_paths, + force=force, ) calc_queue[future] = { "vector_path": vector_path, @@ -236,9 +237,10 @@ def zonal_stats_band( output="pandas", include_cols=include_cols, ) - - except Exception: - raise + except Exception as ex: + message = f"Error calculating zonal stats {stats}: {ex}" + logger.error(message) + raise ValueError(message) from ex return stats_df @@ -253,13 +255,6 @@ def zonal_stats_band_tofile( include_cols: list[str], force: bool = False, ) -> dict[str, Path]: - # Init - if all(output_path.exists() for output_path in output_paths.values()): - if force: - for output_path in output_paths.values(): - output_path.unlink(missing_ok=True) - return output_paths - stats_df = zonal_stats_band( vector_path=vector_path, raster_path=raster_path, @@ -273,17 +268,26 @@ def zonal_stats_band_tofile( for band in bands: index = raster_info.bands[band].band_index band_columns = include_cols.copy() - band_columns.extend( - [f"band_{index}_{stat}" for stat in [stat.split("(")[0] for stat in stats]] - ) - band_stats_df = stats_df[band_columns].copy() - band_stats_df.rename( - columns={ - f"band_{index}_{stat}": stat - for stat in [stat.split("(")[0] for stat in stats] - }, - inplace=True, - ) + if len(bands) == 1: + band_columns.extend( + [f"{stat}" for stat in [stat.split("(")[0] for stat in stats]] + ) + band_stats_df = stats_df[band_columns].copy() + else: + band_columns.extend( + [ + f"band_{index}_{stat}" + for stat in [stat.split("(")[0] for stat in stats] + ] + ) + band_stats_df = stats_df[band_columns].copy() + 
band_stats_df.rename( + columns={ + f"band_{index}_{stat}": stat + for stat in [stat.split("(")[0] for stat in stats] + }, + inplace=True, + ) # Add fid column to the beginning of the dataframe band_stats_df.insert(0, "fid", range(len(band_stats_df))) diff --git a/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif new file mode 100644 index 00000000..5fcf6c23 Binary files /dev/null and b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif differ diff --git a/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif.json b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif.json new file mode 100644 index 00000000..632a425a --- /dev/null +++ b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif.json @@ -0,0 +1,31 @@ +{ + "imageprofile": "s1-grd-sigma0-vvdvh-asc-weekly", + "collection": null, + "index_type": "vvdvh", + "max_cloud_cover": null, + "image_source": "local", + "base_imageprofile": "s1-grd-sigma0-asc-weekly", + "pixel_type": "FLOAT32", + "satellite": "s1", + "roi_bounds": [ + 161400.0, + 188000.0, + 161900.0, + 188500.0 + ], + "roi_crs": "31370", + "start_date": "2024-03-04", + "end_date_incl": "2024-03-10", + "end_date": "2024-03-11", + "period_name": "weekly", + "weeks": [ + 10 + ], + "bands": [ + "vvdvh" + ], + "time_reducer": "last", + "path": 
"C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif", + "job_options": null, + "process_options": null +} \ No newline at end of file diff --git a/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif new file mode 100644 index 00000000..e4f1496a Binary files /dev/null and b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif differ diff --git a/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif.json b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif.json new file mode 100644 index 00000000..9087840d --- /dev/null +++ b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif.json @@ -0,0 +1,31 @@ +{ + "imageprofile": "s1-grd-sigma0-vvdvh-asc-weekly", + "collection": null, + "index_type": "vvdvh", + "max_cloud_cover": null, + "image_source": "local", + "base_imageprofile": "s1-grd-sigma0-asc-weekly", + "pixel_type": "FLOAT32", + "satellite": "s1", + "roi_bounds": [ + 161400.0, + 188000.0, + 161900.0, + 188500.0 + ], + "roi_crs": "31370", + "start_date": "2024-03-11", + "end_date_incl": "2024-03-17", + "end_date": "2024-03-18", + "period_name": "weekly", + "weeks": [ + 11 + ], + "bands": [ + "vvdvh" + ], + "time_reducer": "last", + "path": 
"C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-asc-weekly/s1-grd-sigma0-vvdvh-asc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif", + "job_options": null, + "process_options": null +} \ No newline at end of file diff --git a/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif.json b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif.json new file mode 100644 index 00000000..3f0edb06 --- /dev/null +++ b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif.json @@ -0,0 +1,31 @@ +{ + "imageprofile": "s1-grd-sigma0-vvdvh-desc-weekly", + "collection": null, + "index_type": "vvvh", + "max_cloud_cover": null, + "image_source": "local", + "base_imageprofile": "s1-grd-sigma0-desc-weekly", + "pixel_type": "FLOAT32", + "satellite": "s1", + "roi_bounds": [ + 161400.0, + 188000.0, + 161900.0, + 188500.0 + ], + "roi_crs": "31370", + "start_date": "2024-03-04", + "end_date_incl": "2024-03-10", + "end_date": "2024-03-11", + "period_name": "weekly", + "weeks": [ + 10 + ], + "bands": [ + "vvdvh" + ], + "time_reducer": "last", + "path": "C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif", + "job_options": null, + "process_options": null +} \ No newline at end of file diff --git a/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif.json b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif.json new file mode 100644 
index 00000000..1422516f --- /dev/null +++ b/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif.json @@ -0,0 +1,31 @@ +{ + "imageprofile": "s1-grd-sigma0-vvdvh-desc-weekly", + "collection": null, + "index_type": "vvvh", + "max_cloud_cover": null, + "image_source": "local", + "base_imageprofile": "s1-grd-sigma0-desc-weekly", + "pixel_type": "FLOAT32", + "satellite": "s1", + "roi_bounds": [ + 161400.0, + 188000.0, + 161900.0, + 188500.0 + ], + "roi_crs": "31370", + "start_date": "2024-03-11", + "end_date_incl": "2024-03-17", + "end_date": "2024-03-18", + "period_name": "weekly", + "weeks": [ + 11 + ], + "bands": [ + "vvdvh" + ], + "time_reducer": "last", + "path": "C:/Users/local_KRIWAY/Temp/1/pytest-of-KRIWAY/pytest-5/test_task_calc_periodic_mosaic0/markers/_images_periodic/roi_test/s1-grd-sigma0-vvdvh-desc-weekly/s1-grd-sigma0-vvdvh-desc-weekly_2024-03-11_2024-03-17_vvdvh_last.tif", + "job_options": null, + "process_options": null +} \ No newline at end of file diff --git a/tests/test_end2end.py b/tests/test_end2end.py index 6199f86f..2684197a 100644 --- a/tests/test_end2end.py +++ b/tests/test_end2end.py @@ -85,7 +85,7 @@ def test_cropclass(tmp_path, balancing_strategy, cross_pred_models): today_str = datetime.now().strftime("%Y-%m-%d") run_dir = markers_dir / f"2024_CROPGROUP/Run_{today_str}_001" assert run_dir.exists() - base_stem = "Prc_BEFL_2023_2023-07-24_bufm5_weekly_predict_all" + base_stem = "Prc_BEFL_2023_2023-07-24_bufm0_weekly_predict_all" assert (run_dir / f"{base_stem}.gpkg").exists() assert (run_dir / f"{base_stem}.sqlite_accuracy_report.html").exists() details_gpgk_path = ( diff --git a/tests/test_helper.py b/tests/test_helper.py index be49ef44..97c5b2bd 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -39,6 +39,16 @@ class SampleData: / "s2-agri-weekly" / "s2-agri-weekly_2024-03-04_2024-03-10_B02-B03-B04-B08-B11-B12_best.tif" ) + 
image_s1_grd_vvdvh_asc_weekly_path = ( + image_roi_dir + / "s1-grd-sigma0-vvdvh-asc-weekly" + / "s1-grd-sigma0-vvdvh-asc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif" + ) + image_s1_grd_vvdvh_desc_weekly_path = ( + image_roi_dir + / "s1-grd-sigma0-vvdvh-desc-weekly" + / "s1-grd-sigma0-vvdvh-desc-weekly_2024-03-04_2024-03-10_vvdvh_last.tif" + ) start_date = datetime(2024, 3, 4) end_date = datetime(2024, 3, 11) diff --git a/tests/test_raster_index_util.py b/tests/test_raster_index_util.py index 12e3e0ab..b7e64022 100644 --- a/tests/test_raster_index_util.py +++ b/tests/test_raster_index_util.py @@ -126,11 +126,38 @@ def test_calc_index_invalid(tmp_path): "index, pixel_type, process_options, expected_bands", [ ("dprvi", "BYTE", {}, ["dprvi"]), - ("dprvi", "FLOAT16", None, ["dprvi"]), + pytest.param( + "dprvi", + "FLOAT16", + None, + ["dprvi"], + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), ("dprvi", "FLOAT32", {}, ["dprvi"]), ("rvi", "BYTE", {}, ["rvi"]), - ("vvdvh", "FLOAT16", {}, ["vvdvh"]), - ("sarrgb", "FLOAT16", {}, ["vv", "vh", "vvdvh"]), + pytest.param( + "vvdvh", + "FLOAT16", + {}, + ["vvdvh"], + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), + pytest.param( + "sarrgb", + "FLOAT16", + {}, + ["vv", "vh", "vvdvh"], + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), ("sarrgb", "FLOAT32", {"log10": True}, ["vvdb", "vhdb", "vvdvhdb"]), ("sarrgb", "BYTE", {"log10": True}, ["vvdb", "vhdb", "vvdvhdb"]), ( @@ -211,7 +238,26 @@ def test_calc_index_s1_error( @pytest.mark.parametrize( - "index, pixel_type", [("ndvi", "BYTE"), ("ndvi", "FLOAT16"), ("bsi", "FLOAT16")] + "index, pixel_type", + [ + ("ndvi", "BYTE"), + pytest.param( + "ndvi", + "FLOAT16", + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), + pytest.param( + 
"bsi", + "FLOAT16", + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), + ], ) def test_calc_index_s2(tmp_path, index, pixel_type): # Prepare test data @@ -236,20 +282,108 @@ def test_calc_index_s2(tmp_path, index, pixel_type): [ ("ndvi", "BYTE", gdal.GDT_UInt16, 32676, ["B04", "B08", "b1"], "uint8", 255), ("ndvi", "BYTE", gdal.GDT_Float32, np.nan, ["B04", "B08"], "uint8", 255), - ("ndvi", "FLOAT16", gdal.GDT_UInt16, 32676, ["B04", "B08"], "float32", np.nan), - ( + pytest.param( + "ndvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["B04", "B08"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( "ndvi", "FLOAT16", gdal.GDT_Float32, np.nan, ["B04", "B08"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( + "ndvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["B04", "B08"], "float32", np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), + ), + pytest.param( + "ndvi", + "FLOAT16", + gdal.GDT_Float32, + np.nan, + ["B04", "B08"], + "float32", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), ), ("dprvi", "BYTE", gdal.GDT_UInt16, 32676, ["VH", "VV"], "uint8", 255), ("dprvi", "BYTE", gdal.GDT_Float32, np.nan, ["VH", "VV"], "uint8", 255), - ("dprvi", "FLOAT16", gdal.GDT_UInt16, 32676, ["VH", "VV"], "float32", np.nan), - ("dprvi", "FLOAT16", gdal.GDT_Float32, np.nan, ["VH", "VV"], "float32", np.nan), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["VH", "VV"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( + 
"dprvi", + "FLOAT16", + gdal.GDT_Float32, + np.nan, + ["VH", "VV"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["VH", "VV"], + "float32", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), + ), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_Float32, + np.nan, + ["VH", "VV"], + "float32", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), + ), ], ) def test_calc_index_by_gdal_raster( diff --git a/tests/test_zonal_stats_bulk.py b/tests/test_zonal_stats_bulk.py index 6276a859..9448113b 100644 --- a/tests/test_zonal_stats_bulk.py +++ b/tests/test_zonal_stats_bulk.py @@ -1,11 +1,14 @@ import shutil import geofileops as gfo +import geopandas as gpd import pytest +import shapely from cropclassification.helpers import config_helper as conf from cropclassification.helpers import pandas_helper as pdh from cropclassification.util import zonal_stats_bulk +from cropclassification.util.zonal_stats_bulk import _zonal_stats_bulk_exactextract from cropclassification.util.zonal_stats_bulk._zonal_stats_bulk_pyqgis import HAS_QGIS from tests.test_helper import SampleData @@ -13,18 +16,27 @@ @pytest.mark.parametrize( "engine, stats", [ - ("pyqgis", ["mean", "count"]), - ("rasterstats", ["mean", "count"]), + ("pyqgis", ["mean", "count", "std"]), + ("rasterstats", ["mean", "count", "std"]), ( "exactextract", [ - "mean(min_coverage_frac=0.5,coverage_weight=none)", - "count(min_coverage_frac=0.5,coverage_weight=none)", + "mean(min_coverage_frac=0.8,coverage_weight=none)", + "count(min_coverage_frac=0.8,coverage_weight=none)", + "stdev(min_coverage_frac=0.8,coverage_weight=none)", ], ), ], ) -def test_zonal_stats_bulk(tmp_path, engine, 
stats): +@pytest.mark.parametrize( + "bands, exp_results_path", + [ + (["vvdvh"], 2), + (["VV", "VH"], 8), + (["B02", "B03", "B04", "B08", "B11", "B12"], 18), + ], +) +def test_zonal_stats_bulk(tmp_path, engine, stats, bands, exp_results_path): if engine == "pyqgis" and not HAS_QGIS: pytest.skip("QGIS is not available on this system.") @@ -45,11 +57,25 @@ def test_zonal_stats_bulk(tmp_path, engine, stats): # Make sure the s2-agri input file was copied test_image_roi_dir = test_dir / SampleData.image_dir.name / SampleData.roi_name - test_s1_asc_dir = test_image_roi_dir / SampleData.image_s1_asc_path.parent.name - test_s1_desc_dir = test_image_roi_dir / SampleData.image_s1_desc_path.parent.name - test_image_paths = list(test_s1_asc_dir.glob("*.tif")) - test_image_paths.extend(test_s1_desc_dir.glob("*.tif")) - images_bands = [(path, ["VV", "VH"]) for path in test_image_paths] + if bands == ["VV", "VH"]: + image1_dir = test_image_roi_dir / SampleData.image_s1_asc_path.parent.name + image2_dir = test_image_roi_dir / SampleData.image_s1_desc_path.parent.name + elif bands == ["vvdvh"]: + image1_dir = ( + test_image_roi_dir + / SampleData.image_s1_grd_vvdvh_asc_weekly_path.parent.name + ) + image2_dir = ( + test_image_roi_dir + / SampleData.image_s1_grd_vvdvh_desc_weekly_path.parent.name + ) + elif bands == ["B02", "B03", "B04", "B08", "B11", "B12"]: + image1_dir = test_image_roi_dir / SampleData.image_s2_mean_path.parent.name + image2_dir = None + test_image_paths = list(image1_dir.glob("*.tif")) + if image2_dir: + test_image_paths.extend(image2_dir.glob("*.tif")) + images_bands = [(path, bands) for path in test_image_paths] vector_path = test_dir / SampleData.input_dir.name / "Prc_BEFL_2023_2023-07-24.gpkg" vector_info = gfo.get_layerinfo(vector_path) @@ -63,10 +89,103 @@ def test_zonal_stats_bulk(tmp_path, engine, stats): ) result_paths = list(tmp_path.glob("*.sqlite")) - assert len(result_paths) == 8 + assert len(result_paths) == exp_results_path for result_path in 
result_paths: result_df = pdh.read_file(result_path) # The result should have the same number of rows as the input vector file. assert len(result_df) == vector_info.featurecount # The calculates stats should not be nan for any row. assert not any(result_df["mean"].isna()) + + +def test_exactextract_invalid_stats(tmp_path): + # Prepare test data + sample_dir = SampleData.markers_dir + test_dir = tmp_path / sample_dir.name + shutil.copytree(sample_dir, test_dir) + + vector_path = test_dir / SampleData.input_dir.name / "Prc_BEFL_2023_2023-07-24.gpkg" + + with pytest.raises(ValueError, match="Unsupported stat: means"): + _zonal_stats_bulk_exactextract.zonal_stats_band( + vector_path=vector_path, + raster_path=test_dir / SampleData.image_s1_asc_path, + tmp_dir=tmp_path, + stats=["means"], + include_cols=["index", "UID", "x_ref"], + ) + + +@pytest.mark.parametrize( + "min_coverage_frac, exp_count", + [ + (0.0, [49, 42, 42, 42]), + (0.2, [25, 30, 30, 30]), + (0.5, [25, 30, 30, 20]), + (0.8, [25, 20, 30, 20]), + (1.0, [25, 20, 20, 20]), + ], +) +def test_exactextract_min_coverage_frac(tmp_path, min_coverage_frac, exp_count): + # Prepare test data + sample_dir = SampleData.markers_dir + test_dir = tmp_path / sample_dir.name + shutil.copytree(sample_dir, test_dir) + + geoms = [ + ( + "POLYGON ((161405.210003 188506.174464, 161456.432668 188505.089414," + " 161455.476062 188453.597478, 161403.717327 188454.559035, 161405.210003" + " 188506.174464))" + ), + ( + "POLYGON ((161495.16002 188499.432483, 161546.38515 188498.480826," + " 161545.277792 188446.05759, 161493.919267 188447.011718, 161495.16002" + " 188499.432483))" + ), + ( + "POLYGON ((161433.705572 188423.843391, 161484.794828 188422.760828," + " 161483.986074 188364.860805, 161432.368184 188366.22007, 161433.705572" + " 188423.843391))" + ), + ( + "POLYGON ((161513.51231 188417.022976, 161565.006707 188416.199771," + " 161563.840308 188367.780984, 161512.222428 188369.140255, 161513.51231" + " 188417.022976))" + ), + 
] + + box_geoms = shapely.from_wkt(geoms) + + gdf_geoms = gpd.GeoDataFrame(geometry=box_geoms, crs="EPSG:31370") + + # write the geofile to a file + vector_path = tmp_path / "vector.gpkg" + gfo.to_file(gdf=gdf_geoms, path=vector_path) + gfo.add_column(path=vector_path, name="UID", type="TEXT", expression="fid") + + test_image_roi_dir = test_dir / SampleData.image_dir.name / SampleData.roi_name + image1_dir = test_image_roi_dir / SampleData.image_s1_asc_path.parent.name + image2_dir = test_image_roi_dir / SampleData.image_s1_desc_path.parent.name + test_image_paths = list(image1_dir.glob("*.tif")) + if image2_dir: + test_image_paths.extend(image2_dir.glob("*.tif")) + bands = ["VV", "VH"] + images_bands = [(path, bands) for path in test_image_paths] + output_path = tmp_path / f"min_cov_frac_{min_coverage_frac}" + stats = [f"count(min_coverage_frac={min_coverage_frac},coverage_weight=none)"] + + zonal_stats_bulk.zonal_stats( + vector_path=vector_path, + id_column="UID", + rasters_bands=images_bands, + output_dir=output_path, + stats=stats, + engine="exactextract", + ) + + result_paths = list(output_path.glob("*.sqlite")) + result_df = pdh.read_file(result_paths[0]) + for index in range(4): + count = result_df[result_df["index"] == index]["count"].values[0] + assert count == exp_count[index]