diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3e7c226..a010a07 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,7 +28,7 @@ jobs: matrix: os: [ubuntu-latest] dev: [false] - python: ["3.10", "3.11", "3.12"] + python: ["3.10", "3.11", "3.12", "3.13"] env: ["latest"] # Use openblas instead of mkl saves 600 MB. Linux OK, 50% slower on Windows and OSX! extra: ["nomkl"] @@ -44,7 +44,7 @@ jobs: - env: latest os: windows-latest dev: false - python: "3.12" + python: "3.13" steps: - uses: actions/checkout@v6 diff --git a/cropclassification/calc_periodic_mosaic.py b/cropclassification/calc_periodic_mosaic.py index c55df6f..fff522e 100644 --- a/cropclassification/calc_periodic_mosaic.py +++ b/cropclassification/calc_periodic_mosaic.py @@ -1,6 +1,6 @@ """Generate periodic mosaics.""" -from datetime import datetime, timedelta +from datetime import datetime from pathlib import Path import cropclassification.helpers.config_helper as conf @@ -32,11 +32,6 @@ def calc_periodic_mosaic_task(config_paths: list[Path], default_basedir: Path) - end_date = datetime(now.year, now.month, now.day) else: end_date = datetime.fromisoformat(conf.period["end_date"]) - images_available_delay = conf.period["images_available_delay"] - if images_available_delay is not None: - now = datetime.now() - today = datetime(now.year, now.month, now.day) - end_date = today - timedelta(int(images_available_delay)) imageprofiles_to_get = list(conf.parse_image_config(conf.images["images"])) imageprofiles = conf._get_image_profiles( @@ -52,5 +47,6 @@ def calc_periodic_mosaic_task(config_paths: list[Path], default_basedir: Path) - output_base_dir=conf.paths.getpath("images_periodic_dir"), imageprofiles_to_get=imageprofiles_to_get, imageprofiles=imageprofiles, + images_available_delay=conf.period["images_available_delay"], force=False, ) diff --git a/cropclassification/preprocess/_timeseries_calc_openeo.py b/cropclassification/preprocess/_timeseries_calc_openeo.py index 88a3cd3..922dd0f 100644 --- a/cropclassification/preprocess/_timeseries_calc_openeo.py +++ b/cropclassification/preprocess/_timeseries_calc_openeo.py @@ -27,6 +27,7 @@ def calculate_periodic_timeseries( timeseries_periodic_dir: Path, nb_parallel: int, on_missing_image: str, + images_available_delay: int | None = None, force: bool = False, ) -> None: """Calculate timeseries data for the input parcels. @@ -50,12 +51,13 @@ def calculate_periodic_timeseries( - ignore: ignore that the image, don't try to download it - calculate_raise: calculate the image and raise an error if it fails - calculate_ignore: calculate the image and ignore the error if it fails + images_available_delay (Optional[int]): delay in days for images to be available force (bool = False): whether to force recalculation of existing data. (will not redownload images) """ info = gfo.get_layerinfo(input_parcel_path) if info.crs is not None and not info.crs.equals(roi_crs): - raise ValueError(f"parcel crs ({info.crs}) <> roi crs ({roi_crs})") + raise ValueError(f"parcel crs ({info.crs}) <> roi_crs ({roi_crs})") if not shapely.box(*info.total_bounds).within(shapely.box(*roi_bounds)): raise ValueError( f"parcel bounds ({info.total_bounds}) not within roi_bounds ({roi_bounds})" @@ -70,6 +72,7 @@ def calculate_periodic_timeseries( imageprofiles_to_get=imageprofiles_to_get, imageprofiles=imageprofiles, on_missing_image=on_missing_image, + images_available_delay=images_available_delay, force=False, # dont redownload on force ) diff --git a/cropclassification/preprocess/timeseries.py b/cropclassification/preprocess/timeseries.py index 8fc19cd..7e6a7b5 100644 --- a/cropclassification/preprocess/timeseries.py +++ b/cropclassification/preprocess/timeseries.py @@ -66,6 +66,7 @@ def calc_timeseries_data( timeseries_periodic_dir=timeseries_periodic_dir, nb_parallel=conf.general.getint("nb_parallel", -1), on_missing_image=conf.images.get("on_missing_image", "calculate_raise"), + images_available_delay=conf.period["images_available_delay"], force=force, ) diff --git a/cropclassification/util/mosaic_util.py b/cropclassification/util/mosaic_util.py index 2cd4d40..dac32c8 100644 --- a/cropclassification/util/mosaic_util.py +++ b/cropclassification/util/mosaic_util.py @@ -162,6 +162,7 @@ def calc_periodic_mosaic( output_base_dir: Path, delete_existing_openeo_jobs: bool = False, on_missing_image: str = "calculate_raise", + images_available_delay: int | None = None, force: bool = False, ) -> list[dict[str, Any]]: """Generate a periodic mosaic. @@ -202,6 +203,8 @@ def calc_periodic_mosaic( - ignore: ignore that the image, don't try to download it - calculate_raise: calculate the image and raise an error if it fails - calculate_ignore: calculate the image and ignore the error if it fails + images_available_delay (int | None, optional): number of days to wait before the + images are available. Defaults to None. force (bool, optional): True to force recreation of existing output files. Defaults to False. @@ -242,9 +245,12 @@ def calc_periodic_mosaic( # Make sure band information is embedded in the image for image in images_from_openeo: if image["path"].exists(): - raster_util.set_band_descriptions( - image["path"], band_descriptions=image["bands"], overwrite=False - ) + if not _is_image_outdated( + image=image, images_available_delay=images_available_delay + ): + raster_util.set_band_descriptions( + image["path"], band_descriptions=image["bands"], overwrite=False + ) # First get all mosaic images from openeo openeo_util.get_images( @@ -269,6 +275,29 @@ def calc_periodic_mosaic( return periodic_mosaic_params +def _is_image_outdated( + image: dict[str, Any], images_available_delay: int | None = None +) -> bool: + creation_date = datetime.fromtimestamp(image["path"].stat().st_ctime) + # Check if the creation date is different from the current date + if creation_date != datetime.now(): + # Check if the creation date is greater then or equal to the end date + if images_available_delay is not None: + day = datetime(creation_date.year, creation_date.month, creation_date.day) + new_date = day - timedelta(int(images_available_delay)) + else: + new_date = creation_date + if not new_date >= image["end_date"]: + # Delete the image + logger.info( + f"Image {image['path']} is older than the end date " + f"{image['end_date']}: deleting it." + ) + image["path"].unlink(missing_ok=True) + return True + return False + + def _prepare_periods( start_date: datetime, end_date: datetime, diff --git a/tests/test_mosaic_util.py b/tests/test_mosaic_util.py index 741990a..9730f34 100644 --- a/tests/test_mosaic_util.py +++ b/tests/test_mosaic_util.py @@ -1,6 +1,7 @@ import shutil -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path +from time import sleep import pytest @@ -233,3 +234,33 @@ def test_prepare_mosaic_image_path(): "/tmp/s2-agri-weekly/s2-agri-weekly_2024-01-01_2024-01-02_B01-B02_mean.tif" ) assert result_path == expected_path + + +@pytest.mark.parametrize( + "end_date, is_outdated", + [ + ( + datetime(2024, 3, 11, 0, 0), + False, + ), + ( + datetime.now() - timedelta(days=1), + True, + ), + ], +) +def test_is_image_outdated(tmp_path, end_date, is_outdated): + # Prepare test data + image_path = SampleData.image_s2_mean_path + test_dir = tmp_path / image_path.name + shutil.copyfile(image_path, test_dir) + sleep(1) # Ensure the file has a different timestamp + + image = { + "end_date": end_date, + "path": test_dir, + } + + result = mosaic_util._is_image_outdated(image=image, images_available_delay=3) + assert result == is_outdated + assert image["path"].exists() if not is_outdated else not image["path"].exists() diff --git a/tests/test_raster_index_util.py b/tests/test_raster_index_util.py index 12e3e0a..b7e6402 100644 --- a/tests/test_raster_index_util.py +++ b/tests/test_raster_index_util.py @@ -126,11 +126,38 @@ def test_calc_index_invalid(tmp_path): "index, pixel_type, process_options, expected_bands", [ ("dprvi", "BYTE", {}, ["dprvi"]), - ("dprvi", "FLOAT16", None, ["dprvi"]), + pytest.param( + "dprvi", + "FLOAT16", + None, + ["dprvi"], + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), ("dprvi", "FLOAT32", {}, ["dprvi"]), ("rvi", "BYTE", {}, ["rvi"]), - ("vvdvh", "FLOAT16", {}, ["vvdvh"]), - ("sarrgb", "FLOAT16", {}, ["vv", "vh", "vvdvh"]), + pytest.param( + "vvdvh", + "FLOAT16", + {}, + ["vvdvh"], + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), + pytest.param( + "sarrgb", + "FLOAT16", + {}, + ["vv", "vh", "vvdvh"], + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), ("sarrgb", "FLOAT32", {"log10": True}, ["vvdb", "vhdb", "vvdvhdb"]), ("sarrgb", "BYTE", {"log10": True}, ["vvdb", "vhdb", "vvdvhdb"]), ( @@ -211,7 +238,26 @@ def test_calc_index_s1_error( @pytest.mark.parametrize( - "index, pixel_type", [("ndvi", "BYTE"), ("ndvi", "FLOAT16"), ("bsi", "FLOAT16")] + "index, pixel_type", + [ + ("ndvi", "BYTE"), + pytest.param( + "ndvi", + "FLOAT16", + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), + pytest.param( + "bsi", + "FLOAT16", + marks=pytest.mark.skipif( + rasterio.__version__ == "1.4.4", + reason="Requires rasterio <> 1.4.4", + ), + ), + ], ) def test_calc_index_s2(tmp_path, index, pixel_type): # Prepare test data @@ -236,20 +282,108 @@ def test_calc_index_s2(tmp_path, index, pixel_type): [ ("ndvi", "BYTE", gdal.GDT_UInt16, 32676, ["B04", "B08", "b1"], "uint8", 255), ("ndvi", "BYTE", gdal.GDT_Float32, np.nan, ["B04", "B08"], "uint8", 255), - ("ndvi", "FLOAT16", gdal.GDT_UInt16, 32676, ["B04", "B08"], "float32", np.nan), - ( + pytest.param( + "ndvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["B04", "B08"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( "ndvi", "FLOAT16", gdal.GDT_Float32, np.nan, ["B04", "B08"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( + "ndvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["B04", "B08"], "float32", np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), + ), + pytest.param( + "ndvi", + "FLOAT16", + gdal.GDT_Float32, + np.nan, + ["B04", "B08"], + "float32", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), ), ("dprvi", "BYTE", gdal.GDT_UInt16, 32676, ["VH", "VV"], "uint8", 255), ("dprvi", "BYTE", gdal.GDT_Float32, np.nan, ["VH", "VV"], "uint8", 255), - ("dprvi", "FLOAT16", gdal.GDT_UInt16, 32676, ["VH", "VV"], "float32", np.nan), - ("dprvi", "FLOAT16", gdal.GDT_Float32, np.nan, ["VH", "VV"], "float32", np.nan), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["VH", "VV"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_Float32, + np.nan, + ["VH", "VV"], + "float16", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ < "1.5", reason="Requires rasterio 1.5 or higher" + ), + ), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_UInt16, + 32676, + ["VH", "VV"], + "float32", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), + ), + pytest.param( + "dprvi", + "FLOAT16", + gdal.GDT_Float32, + np.nan, + ["VH", "VV"], + "float32", + np.nan, + marks=pytest.mark.skipif( + rasterio.__version__ >= "1.5" or rasterio.__version__ == "1.4.4", + reason="Requires rasterio < 1.5", + ), + ), ], ) def test_calc_index_by_gdal_raster(